# Defi IA 2023 - Prédiction du prix d'une chambre d'hôtels

In [1]:
import pandas as pd
import numpy as np
import os
import glob
import urllib.parse
import requests
import os

In [2]:
domain = "51.91.251.0"
port = 3000
host = f"http://{domain}:{port}"
path = lambda x: urllib.parse.urljoin(host, x)
user_id = '88be3640-82ae-46c6-ac59-92a9679d453d'

## Création d'un avatar

In [None]:
name = 'laulau' #donner le nom de l'avatar
new_avatar = requests.post(path(f'avatars/{user_id}/{name}'))

In [3]:
avatar_info = requests.get(path(f"avatars/{user_id}"))
avatar_names = []

for avatar in avatar_info.json():
    avatar_names.append(avatar['name'])

avatar_names

['laulau', 'Nini', 'Tess']

## Excécution des requêtes

In [4]:
cities=['amsterdam','madrid','paris','rome','sofia','vienna']
dates=[30]
languages=['austrian','french','german','greek','hungarian','irish','italian','lithuanian','luxembourgish', 'maltese','polish','slovakian','spanish','swedish']
mobiles=[0,1]
avatar = 'Nini' #nom de l'avatar à tester

if not os.path.exists('requests'):
    os.mkdir('requests')


#listes contenant toutes les combinaisons possibles pour un avatar
combination=[]
for count1,date in enumerate(dates):
    for count2,city in enumerate(cities):
        for count3,language in enumerate(languages):
            for count4,mobile in enumerate(mobiles):
                combination.append((city,date,language,mobile))
                params = {
                "avatar_name": avatar,
                "language": language,
                "city": city,
                "date": date,
                "mobile": mobile,}

                ## REQUÊTE ##
                requete = requests.get(path(f"pricing/{user_id}"), params=params)
                requete.json()
                pricing_requests = []

                request = [requete]
                for requete in request:
                    pricing_requests.append(
                        pd.DataFrame(requete.json()['prices']).assign(**requete.json()['request'])
                    )

                pricing_requests = pd.concat(pricing_requests)
                pricing_requests.to_csv(os.path.join('requests', str(avatar) +'_'+str(city)+ '_' + str(date) + '_' + str(language) + '_' + str(mobile)+'.csv'))

KeyError: 'prices'

In [5]:
requete.json()

{'detail': "Too many requests in the past week. You've done 1000 requests, while the limit is 1000."}

## Mise en lien des requêtes et des features des differents hotels

In [4]:
hotels_features = pd.read_csv(os.path.join('features', 'features_hotels.csv'))
laulau_requests_copenhagen = pd.read_csv(os.path.join('requests', 'laulau_copenhagen_30_0.csv'))
laulau_requests_copenhagen = laulau_requests_copenhagen.drop(columns = 'Unnamed: 0')

In [5]:
laulau_requests_copenhagen

Unnamed: 0,hotel_id,price,stock,city,date,language,mobile,avatar_id
0,519,208,68,copenhagen,30,french,0,3533
1,523,85,41,copenhagen,30,french,0,3533
2,898,86,24,copenhagen,30,french,0,3533
3,770,210,73,copenhagen,30,french,0,3533
4,562,215,74,copenhagen,30,french,0,3533
...,...,...,...,...,...,...,...,...
139,332,208,86,copenhagen,30,french,0,3533
140,978,299,120,copenhagen,30,french,0,3533
141,335,91,46,copenhagen,30,french,0,3533
142,385,283,101,copenhagen,30,french,0,3533


La commande ci-dessous permet de charger les informations en lien avec la requête lancée. Ainsi, on peut lier les caractéristiques de l'hôtel avec la recherche effectuée par un avatar, dans une ville et une langue particulière.  

In [6]:
features_laulau_copenhagen = hotels_features.loc[hotels_features['hotel_id'].loc[laulau_requests_copenhagen['hotel_id']]]
features_laulau_copenhagen = features_laulau_copenhagen.reset_index()

In [7]:
df = pd.concat([features_laulau_copenhagen, laulau_requests_copenhagen[['price', 'stock', 'date', 'language', 'mobile']]], axis = 1)
df

Unnamed: 0,index,hotel_id,group,brand,city,parking,pool,children_policy,price,stock,date,language,mobile
0,519,519,Yin Yang,Royal Lotus,copenhagen,0,0,0,208,68,30,french,0
1,523,523,Independant,Independant,copenhagen,0,0,0,85,41,30,french,0
2,898,898,Yin Yang,8 Premium,copenhagen,1,0,0,86,24,30,french,0
3,770,770,Morriott International,Morriot,copenhagen,1,1,0,210,73,30,french,0
4,562,562,Chillton Worldwide,Quadrupletree,copenhagen,0,0,0,215,74,30,french,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,332,332,Chillton Worldwide,Quadrupletree,copenhagen,1,1,0,208,86,30,french,0
140,978,978,Chillton Worldwide,Chill Garden Inn,copenhagen,1,0,0,299,120,30,french,0
141,335,335,Independant,Independant,copenhagen,0,0,0,91,46,30,french,0
142,385,385,Accar Hotels,Safitel,copenhagen,0,0,0,283,101,30,french,0


## Automatisation du code pour création des datasets

On va chercher à créer une liste contenant le nom de tous les avatars.

In [8]:
r = requests.get(path(f"avatars/{user_id}"))
list_avatars = []
for avatar in r.json():
    list_avatars.append(avatar['name'])

Ayant tous les avatars, on peut créer les datasets pour chacun des avatars, s'il n'existe pas déjà, et les compléter avec les données récupérées.

In [9]:
hotels_features = pd.read_csv(os.path.join('features', 'features_hotels.csv'))

Le code ci-dessous permet de créer le dataset associé à un avatar précis. 

In [16]:
list_requests = glob.glob(os.path.join('requests', '*.csv'))
list_requests = [request.split('\\')[-1] for request in list_requests]
df = pd.DataFrame([])
print(list_requests)

if not os.path.exists('datasets'):
    os.mkdir('datasets')

for avatar in ['laulau']:
    for request in list_requests:
        name_avatar_request = request.split('_')[0] 
        if not os.path.exists(os.path.join('datasets', avatar + '_data.csv')):
            if (avatar==name_avatar_request):
                request_avatar = pd.read_csv(os.path.join('requests', request))
                #request_avatar = request_avatar.drop(columns = 'Unnamed: 0')
                features_avatar = hotels_features.loc[hotels_features['hotel_id'].loc[request_avatar['hotel_id']]]
                features_avatar = features_avatar.reset_index()
                df = pd.concat([features_avatar, request_avatar[['price', 'stock', 'date', 'language', 'mobile']]], axis = 1)
                df['avatar_name'] = [avatar for i in range(len(df.index))]
                df.to_csv(os.path.join('datasets', avatar + '_data.csv'),index_label = 'hotel_id')
        else:
            if (avatar==name_avatar_request):
                df_old = pd.read_csv(os.path.join('datasets', avatar + '_data.csv'))
                df_old = df_old.drop(columns = 'Unnamed: 0')
                request_avatar = pd.read_csv(os.path.join('requests', request))
                request_avatar = request_avatar.drop(columns = 'Unnamed: 0')
                features_avatar = hotels_features.loc[hotels_features['hotel_id'].loc[request_avatar['hotel_id']]]
                features_avatar = features_avatar.reset_index()
                df_aux = pd.concat([features_avatar, request_avatar[['price', 'stock', 'date', 'language', 'mobile']]], axis = 1)
                df = pd.concat([df,df_aux], axis = 0)
                df['avatar_name'] = [avatar for i in range(len(df.index))]
                df.to_csv(os.path.join('datasets', avatar + '_data.csv'))


['laulau_amsterdam_10_austrian_0.csv', 'laulau_amsterdam_10_austrian_1.csv']


In [31]:
#Concaténatation des datasets de chacun des avatars pour créer un dataset global
datasets = glob.glob(os.path.join('datasets', 'laulau_data.csv'))

for dataset_path in datasets:
    dataset = pd.read_csv(dataset_path)
    if not os.path.exists('data.csv'):
        df = pd.DataFrame([])
        df = dataset
        df.to_csv('data.csv', index_label = 'hotel_id')
    else:
        df = pd.read_csv('data.csv')
        print(df.head())
        df = pd.concat([df, dataset], axis = 0)
        df.to_csv('data.csv')

In [33]:
pd.read_csv(os.path.join('datasets', 'laulau_data.csv'))

Unnamed: 0.1,Unnamed: 0,index,hotel_id,group,brand,city,parking,pool,children_policy,price,stock,date,language,mobile,avatar_name
0,0,917,917,Morriott International,Corlton,amsterdam,1,1,0,366,14,10,austrian,0,laulau
1,1,101,101,Accar Hotels,Safitel,amsterdam,1,0,0,359,10,10,austrian,0,laulau
2,2,762,762,Yin Yang,Ardisson,amsterdam,1,1,0,193,10,10,austrian,0,laulau
3,3,140,140,Boss Western,Boss Western,amsterdam,0,0,0,205,4,10,austrian,0,laulau
4,4,256,256,Accar Hotels,Ibas,amsterdam,1,0,0,86,0,10,austrian,0,laulau
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98711,88,498,498,Boss Western,Boss Western,vilnius,0,0,0,94,40,30,czech,1,laulau
98712,89,516,516,Morriott International,Corlton,vilnius,1,0,2,195,117,30,czech,1,laulau
98713,90,187,187,Accar Hotels,Marcure,vilnius,1,1,0,137,86,30,czech,1,laulau
98714,91,687,687,Chillton Worldwide,Tripletree,vilnius,0,0,0,110,52,30,czech,1,laulau
