# Défi IA 2023 - Prédiction du prix d'une chambre d'hôtel

In [42]:
import pandas as pd
import numpy as np
import os
import glob

## Mise en lien des requêtes et des features des différents hôtels

In [10]:
# Load the hotel feature table and the saved results of one request.
hotels_features = pd.read_csv(
    os.path.join('features', 'features_hotels.csv')
)
laulau_requests_copenhagen = (
    pd.read_csv(os.path.join('requests', 'laulau_copenhagen_30_0.csv'))
    # 'Unnamed: 0' is the header-less first column of the file
    # (presumably a row index saved along with the data).
    .drop(columns='Unnamed: 0')
)

In [11]:
# Show the request results loaded above (rendered as the cell output).
laulau_requests_copenhagen

Unnamed: 0,hotel_id,price,stock,city,date,language,mobile,avatar_id
0,519,208,68,copenhagen,30,french,0,3533
1,523,85,41,copenhagen,30,french,0,3533
2,898,86,24,copenhagen,30,french,0,3533
3,770,210,73,copenhagen,30,french,0,3533
4,562,215,74,copenhagen,30,french,0,3533
...,...,...,...,...,...,...,...,...
139,332,208,86,copenhagen,30,french,0,3533
140,978,299,120,copenhagen,30,french,0,3533
141,335,91,46,copenhagen,30,french,0,3533
142,385,283,101,copenhagen,30,french,0,3533


La commande ci-dessous permet de charger les caractéristiques des hôtels renvoyés par la requête lancée. Ainsi, on peut lier les caractéristiques de chaque hôtel à la recherche effectuée par un avatar, dans une ville et une langue particulières.  

In [19]:
# Pick, in request order, the feature rows of the hotels returned by the request.
# NOTE(review): both .loc calls are label-based, so this returns the requested
# hotels only if each row label of hotels_features equals its hotel_id -- TODO confirm.
# reset_index() keeps the former row labels as an extra 'index' column.
features_laulau_copenhagen = (
    hotels_features
    .loc[hotels_features['hotel_id'].loc[laulau_requests_copenhagen['hotel_id']]]
    .reset_index()
)

In [35]:
# Put the hotel features and the request-specific columns side by side;
# rows are aligned on the row index of the two frames.
df = pd.concat(
    objs=[
        features_laulau_copenhagen,
        laulau_requests_copenhagen.loc[:, ['price', 'stock', 'date', 'language', 'mobile']],
    ],
    axis='columns',
)
df

Unnamed: 0,index,hotel_id,group,brand,city,parking,pool,children_policy,price,stock,date,language,mobile
0,519,519,Yin Yang,Royal Lotus,copenhagen,0,0,0,208,68,30,french,0
1,523,523,Independant,Independant,copenhagen,0,0,0,85,41,30,french,0
2,898,898,Yin Yang,8 Premium,copenhagen,1,0,0,86,24,30,french,0
3,770,770,Morriott International,Morriot,copenhagen,1,1,0,210,73,30,french,0
4,562,562,Chillton Worldwide,Quadrupletree,copenhagen,0,0,0,215,74,30,french,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,332,332,Chillton Worldwide,Quadrupletree,copenhagen,1,1,0,208,86,30,french,0
140,978,978,Chillton Worldwide,Chill Garden Inn,copenhagen,1,0,0,299,120,30,french,0
141,335,335,Independant,Independant,copenhagen,0,0,0,91,46,30,french,0
142,385,385,Accar Hotels,Safitel,copenhagen,0,0,0,283,101,30,french,0


## Automatisation du code pour la création des datasets

In [30]:
import urllib.parse
import requests

# Connection settings of the pricing API.
domain = "51.91.251.0"
port = 3000
host = f"http://{domain}:{port}"


def path(x):
    """Return the absolute URL of the API resource `x`.

    Parameters
    ----------
    x : str
        Resource path relative to `host`, e.g. "avatars/<user_id>".

    Returns
    -------
    str
        `x` resolved against `host` with urllib.parse.urljoin.
    """
    return urllib.parse.urljoin(host, x)


# The user id can be supplied through the DEFI_IA_USER_ID environment variable
# so that it does not have to live in the notebook; the historical value is
# kept as the default so existing runs behave exactly as before.
user_id = os.environ.get('DEFI_IA_USER_ID', '88be3640-82ae-46c6-ac59-92a9679d453d')

On va chercher à créer une liste contenant le nom de tous les avatars.

In [39]:
# Ask the API for the avatars of this user and keep only their names.
# A timeout prevents the cell from hanging forever if the server is unreachable.
r = requests.get(path(f"avatars/{user_id}"), timeout=30)
# Stop here on an HTTP error instead of iterating over an error payload.
r.raise_for_status()
list_avatars = [avatar['name'] for avatar in r.json()]

Ayant tous les avatars, on peut créer les datasets pour chacun des avatars, s'ils n'existent pas déjà, et les compléter avec les données récupérées.

In [43]:
# (Re)load the hotel feature table used to build the per-avatar datasets.
hotels_features = pd.read_csv(
    filepath_or_buffer=os.path.join('features', 'features_hotels.csv'),
)

Le code ci-dessous permet de créer le dataset associé à un avatar précis. 

In [67]:
def _request_to_dataset(request_file, features):
    """Join one saved request with the features of the hotels it returned.

    Parameters
    ----------
    request_file : str
        File name (not a full path) of a CSV inside the 'requests' directory.
    features : pandas.DataFrame
        Hotel feature table containing a 'hotel_id' column.

    Returns
    -------
    pandas.DataFrame
        The selected feature rows (with the 'index' column added by
        reset_index) next to the request's 'price', 'stock', 'date',
        'language' and 'mobile' columns.
    """
    request_avatar = pd.read_csv(os.path.join('requests', request_file))
    request_avatar = request_avatar.drop(columns='Unnamed: 0')
    # NOTE(review): label-based lookup; it returns the requested hotels only if
    # each row label of `features` equals its hotel_id -- TODO confirm.
    features_avatar = features.loc[features['hotel_id'].loc[request_avatar['hotel_id']]]
    # The 'index' column created here is kept on purpose so that new rows have
    # the same columns as datasets already written to disk.
    features_avatar = features_avatar.reset_index()
    return pd.concat(
        [features_avatar, request_avatar[['price', 'stock', 'date', 'language', 'mobile']]],
        axis=1,
    )


# os.path.basename works with both '\\' and '/' separators, unlike split('\\');
# sorting makes the row order of the datasets reproducible.
list_requests = sorted(
    os.path.basename(request) for request in glob.glob(os.path.join('requests', '*'))
)
os.makedirs('datasets', exist_ok=True)

for avatar in list_avatars:
    dataset_path = os.path.join('datasets', avatar + '_data.csv')
    # Only the request files whose name starts with "<avatar>_" belong to this
    # avatar; files of other avatars must never be appended to its dataset.
    avatar_requests = [request for request in list_requests if request.split('_')[0] == avatar]
    if not avatar_requests:
        continue

    frames = []
    if os.path.exists(dataset_path):
        # Extend the dataset that is already on disk rather than whatever `df`
        # happens to be left in the kernel.
        # NOTE(review): running this cell again appends the same request files
        # once more; delete the dataset file first to rebuild it from scratch.
        df_old = pd.read_csv(dataset_path)
        df_old = df_old.drop(columns='Unnamed: 0')
        frames.append(df_old)
    frames.extend(_request_to_dataset(request, hotels_features) for request in avatar_requests)

    # One concat and one write per avatar instead of one per request file.
    df = pd.concat(frames, axis=0, ignore_index=True)
    df.to_csv(dataset_path)

df_old=    index  hotel_id  price  stock        city  date language  mobile  \
0      0       519    208     68  copenhagen    30   french       0   
1      1       523     85     41  copenhagen    30   french       0   
2      2       898     86     24  copenhagen    30   french       0   
3      3       770    210     73  copenhagen    30   french       0   
4      4       562    215     74  copenhagen    30   french       0   

   avatar_id  price.1  stock.1  date.1 language.1  mobile.1  
0       3533      208       68      30     french         0  
1       3533       85       41      30     french         0  
2       3533       86       24      30     french         0  
3       3533      210       73      30     french         0  
4       3533      215       74      30     french         0  
df_aux=    index  hotel_id                   group          brand        city  \
0    519       519                Yin Yang    Royal Lotus  copenhagen   
1    523       523             Independ