In [3]:
#pip install gradio

In [4]:
import gradio as gr

import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR

In [6]:
# Detect whether TensorFlow can see a GPU on this machine.
from tensorflow.python.client import device_lib

# Enumerate the devices once and reuse the result for both the printout and
# the MODE flag (the device list was previously queried twice).
local_devices = device_lib.list_local_devices()
print(local_devices)
# NOTE(review): MODE is not referenced anywhere else in the visible notebook.
MODE = "GPU" if any(dev.device_type == "GPU" for dev in local_devices) else "CPU"
print(MODE)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14996402906895943551
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14415560704
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4730242169389344748
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
xla_global_id: 416903419
]
GPU


## Chargement des données

In [None]:
# Load the full dataset, index it by hotel_id and discard the helper columns
# that are not features. Extra "Unnamed: 0.1.1"-style columns are not dropped
# here; add them to the list if the CSV contains them.
requests = (
    pd.read_csv("data.csv", sep=",", header=0)
    .set_index("hotel_id")
    .drop(columns=["Unnamed: 0", "Unnamed: 0.1", "index", "hotel_id.1"])
)

In [None]:
# Cast the qualitative variables to unordered pandas categoricals.
for column in ("pool", "parking", "children_policy", "language",
               "mobile", "city", "group", "brand"):
    requests[column] = pd.Categorical(requests[column], ordered=False)

In [None]:
# Preview the first rows of `requests`.
requests.head()

In [None]:
# Replace the string labels of each column by integer codes: the code of a
# label is its position among the sorted unique values of that column.
# The label <-> code lookup itself is discarded here.
for column in ("group", "brand", "city", "language"):
    _, opt = np.unique(np.asarray(requests[column]), return_inverse=True)
    requests[column] = opt
requests

## Création de l'échantillon d'apprentissage et de l'échantillon test

In [7]:
# A smaller dataset is used here so that the model trains faster.
# avatar_name is dropped for this test because the file only holds one avatar.
requests = (
    pd.read_csv("/content/data/laulau_data.csv", sep=",", header=0)
    .set_index("hotel_id")
    .drop(columns=["Unnamed: 0", "index", "avatar_name"])
)
requests.head()

Unnamed: 0_level_0,group,brand,city,parking,pool,children_policy,price,stock,date,language,mobile
hotel_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
519,Yin Yang,Royal Lotus,copenhagen,0,0,0,208,68,30,french,0
523,Independant,Independant,copenhagen,0,0,0,85,41,30,french,0
898,Yin Yang,8 Premium,copenhagen,1,0,0,86,24,30,french,0
770,Morriott International,Morriot,copenhagen,1,1,0,210,73,30,french,0
562,Chillton Worldwide,Quadrupletree,copenhagen,0,0,0,215,74,30,french,0


In [8]:
# Replace the string labels of each column by integer codes: the code of a
# label is its position among the sorted unique values of that column.
# The label <-> code lookup itself is discarded here.
for column in ("group", "brand", "city", "language"):
    _, opt = np.unique(np.asarray(requests[column]), return_inverse=True)
    requests[column] = opt
requests

Unnamed: 0_level_0,group,brand,city,parking,pool,children_policy,price,stock,date,language,mobile
hotel_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
519,5,13,0,0,0,0,208,68,30,4,0
523,3,7,0,0,0,0,85,41,30,4,0
898,5,0,0,1,0,0,86,24,30,4,0
770,4,10,0,1,1,0,210,73,30,4,0
562,2,12,0,0,0,0,215,74,30,4,0
...,...,...,...,...,...,...,...,...,...,...,...
498,1,2,3,0,0,0,94,40,30,2,1
516,4,4,3,1,0,2,195,117,30,2,1
187,0,9,3,1,1,0,137,86,30,2,1
687,2,15,3,0,0,0,110,52,30,2,1


In [9]:
# Hold out 25% of the rows for testing (fixed seed). The target column is
# removed from the features before splitting, so X_train / X_test never
# contain the price.
price = requests["price"]
X_train, X_test, price_train, price_test = train_test_split(
    requests.drop(columns=["price"]),
    price,
    test_size=0.25,
    random_state=11,
)

In [10]:
# Preview the first rows of the training features.
X_train.head()

Unnamed: 0_level_0,group,brand,city,parking,pool,children_policy,stock,date,language,mobile
hotel_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
237,4,5,2,1,0,0,56,30,2,0
476,3,7,2,1,0,0,33,30,2,0
221,3,7,3,1,0,0,28,30,1,0
618,0,11,1,1,0,0,39,30,0,0
600,3,7,1,1,0,0,27,30,3,0


# Prédiction du prix des hôtels avec plusieurs méthodes

In [11]:
# Mean Absolute Percentage Error (MAPE).
def MAPE(Y_actual,Y_Predicted):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are converted to plain float arrays first, so the comparison
    is always positional. Without this, two pandas Series carrying different
    indexes would be aligned on their labels and silently produce NaN, and
    plain Python lists would raise a TypeError.

    Parameters
    ----------
    Y_actual : array-like
        Observed values. An entry equal to 0 makes the result inf or nan,
        because the error is divided by the observed value.
    Y_Predicted : array-like
        Predicted values, in the same order as ``Y_actual``.

    Returns
    -------
    float
        ``mean(|Y_actual - Y_Predicted| / |Y_actual|) * 100``.
    """
    actual = np.asarray(Y_actual, dtype=float)
    predicted = np.asarray(Y_Predicted, dtype=float)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    return mape

In [12]:
# Grid-search the MLP hyper-parameters with 10-fold cross-validation; the
# returned search object is then used for prediction.
rate = (1e-1 * np.arange(1, 30, 3)).tolist()  # 0.1, 0.4, ..., 2.8
acti = ['relu', 'tanh', 'logistic', 'identity']
hidden = list(range(10, 150, 10))
param_grid = [{"hidden_layer_sizes": hidden, "activation": acti, "learning_rate_init": rate}]
# A fixed random_state makes the weight initialisation (and therefore the
# selected hyper-parameters) reproducible from one run to the next.
nnet = GridSearchCV(MLPRegressor(max_iter=500, random_state=11), param_grid, cv=10, n_jobs=-1)
nnetOpt = nnet.fit(X_train, price_train)
# With no explicit `scoring`, best_score_ is the regressor's default score
# (R²), so 1 - best_score_ is an error measure rather than a score; the label
# below says so explicitly.
# NOTE(review): in the recorded run the selected learning_rate_init was the
# smallest value of the grid — consider extending the grid towards smaller rates.
print("Meilleure erreur (1 - R²) = %f, Meilleur paramètre = %s" % (1. - nnetOpt.best_score_, nnetOpt.best_params_))

Meilleur score = 0.251860, Meilleur paramètre = {'activation': 'relu', 'hidden_layer_sizes': 140, 'learning_rate_init': 0.1}


## Calcul de l'erreur de prédiction

In [39]:
# Predict the prices of the held-out test rows with the fitted grid-search model.
estimated_price_nnet=nnetOpt.predict(X_test)

### MSE

In [40]:
# Mean squared error between the true and the predicted test prices.
mean_squared_error(price_test, estimated_price_nnet)

554.5734052625056

### MAPE

In [41]:
# Mean absolute percentage error (in %) between the true and the predicted test prices.
MAPE(price_test,estimated_price_nnet)

15.939833657023062

## Utilisation de Gradio

In [42]:
# Columns that were label-encoded before training, and the CSV those labels
# came from. The path must stay identical to the file used to train the model.
_ENCODED_COLUMNS = ("group", "brand", "city", "language")
_TRAINING_CSV = "/content/data/laulau_data.csv"
_label_codes_cache = {}


def _training_label_codes(csv_path=_TRAINING_CSV):
  """Return {column: {label: code}} rebuilt from the training CSV.

  The code of a label is its position among the sorted unique values of the
  column, i.e. exactly what np.unique(..., return_inverse=True) assigned when
  the training frame was encoded. The result is cached per path so the file
  is read only once.
  """
  if csv_path not in _label_codes_cache:
    raw = pd.read_csv(csv_path, sep=",", header=0, usecols=list(_ENCODED_COLUMNS))
    _label_codes_cache[csv_path] = {
        column: {label: code for code, label in enumerate(np.unique(np.asarray(raw[column])))}
        for column in _ENCODED_COLUMNS
    }
  return _label_codes_cache[csv_path]


def _encode_label(column, label):
  """Translate one raw label into the integer code used at training time."""
  codes = _training_label_codes()[column]
  if label not in codes:
    raise ValueError(
        "Unknown %s %r; expected one of: %s" % (column, label, ", ".join(map(str, codes)))
    )
  return codes[label]


def predict_price(group,brand,city,language,date,parking,pool,mobile, children_policy,stock):
  """Predict the price of one hotel request with the fitted model `nnetOpt`.

  Parameters
  ----------
  group, brand, city, language : str
      Raw labels, spelled as in the training CSV. They are converted with the
      training-time label -> code table. Encoding the single input row on its
      own (as was done before) always yields code 0, which made these four
      inputs irrelevant to the prediction.
  date, stock : number
      Passed to the model unchanged.
  parking, pool, mobile : bool
      Converted to 0/1.
  children_policy : str
      "no_policy" -> 0, "no_less_12" -> 1, anything else -> 2.

  Returns
  -------
  int
      The predicted price, truncated to an integer.

  Raises
  ------
  ValueError
      If group, brand, city or language is not a label seen in the training data.
  """
  if children_policy == "no_policy":
    policy_code = 0
  elif children_policy == "no_less_12":
    policy_code = 1
  else:
    policy_code = 2

  row = {
      "group": _encode_label("group", group),
      "brand": _encode_label("brand", brand),
      "city": _encode_label("city", city),
      "parking": 1 if parking else 0,
      "pool": 1 if pool else 0,
      "children_policy": policy_code,
      "stock": stock,
      "date": date,
      "language": _encode_label("language", language),
      "mobile": 1 if mobile else 0,
  }

  # Build the one-row frame directly (DataFrame.append no longer exists in
  # pandas 2.x) and force the column order the model was trained with.
  hotel_features = pd.DataFrame([row], columns=X_test.columns)

  estimated_price_nnet = nnetOpt.predict(hotel_features)
  # predict returns a 1-element array; index it explicitly before converting.
  return int(estimated_price_nnet[0])

In [34]:
# Build the Gradio form. Components come from the top-level gradio namespace,
# because gradio.inputs is deprecated. The inputs are listed in the same order
# as the parameters of predict_price.
date = gr.Slider(minimum=1, maximum=44, step=1)
children_policy = gr.Radio(choices=["no_policy", "no_less_12", "no_children"])

demo = gr.Interface(
    fn=predict_price,
    inputs=["text", "text", "text", "text", date, "checkbox", "checkbox", "checkbox", children_policy, "number"],
    outputs=["number"],
)
demo.launch()

  "Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components",
  "Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components",


Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://28d4fbc5fc4d7e0b.gradio.app

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


(<gradio.routes.App at 0x7f04f7369f10>,
 'http://127.0.0.1:7868/',
 'https://28d4fbc5fc4d7e0b.gradio.app')