# Scikit-learn

- Instancier le modèle de ML que l'on souhaite utiliser
- Adapter le modèle aux données d'entraînement
- Utiliser le modèle pour faire des prédictions
- Évaluer la précision des prédictions

### classe KNeighborsRegressor

In [30]:
from sklearn.neighbors import KNeighborsRegressor

In [31]:
import pandas as pd
import numpy as np

# Fix NumPy's global seed so the shuffle below is reproducible.
np.random.seed(1)

# Load the listings, parse the price strings, then shuffle the rows.
listing = pd.read_csv('paris_airbnb.csv')

# Strip the '$' sign and the thousands separators before casting to float.
# The vectorised .str accessor passes missing values through as NaN, whereas
# calling str.replace inside .apply raises on a NaN (float) cell.
# regex=False is required: '$' would otherwise be read as a regex anchor.
listing['price'] = (
    listing['price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

# A permutation of 0..n-1 is a list of row positions, so select with .iloc;
# .loc would only work while the index labels happen to be exactly 0..n-1.
listing = listing.iloc[np.random.permutation(len(listing))]

In [32]:
# Print a summary of the frame: index range, column names, non-null counts and dtypes.
listing.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8000 entries, 4740 to 5157
Data columns (total 19 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   host_response_rate    5000 non-null   object 
 1   host_acceptance_rate  0 non-null      float64
 2   host_listings_count   7999 non-null   float64
 3   latitude              8000 non-null   float64
 4   longitude             8000 non-null   float64
 5   city                  7997 non-null   object 
 6   zipcode               7930 non-null   object 
 7   state                 7977 non-null   object 
 8   accommodates          8000 non-null   int64  
 9   room_type             8000 non-null   object 
 10  bedrooms              7976 non-null   float64
 11  bathrooms             7942 non-null   float64
 12  beds                  7986 non-null   float64
 13  price                 8000 non-null   float64
 14  cleaning_fee          6250 non-null   object 
 15  security_deposit      6

In [33]:
# Data cleaning: remove the host, location and room-type columns from the frame.
columns_to_drop = [
    'host_response_rate',
    'host_acceptance_rate',
    'host_listings_count',
    'latitude',
    'longitude',
    'city',
    'zipcode',
    'state',
    'room_type',
]
listing = listing.drop(columns=columns_to_drop)

In [34]:

# Number of missing values in each remaining column.
listing.isna().sum()

accommodates            0
bedrooms               24
bathrooms              58
beds                   14
price                   0
cleaning_fee         1750
security_deposit     1680
minimum_nights          0
maximum_nights          0
number_of_reviews       0
dtype: int64

In [35]:
# Remove the two fee columns, then discard every row that still contains a
# missing value (dropna works row-wise by default).
listing = listing.drop(columns=['security_deposit', 'cleaning_fee']).dropna()

# Check that no missing values remain.
listing.isna().sum()

accommodates         0
bedrooms             0
bathrooms            0
beds                 0
price                0
minimum_nights       0
maximum_nights       0
number_of_reviews    0
dtype: int64

In [44]:
# Standardise every column: subtract the column mean, divide by the column
# standard deviation. The target column 'price' is then restored to its
# original, unscaled values so that errors are expressed in price units.
# NOTE(review): mean and std are computed over all rows, including the rows
# later used as the test set, so test data influences the scaling.
normalized_listing = (
    listing
    .sub(listing.mean())
    .div(listing.std())
    .assign(price=listing['price'])
)
normalized_listing.head()

Unnamed: 0,accommodates,bedrooms,bathrooms,beds,price,minimum_nights,maximum_nights,number_of_reviews
4740,0.503868,-0.296884,-0.293537,0.20531,65.0,-0.184601,1.062858,-0.564545
5606,-0.131849,0.892605,0.843973,0.20531,98.0,-0.101183,1.061018,-0.636924
4824,-0.767566,-1.486372,-0.293537,-0.64526,65.0,0.037847,1.062858,-0.6514
4205,-0.767566,-0.296884,-0.293537,-0.64526,45.0,-0.101183,1.062858,0.550105
3228,-0.131849,-0.296884,-0.293537,0.20531,65.0,-0.101183,-0.335685,-0.579021


In [45]:
# Positional split: the first 6000 rows form the training set, the rest the test set.
split_at = 6000
train_set, test_set = normalized_listing.iloc[:split_at], normalized_listing.iloc[split_at:]

In [None]:


# k-nearest-neighbours regressor: 5 neighbours, brute-force neighbour search.
knn = KNeighborsRegressor(n_neighbors=5, algorithm='brute')

In [46]:
# Baseline model: predict the price from two features only.
baseline_features = ['accommodates', 'bedrooms']
knn.fit(train_set[baseline_features], train_set['price'])

# Predicted prices for the test rows; shown as the cell output.
predict = knn.predict(test_set[baseline_features])
predict

array([ 60. ,  81.8,  81.8, ...,  60. , 121.2,  81.8])

In [50]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [51]:
# Score the baseline predictions against the true test prices.
mae = mean_absolute_error(test_set['price'], predict)
mse = mean_squared_error(test_set['price'], predict)
rmse = np.sqrt(mse)  # root of the MSE, i.e. back in price units

# One value per line, in the order MAE, MSE, RMSE.
print(mae, mse, rmse, sep='\n')

43.18205529473135
6236.238664580073
78.96985921590638


In [53]:
# Refit the existing regressor on every column except the target.
X_train = train_set.drop(columns='price')
X_test = test_set.drop(columns='price')
knn.fit(X_train, train_set['price'])
prediction = knn.predict(X_test)

# Error metrics on the test set.
mse = mean_squared_error(test_set['price'], prediction)
mae = mean_absolute_error(test_set['price'], prediction)
rmse = np.sqrt(mse)

# One value per line, in the order MAE, MSE, RMSE.
print(mae, mse, rmse, sep='\n')

43.533124673969745
6323.108335941575
79.51797492354528


In [56]:
# Experiment: three features, a fresh regressor with 5 neighbours.
feature = ['accommodates', 'bedrooms', 'bathrooms']

# fit() returns the estimator itself, so construction and fitting can be chained.
knn = KNeighborsRegressor(n_neighbors=5, algorithm='brute').fit(
    train_set[feature], train_set['price']
)
prediction = knn.predict(test_set[feature])

# Error metrics on the test set.
y_test = test_set['price']
mae = mean_absolute_error(y_test, prediction)
mse = mean_squared_error(y_test, prediction)
rmse = np.sqrt(mse)

# One value per line, in the order MAE, MSE, RMSE.
print(mae, mse, rmse, sep='\n')

40.49880020865937
5385.400667709963
73.38528917780431


In [57]:
# Experiment: same three features, but averaging over 50 neighbours.
feature = ['accommodates', 'bedrooms', 'bathrooms']

knn = KNeighborsRegressor(n_neighbors=50, algorithm='brute')
X_train, y_train = train_set[feature], train_set['price']
X_test, y_test = test_set[feature], test_set['price']
knn.fit(X_train, y_train)
prediction = knn.predict(X_test)

# Error metrics on the test set.
mae = mean_absolute_error(y_test, prediction)
mse = mean_squared_error(y_test, prediction)
rmse = np.sqrt(mse)

# One value per line, in the order MAE, MSE, RMSE.
print(mae, mse, rmse, sep='\n')

38.92004173187272
4857.563805112155
69.69622518553035


In [60]:
# NOTE(review): this cell repeats the preceding experiment (same three
# features, 50 neighbours) with no change; consider removing it or varying
# a hyperparameter.
feature = ['accommodates', 'bedrooms', 'bathrooms']

knn = KNeighborsRegressor(n_neighbors=50, algorithm='brute')
knn.fit(train_set[feature], train_set['price'])
prediction = knn.predict(test_set[feature])

# Error metrics on the test set.
actual_price = test_set['price']
mse = mean_squared_error(actual_price, prediction)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual_price, prediction)

# One value per line, in the order MAE, MSE, RMSE.
print(mae, mse, rmse, sep='\n')

38.92004173187272
4857.563805112155
69.69622518553035


39.383526343244654
4972.517035159103
70.51607643054953
