In [30]:
import pandas as pd
import pickle
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split

# Load the cleaned housing dataset from the working directory.
df = pd.read_csv('house_cleaned_datas.csv')

# Target as a plain array; X keeps every column except the ones listed here.
y = df['price'].values
X = df.drop(columns=['price', 'lat', 'long', 'date', 'sqft_living15', 'sqft_lot15', 'view'])

# Columns routed to each preprocessing branch of the ColumnTransformer.
numeric_features = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
    'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated',
    'grade', 'waterfront', 'condition',
]
categorical_features = ['zipcode']

# Numeric columns are standardised; zipcode is one-hot encoded, and
# handle_unknown='ignore' keeps categories unseen during fit from raising.
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Pre-processing: apply each transformer to its own set of columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

# Pipeline definition: preprocessing followed by a 5-nearest-neighbours regressor.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', KNeighborsRegressor(n_neighbors=5)),
    ]
)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the whole pipeline on the training part only.
pipeline.fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = pipeline.predict(X_test)

In [31]:
# Spot check on the first held-out row: print the pipeline's prediction
# next to the true price taken from the same position in y_test.
observation = X_test.iloc[:1]  # one-row DataFrame, selected by position
prix_pred = pipeline.predict(observation)
print("Prix prédit :", prix_pred[0])
print("Prix réel :", y_test[0])

Prix prédit : 404700.0
Prix réel : 365000.0


In [25]:
# Serialise the pipeline object to 'knn_model.pkl' in the working directory.
# NOTE(review): this cell's execution count (25) is lower than the training
# cell's (30), so the file on disk may predate the latest fit -- re-run this
# cell after training to be sure the saved model is current.
with open('knn_model.pkl', 'wb') as model_file:
    pickle.dump(pipeline, model_file)