## Testing the model on test data

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.model_selection import StratifiedShuffleSplit

In [2]:
housing = pd.read_csv('data.csv')

In [3]:
train_set, test_set = tts(housing, test_size=0.2, random_state=42)
print(f"Rows in train set : {len(train_set)}\nRows in test set : {len(test_set)}")

Rows in train set : 404
Rows in test set : 102


In [4]:
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(housing, housing['CHAS']):
    start_train_set = housing.loc[train_index]
    start_test_set = housing.loc[test_index]

In [5]:
start_test_set["CHAS"].value_counts()

0    95
1     7
Name: CHAS, dtype: int64

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy= 'median') 
imputer.fit(housing)
my_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler())
])

In [7]:
test_data = start_test_set.drop("MEDV", axis=1)
test_valu = start_test_set["MEDV"].copy()
data_prediction = my_pipeline.fit_transform(test_data)

In [8]:
from joblib import dump, load
model = load('Housing_valu.joblib')

In [9]:
data_prediction = my_pipeline.transform(test_data)

In [12]:
finel_prediction = model.predict(data_prediction)

In [15]:
from sklearn.metrics import mean_squared_error
import numpy as np
finel_mse = mean_squared_error(test_valu, finel_prediction)
finel_rmse = np.sqrt(finel_mse)
print(finel_prediction, list(test_valu))

[24.601  9.858 25.586 21.244 17.234 14.507 19.499 14.433 32.271 42.567
 19.08   8.89  23.806 32.298 20.375 12.116 32.287 14.52  23.26  17.11
 19.52  15.129 16.791 21.571 18.426 31.264 16.914 31.648  9.471 33.681
 24.093 21.177 22.776  8.908 20.27  12.805 42.075 23.992 24.076 43.954
 23.302 26.602 20.07  21.876 15.512 32.499 45.306 20.998 18.993 22.34
 21.511 14.748 19.079 14.134 25.107 34.007 43.323 28.741 18.063 21.229
 47.741 10.753 19.051 23.224 13.635 38.958 19.4   16.923 17.993 33.396
 25.675 22.635 20.076 23.08  35.628 13.418 15.689 20.121 21.348 21.857
 22.498 21.798 13.551 23.146 21.242 21.99  14.251 21.225 22.349 23.547
 17.742 27.225  8.11  26.298 15.366 30.096 20.036 31.47  13.801 26.674
 18.141 17.533] [16.5, 10.2, 30.1, 23.0, 14.4, 15.6, 19.4, 14.1, 30.3, 35.2, 23.1, 13.8, 25.0, 27.9, 19.5, 12.3, 32.2, 13.5, 23.8, 21.7, 19.2, 19.5, 10.4, 23.2, 18.6, 28.5, 15.2, 32.0, 7.2, 34.6, 20.1, 20.6, 23.6, 13.1, 23.8, 12.7, 43.1, 24.7, 22.2, 44.0, 28.1, 31.0, 21.7, 23.4, 19.5, 33.1, 

In [14]:
print(finel_mse)

11.872728068627453
