In [1]:
# loading necessary modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import ensemble
from sklearn import datasets
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the Boston house-prices dataset that ships with scikit-learn.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the environment pins a version that still provides it.
boston = datasets.load_boston()

# Show the description text attached to the dataset object.
dataset_description = boston.DESCR
print("Data Description: \n", dataset_description)

Data Description: 
 .. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
 

In [2]:
# Assemble the feature matrix as a DataFrame, labelling the columns with the
# dataset's feature names in the same constructor call.
X_data = pd.DataFrame(boston.data, columns=boston.feature_names)

# Target values, taken as-is from the dataset's `target` attribute.
y_target = boston.target

In [3]:
# Split the data into train and test sets in a 7:3 ratio.
# A fixed random_state makes the shuffle deterministic, so the same rows land
# in each split on every run and the error reported below is reproducible
# after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_target, test_size=0.3, random_state=42
)

In [4]:
# Create a GradientBoostingRegressor with default settings and train it on the
# training split; fit() hands back the fitted estimator, so it is bound directly.
model = ensemble.GradientBoostingRegressor().fit(X_train, y_train)

# Predict target values for the held-out test rows
y_pred = model.predict(X_test)

# Mean squared error of the predictions against the true test targets
test_mse = mean_squared_error(y_test, y_pred)
print(test_mse)

8.214333228277033


In [5]:
# Persist the trained model and the feature-column order to disk.
import os
import pickle

# open(..., 'wb') does not create missing directories, so make sure the
# output folder exists before writing into it.
os.makedirs('model', exist_ok=True)

# saving the model
with open('model/model.pkl', 'wb') as file:
    pickle.dump(model, file)

# saving the columns
# Dump the column list itself: previously `model` was written here a second
# time, so model_columns.pkl never contained the feature names.
model_columns = list(X_data.columns)
with open('model/model_columns.pkl', 'wb') as file:
    pickle.dump(model_columns, file)