In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Read the car-price CSV into a DataFrame, then preview its first five rows.
automobile_data = pd.read_csv(filepath_or_buffer='datasets/CarPrice_Assignment.csv')
automobile_data.head(n=5)

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [4]:
# Size of the loaded frame as (rows, columns).
automobile_data.shape

(205, 26)

In [6]:
# List the column labels of the loaded frame.
automobile_data.columns

Index(['car_ID', 'symboling', 'CarName', 'fueltype', 'aspiration',
       'doornumber', 'carbody', 'drivewheel', 'enginelocation', 'wheelbase',
       'carlength', 'carwidth', 'carheight', 'curbweight', 'enginetype',
       'cylindernumber', 'enginesize', 'fuelsystem', 'boreratio', 'stroke',
       'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg',
       'price'],
      dtype='object')

In [7]:
# Remove CarName, car_ID and symboling so they do not end up among the features.
automobile_data = automobile_data.drop(columns=['CarName', 'car_ID', 'symboling'])

In [8]:
# One-hot encode the remaining text columns; numeric columns pass through as-is.
# NOTE(review): drop_first is left at its default, so every category level gets
# its own indicator column and each group of indicators sums to one.
automobile_data = pd.get_dummies(data=automobile_data)

In [9]:
# Preview the encoded frame (first five rows).
automobile_data.head(n=5)

Unnamed: 0,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,...,cylindernumber_twelve,cylindernumber_two,fuelsystem_1bbl,fuelsystem_2bbl,fuelsystem_4bbl,fuelsystem_idi,fuelsystem_mfi,fuelsystem_mpfi,fuelsystem_spdi,fuelsystem_spfi
0,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,...,0,0,0,0,0,0,0,1,0,0
1,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,...,0,0,0,0,0,0,0,1,0,0
2,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,9.0,154,...,0,0,0,0,0,0,0,1,0,0
3,99.8,176.6,66.2,54.3,2337,109,3.19,3.4,10.0,102,...,0,0,0,0,0,0,0,1,0,0
4,99.4,176.6,66.4,54.3,2824,136,3.19,3.4,8.0,115,...,0,0,0,0,0,0,0,1,0,0


In [10]:
# Separate the target from the predictors: `price` is the value to predict and
# every other column is an input feature.
Y = automobile_data['price']
X = automobile_data.drop(columns='price')

In [11]:
# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so that Restart & Run All reproduces the same
# partition, and therefore the same fitted coefficients and scores. Without it
# every run draws a different split and the saved parameters cannot be compared
# between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [12]:
# Create the linear regressor, then fit it on the training split.
# fit() returns the estimator itself, so rebinding keeps `model` pointing at the
# fitted object without echoing its repr as cell output.
model = LinearRegression()
model = model.fit(x_train, y_train)

In [13]:
# Score the fitted model on the same rows it was trained on.
Training_score = model.score(X=x_train, y=y_train)

In [14]:
# Display the score stored in Training_score.
Training_score

0.9413953065021247

In [15]:
# Predict prices for the held-out rows.
y_pred = model.predict(X=x_test)

In [30]:
# R^2 of the predictions against the true held-out prices.
r2_score(y_true=y_test, y_pred=y_pred)

0.9121746899721064

# Serializing the model

In [17]:
import json

In [18]:
# Inspect the fitted coefficient array (one weight per input column).
model.coef_

array([-1.30978911e+01, -2.13152995e+01,  6.01607199e+02,  9.93734886e+01,
        3.16609392e+00,  1.10839624e+02, -1.97872749e+03, -3.83709063e+03,
       -3.21679818e+02,  2.88314443e+01,  1.87575693e+00, -1.08484351e+02,
        1.36481993e+02,  2.23315712e+03, -2.23315712e+03, -7.54266773e+02,
        7.54266773e+02, -8.86113866e+01,  8.86113866e+01,  2.49106892e+03,
       -6.23837285e+02, -8.26978432e+02, -6.23406445e+01, -9.77912561e+02,
       -3.33360116e+02, -1.85628205e+02,  5.18988322e+02, -2.73386180e+03,
        2.73386180e+03,  9.69414072e+02, -8.97010058e+03,  1.86839491e+03,
        4.48985155e+03,  3.08215569e+03, -4.57603696e+03,  3.13632131e+03,
        7.07396062e+03, -3.57902343e+03, -4.14679802e+03, -5.55563181e+02,
        3.78753142e+03, -5.71642874e+03,  3.13632131e+03,  6.98975452e+02,
        8.28824314e+02, -2.45435304e+02,  2.23315712e+03, -2.59067754e+03,
        6.82760554e+02, -2.02694216e+03,  4.19337561e+02])

In [19]:
# Inspect the fitted intercept term.
model.intercept_

-36203.58673846251

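To serialize the model, we only need to save its coefficients (`coef_`) and its intercept (`intercept_`). The coefficients are positional, so a model rebuilt from them must be given the features in the same column order that was used for training.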

In [20]:
# Collect the fitted parameters as plain Python values so that json can encode them.
# ndarray.tolist() converts every element to a built-in float (and handles nested
# arrays). list(model.coef_) would instead keep NumPy scalar objects, which json
# accepts only when they happen to subclass float: float64 does, float32 does not.
model_param = {
    'coef': model.coef_.tolist(),
    'intercept': model.intercept_.tolist(),
}

In [21]:
# Encode the parameter dictionary as JSON text (4-space indentation) and show it.
json_txt = json.dumps(obj=model_param, indent=4)
json_txt

'{\n    "coef": [\n        -13.097891080639066,\n        -21.315299534095892,\n        601.6071987994939,\n        99.37348863572318,\n        3.166093918110647,\n        110.83962384205158,\n        -1978.7274913805159,\n        -3837.090634634735,\n        -321.67981784595406,\n        28.83144428576827,\n        1.875756929562158,\n        -108.48435099132485,\n        136.48199265367202,\n        2233.1571225208418,\n        -2233.1571225208477,\n        -754.2667731973884,\n        754.2667731973904,\n        -88.6113866394758,\n        88.61138663943602,\n        2491.068921841371,\n        -623.8372846336036,\n        -826.9784321914995,\n        -62.34064449361978,\n        -977.9125605226125,\n        -333.3601163004094,\n        -185.62820520725523,\n        518.9883215076721,\n        -2733.8618049281304,\n        2733.8618049281295,\n        969.4140723273855,\n        -8970.10058016752,\n        1868.3949140533182,\n        4489.851548858039,\n        3082.155690015715,\n 

In [23]:
# Persist the JSON text to disk.
# open(..., 'w') creates the file but not a missing parent folder, so make sure
# 'models' exists first; exist_ok=True makes this a no-op when it already does.
os.makedirs('models', exist_ok=True)

# An explicit encoding keeps the written bytes independent of the platform default.
with open('models/regressor_param.txt', 'w', encoding='utf-8') as file:
    file.write(json_txt)

In [26]:
# Read the saved file back and parse it, to confirm the parameters survive the
# round trip through JSON.
with open('models/regressor_param.txt', mode='r') as param_file:
    json_loaded = json.loads(param_file.read())

In [27]:
# Display the dictionary parsed back from the file.
json_loaded

{'coef': [-13.097891080639066,
  -21.315299534095892,
  601.6071987994939,
  99.37348863572318,
  3.166093918110647,
  110.83962384205158,
  -1978.7274913805159,
  -3837.090634634735,
  -321.67981784595406,
  28.83144428576827,
  1.875756929562158,
  -108.48435099132485,
  136.48199265367202,
  2233.1571225208418,
  -2233.1571225208477,
  -754.2667731973884,
  754.2667731973904,
  -88.6113866394758,
  88.61138663943602,
  2491.068921841371,
  -623.8372846336036,
  -826.9784321914995,
  -62.34064449361978,
  -977.9125605226125,
  -333.3601163004094,
  -185.62820520725523,
  518.9883215076721,
  -2733.8618049281304,
  2733.8618049281295,
  969.4140723273855,
  -8970.10058016752,
  1868.3949140533182,
  4489.851548858039,
  3082.155690015715,
  -4576.036959533307,
  3136.321314446348,
  7073.960623602811,
  -3579.023425562633,
  -4146.798017781329,
  -555.5631808382183,
  3787.5314219804627,
  -5716.428735847417,
  3136.3213144463466,
  698.9754523572421,
  828.8243139407925,
  -245.43530

In [28]:
# Create a fresh, unfitted estimator; fit() is never called on it in this notebook.
new_model = LinearRegression()

In [29]:
# Restore the learned state by hand instead of calling fit(): assign the saved
# intercept and coefficients, converted back to NumPy arrays, onto the new
# estimator.
new_model.intercept_ = np.array(json_loaded['intercept'])
new_model.coef_ = np.array(json_loaded['coef'])

In [31]:
# Score the rebuilt model on the held-out rows; the result should equal the
# original model's test R^2 if the parameters were restored correctly.
y_new_pred = new_model.predict(X=x_test)
r2_score(y_true=y_test, y_pred=y_new_pred)

0.9121746899721064