# Model Notebook for Coding in SK-Learn

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression

In [2]:
# import the data
df = sns.load_dataset('tips')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [18]:
# # preprocess the data
scalar = StandardScaler()
df[['total_bill', 'tip']] = scalar.fit_transform(df[['total_bill', 'tip']])

In [19]:
# separate the features X and the target/labels y
X = df[['total_bill']]
y = df['tip']

In [20]:
# train test split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [23]:
# Preprcess the data. in this case we need to scale the data and not done before
# scalar = StandardScaler()
# X_train = scalar.fit_transform(X_train)
# y_test = scalar.fit_transform(y_test)

In [24]:
# call the model
model = LinearRegression()

In [25]:
# train the model
model.fit(X_train, y_train)

In [28]:
# prediction
y_pred = model.predict(X_test)
y_pred



array([ 1.19748752, -0.73185445, -0.19377553, -0.65365881,  0.36598139,
        0.33191597, -0.44307252,  1.14871203, -0.88592309,  0.32649647,
        0.06945733, -0.95095709,  0.06636047,  1.15026045, -0.34165045,
        0.45811289,  0.57966453, -0.67920788, -0.7891463 , -0.49726752,
       -0.49339645,  0.09423218,  0.88857603, -0.40823288,  0.33036754,
       -0.1705491 ,  0.2088159 ,  0.13216868, -0.51584866, -0.49339645,
        0.80108982, -0.1163541 ,  0.41940218,  0.1058454 , -0.38810331,
        2.2047403 , -0.26964853, -0.29055231, -0.37029638,  0.36830404,
       -0.17983967,  1.65659659, -0.2951976 ,  1.41039645, -0.79301737,
       -0.45313731,  0.99774024, -0.87353566, -0.78295259])

In [31]:
# evaluation the model
from sklearn.metrics import mean_squared_error, r2_score
print('MSE: ', mean_squared_error(y_test, y_pred))
print('R2: ', r2_score(y_test, y_pred))

MSE:  0.2764161891962634
R2:  0.5859353706229079


## Save and load a model

In [16]:
# save the model
import pickle
pickle.dump(model, open('./saved_models/model_01.pkl', 'wb'))

In [32]:
# load the model
import pickle
model_load = pickle.load(open('./saved_models/model_01.pkl', 'rb'))

In [34]:
model_load.predict([[30]])



array([4.05290414])