In [445]:
import pandas as pd
import numpy as np
import seaborn as sns
import math
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier, KerasRegressor

## Part 1: Implement a Perceptron
Given the diabetes dataset you used during Assignment 2, implement an MLP Regressor.

In [446]:
from sklearn.datasets import load_diabetes

In [447]:
db_data = load_diabetes()
print(db_data.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

In [448]:
db_data.feature_names

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [449]:
diabetes_df = pd.DataFrame(data= np.c_[db_data['data'], db_data['target']], columns = db_data['feature_names']+ ['target'])
diabetes_df.head(5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


#### Splitting dataset into training and test set

In [450]:
y = diabetes_df['target']
X = diabetes_df.drop('target', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

#### Implementing MLP Regressor

In [451]:
mlp = MLPRegressor(max_iter=5000)
mlp.fit(X_train, y_train)
test_preds = regr.predict(X_test)
print("MSE of MLP:", mean_squared_error(y_test, test_preds))
print("R2 of MLP:", r2_score(y_test, test_preds))

MSE of MLP: 2837.1833339955097
R2 of MLP: 0.4744309774209077


## Part 2: Implement a Keras Classifier

Given the prepared Titanic dataset from Assignment 3, implement aKerassequential classifier with relu activation functions.

In [452]:
titanic = sns.load_dataset("titanic")

In [453]:
titanic.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


#### Looking at the Variable Types

In [454]:
titanic.dtypes

survived          int64
pclass            int64
sex              object
age             float64
sibsp             int64
parch             int64
fare            float64
embarked         object
class          category
who              object
adult_male         bool
deck           category
embark_town      object
alive            object
alone              bool
dtype: object

In [455]:
for col in ['sex', 'embarked', 'class', 'who', 'embark_town']:
    titanic[col] = titanic[col].astype('category')

In [456]:
cat_vars = {False:0, True:1, 'yes':1, 'no':0}
for col in ['adult_male', 'alive', 'alone']:
    titanic[col] = titanic[col].map(cat_vars)

In [457]:
titanic.dtypes

survived          int64
pclass            int64
sex            category
age             float64
sibsp             int64
parch             int64
fare            float64
embarked       category
class          category
who            category
adult_male        int64
deck           category
embark_town    category
alive             int64
alone             int64
dtype: object

#### Data Preprocessing

In [458]:
med1 = titanic[titanic['pclass'] == 1]['age'].median()
med2 = titanic[titanic['pclass'] == 2]['age'].median()
med3 = titanic[titanic['pclass'] == 3]['age'].median()

In [459]:
print(med1)
print(med2)
print(med3)

37.0
29.0
24.0


In [460]:
titanic.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,1,,Southampton,0,0
1,1,1,female,38.0,1,0,71.2833,C,First,woman,0,C,Cherbourg,1,0
2,1,3,female,26.0,0,0,7.925,S,Third,woman,0,,Southampton,1,1
3,1,1,female,35.0,1,0,53.1,S,First,woman,0,C,Southampton,1,0
4,0,3,male,35.0,0,0,8.05,S,Third,man,1,,Southampton,0,1


In [461]:
# Dropping column deck feature, and other non-necessary features
titanic.drop(columns=['deck', 'alive', 'who'], inplace =True)

In [462]:
def impute_age(x):
    """Uses median age of pclass to fix the null values in the age attribute"""
    if x['pclass'] == 1:
        if math.isnan(x['age']):
            return med1
        else:
            return x['age']
    elif x['pclass'] == 2:
        if math.isnan(x['age']):
            return med2
        else:
            return x['age']
    elif x['pclass'] == 3:
        if math.isnan(x['age']):
            return med3
        else:
            return x['age']
    
titanic['age'] = titanic[['age','pclass']].apply(impute_age,axis=1)

In [463]:
titanic.isnull().sum()

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       2
class          0
adult_male     0
embark_town    2
alone          0
dtype: int64

In [464]:
# Drop the remaining records the are missing.
titanic.dropna(inplace = True)

In [465]:
titanic.isnull().sum()

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
adult_male     0
embark_town    0
alone          0
dtype: int64

##### Convert Categorical Variables to Dummy Variables

Creating a dummy for pclass as well since we are dealing with Class 1, Class 2, and Class 3. Each class could be important in determining whether a passenger lives or dies. 

In [466]:
for col in ['pclass', 'sex', 'embarked', 'class', 'embark_town']:
    temp = pd.get_dummies(titanic[col],prefix=col, prefix_sep='')
    titanic= pd.concat([titanic, temp], axis=1)
titanic.rename(columns={1: '1', 2:'2', 3:'3'}, inplace = True)

In [467]:
titanic.head(5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,adult_male,...,sexmale,embarkedC,embarkedQ,embarkedS,classFirst,classSecond,classThird,embark_townCherbourg,embark_townQueenstown,embark_townSouthampton
0,0,3,male,22.0,1,0,7.25,S,Third,1,...,1,0,0,1,0,0,1,0,0,1
1,1,1,female,38.0,1,0,71.2833,C,First,0,...,0,1,0,0,1,0,0,1,0,0
2,1,3,female,26.0,0,0,7.925,S,Third,0,...,0,0,0,1,0,0,1,0,0,1
3,1,1,female,35.0,1,0,53.1,S,First,0,...,0,0,0,1,1,0,0,0,0,1
4,0,3,male,35.0,0,0,8.05,S,Third,1,...,1,0,0,1,0,0,1,0,0,1


In [468]:
# dropping name essential columns
titanic.drop(columns = ['pclass', 'sex', 'embarked', 'class', 'embark_town'], inplace = True)

In [469]:
# copying dataframe to a new dataframe
titanicdf = titanic.copy()

#### Creating Train and Test Sets

In [470]:
#Sruvived column will be the target column. Doing a 70/30 split
y = titanicdf['survived']
X = titanicdf.drop('survived', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

### Model Keras Sequential Classifier with Relu Activation

In [472]:
import tensorflow as tf
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers import Dense

In [473]:
def model_creation():
    '''Create the model for Keras Classifier'''
    model= Sequential()
    model.add(Dense(3, activation = 'relu', input_shape=(20,)))
    model.add(Dense(2, activation = 'relu'))
    model.add(Dense(1, activation = 'relu'))

    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

    return model 

In [474]:
# Fit the data
model = KerasClassifier(model=model_creation)
model.fit(X_train, y_train)



KerasClassifier(
	model=<function model_creation at 0x0000019B61C2B700>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=None
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=1
	class_weight=None
)

## Part 3: Implement a Keras Regressor

Given the prepared bike-share dataset from Assignment 4, implement aKeras sequential regressor with relu activation functions.

##### Read the dataframe

In [475]:
bike_share = pd.read_csv("bike_share_hour.csv", skipinitialspace=True)
bike_share.head(5)

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


##### Look at the dataset and covert columns categorical to category type

In [476]:
bike_share.dtypes

instant         int64
dteday         object
season          int64
yr              int64
mnth            int64
hr              int64
holiday         int64
weekday         int64
workingday      int64
weathersit      int64
temp          float64
atemp         float64
hum           float64
windspeed     float64
casual          int64
registered      int64
cnt             int64
dtype: object

In [477]:
for col in ['season', 'yr', 'mnth',  'holiday', 'weekday', 'workingday', 'weathersit']:
    bike_share[col] = bike_share[col].astype('category')

In [478]:
# changing dteday to time stamp
bike_share['dteday'] = pd.to_datetime(bike_share['dteday'])

##### Scale the numerical features using StandardScaler(), and replacetheoriginal columns in your dataframe.

In [479]:
from sklearn.preprocessing import StandardScaler

In [480]:
for col in ['instant', 'hr', 'temp', 'atemp', 'hum', 'windspeed', 'casual', 'registered', 'cnt']:
    arr = np.array(bike_share[col])
    scaler = StandardScaler()
    scaler.fit(arr.reshape(-1,1))
    bike_share[col]=scaler.transform(arr.reshape(-1,1))

##### Drop the following columns from your dataset: casual, registered, dteday, instant.

In [481]:
bike_share.drop(columns = ['casual', 'registered', 'dteday', 'instant'], inplace = True)

##### Create one-hot-encoded values for your categorical columns usingget_dummies and add them to your source dataset.

In [482]:
for col in ['season', 'yr', 'mnth',  'holiday', 'weekday', 'workingday', 'weathersit']:
    temp = pd.get_dummies(bike_share[col], prefix=col)
    bike_share = bike_share.join(temp)

##### Drop the original categorical columns from your source dataset.

In [483]:
 bike_share = bike_share.drop(['season', 'yr', 'mnth',  'holiday', 'weekday', 'workingday', 'weathersit'], axis = 1)

In [484]:
y = bike_share['cnt']
X = bike_share.drop('cnt', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

### Model Keras Sequential Regressor with Relu Activation

In [485]:
def model_creation():
    '''Create the model for Keras Classifier or Regressor'''
    model= Sequential()
    model.add(Dense(3, activation = 'relu', input_shape=(38,)))
    model.add(Dense(2, activation = 'relu'))
    model.add(Dense(1, activation = 'relu'))

    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])

    return model 

In [486]:
# Fit the data
model = KerasRegressor(model=model_creation)
model.fit(X_train, y_train)



KerasRegressor(
	model=<function model_creation at 0x0000019B7ED07700>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=None
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=1
)

## Part 4: Tune Your Keras Regressor

Tune your Keras regressor from Part 3 by implementing a gridsearch with different optimizers.

In [487]:
epochs = [5, 10, 20]
batch_sizes = [1, 5, 10]
param_dict = dict(epochs=epochs, batch_size=batch_sizes)

In [488]:
# GridSearchCV
grid = GridSearchCV(estimator = model, param_grid = param_dict, cv=3, n_jobs = 1)
grid.fit(X_train, y_train)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


Epoch 19/20
Epoch 20/20
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20


Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


GridSearchCV(cv=3,
             estimator=KerasRegressor(model=<function model_creation at 0x0000019B7ED07700>),
             n_jobs=1,
             param_grid={'batch_size': [1, 5, 10], 'epochs': [5, 10, 20]})

In [491]:
# Best Parameters
grid.best_params_

{'batch_size': 1, 'epochs': 20}

In [492]:
# Utilizing grid.best_params_
model = KerasRegressor(model=model_creation, epochs=20, batch_size=1)
model.fit(X_train, y_train)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


KerasRegressor(
	model=<function model_creation at 0x0000019B7ED07700>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=None
	metrics=None
	batch_size=1
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=20
)

In [498]:
# Getting Predictions and Outputting MSE
test_preds = model.predict(X_test)
print("MSE: ", mean_squared_error(y_test, test_preds))

MSE:  0.9700476976293557
