<a href="https://colab.research.google.com/github/SpoilStick/ML-Projects/blob/main/ML_Assignment_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Part 1: Implement a Perceptron

In [None]:
pip install -q keras-tuner scikeras

[K     |████████████████████████████████| 135 kB 9.7 MB/s 
[K     |████████████████████████████████| 1.6 MB 39.7 MB/s 
[?25h

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from seaborn import load_dataset
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
from scikeras.wrappers import KerasClassifier
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from sklearn.model_selection import GridSearchCV

In [None]:
diabetes = datasets.load_diabetes()

In [None]:
data = pd.DataFrame(diabetes.data)
features = diabetes.feature_names
data.columns = features

target = diabetes.target

In [None]:
X_train, X_test, y_train, y_test =  train_test_split(data, target, test_size=0.2, random_state=42)

In [None]:
def print_stats(model, data, targets):
  predictions = model.predict(data)
  
  r2 = r2_score(targets, predictions)
  RMSE = mean_squared_error(targets, predictions, squared=False)

  print("r2 Coefficient:", r2)
  print("RMSE:", RMSE)

In [None]:
perceptron = MLPRegressor().fit(X_train, y_train)



In [None]:
print_stats(perceptron, X_train, y_train)

r2 Coefficient: -3.024856917419166
RMSE: 156.38616491094476


# Part 2: Implement a Keras Classifier

In [None]:
# Data preparation, for simplicity it's all in one cell

# I'm using the same variable names
# I don't really reuse "data" from before
# It is somewhat annoying though
data = load_dataset("titanic")

data.drop("deck", axis=1)

age_dict = {}

for x in data['pclass'].unique():
    column = data.loc[data['pclass']==x]
    median = column['age'].median()
    age_dict[x] = median

def impute_age(pclass):
  if type(pclass) == int:
    return(age_dict[pclass])

  elif type(pclass) == pd.Series:
    for x in pclass:
      x = age_dict[x]
      return pclass

data['age'] = impute_age(data['pclass'])

data.dropna()

data = pd.get_dummies(data=data, columns=["sex", "embarked", "class", "who", "adult_male", "deck", "embark_town", "alive", "alone"])

X = data
y = X.pop('survived')

X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Apparently I have to define a function for the KerasClassifier
# I couldn't find much info about it or a different way
# I also couldn't find a way without using meta
def build_model(meta):
  print(meta["target_type_"])

  # create model
  model = Sequential()
  model.add(Input(meta["n_features_in_"]))
  model.add(Dense(meta["n_classes_"]))

  return model

In [None]:
classifier = KerasClassifier(model=build_model, loss="binary_crossentropy")

In [None]:
classifier.fit(X_train, y_train)

binary


KerasClassifier(
	model=<function build_model at 0x7f8fb75badd0>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=binary_crossentropy
	metrics=None
	batch_size=None
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=1
	class_weight=None
)

In [None]:
y_pred = classifier.predict(X_test)

score = classifier.scorer(y_test, y_pred)
print(score)

0.6871508379888268


# Part 3: Implement a Keras Regressor

In [None]:
# Data preparation
# Rewriting 'data' again
data = pd.read_csv("bike_share_hour.csv")

data = data.drop(columns=["dteday"])
data.dropna()

scalar = StandardScaler()
scalar.fit(data)
scalar.transform(data)

data = data.drop(columns=["instant", "casual", "registered"])

X = data
y = X.pop("cnt")

X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=1/3, random_state=42)

In [None]:
# Using mean_squared_error did not work for the classifier
def build_regressor_model(n_hidden = 2, n_neurons = 100, input_shape = X_train.shape[1:]):
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mean_squared_error',metrics=['mae'], optimizer='adam')
    return model

In [None]:
# Keras sequential regressor
# (What's the difference?)
regressor = KerasRegressor(model=build_regressor_model, loss="mean_squared_error")

In [None]:
# Gives me an error, why?
regressor.fit(X_train, y_train)



KerasRegressor(
	model=<function build_regressor_model at 0x7f8fb0a773b0>
	build_fn=None
	warm_start=False
	random_state=None
	optimizer=rmsprop
	loss=mean_squared_error
	metrics=None
	batch_size=None
	validation_batch_size=None
	verbose=1
	callbacks=None
	validation_split=0.0
	shuffle=True
	run_eagerly=False
	epochs=1
)

In [None]:
y_pred = regressor.predict(X_test)

score = regressor.scorer(y_test, y_pred)
print(score)

0.288018610089649


# Part 4: Tune Your Keras Regressor

In [None]:
print(regressor.get_params().keys())

dict_keys(['model', 'build_fn', 'warm_start', 'random_state', 'optimizer', 'loss', 'metrics', 'batch_size', 'validation_batch_size', 'verbose', 'callbacks', 'validation_split', 'shuffle', 'run_eagerly', 'epochs'])


In [None]:
batch_size = [5, 10, 20]
epochs = [10, 50, 100]

param_grid = dict(batch_size=batch_size, epochs=epochs)

grid_search = GridSearchCV(regressor, param_grid, cv=5).fit(X_train, y_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/5

In [None]:
grid_search.best_params_

{'batch_size': 10, 'epochs': 50}

In [None]:
final_model = grid_search.best_estimator_

y_pred = final_model.predict(X_test)
score = final_model.scorer(y_test, y_pred)
print(score)

0.8822247657996198
