<a href="https://colab.research.google.com/github/shawn-kg/Projects-in-Machine-Learning-and-AI/blob/main/FinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Decision Tree Implementation

In [None]:
import pandas as pd
import numpy as np
import math


from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics # Import scikit-learn metrics module for accuracy calculation
 
# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from sklearn import tree
from sklearn.tree import export_text
import graphviz

# Seed shared by the row shuffle and the train/test split so that a fresh
# "Restart & Run All" reproduces the same partitions.
RANDOM_SEED = 1

# The 30 feature columns are ten base measurements, each present with the
# suffixes _mean, _se and _worst (in that order). Generating the names from
# one list keeps the CSV column names and the feature list from drifting apart.
measurement_names = ['radius', 'texture', 'perimeter', 'area', 'smoothness',
                     'compactness', 'concavity', 'concave_points', 'symmetry',
                     'fractal_dimension']
feature_cols = [f'{name}_{suffix}'
                for suffix in ('mean', 'se', 'worst')
                for name in measurement_names]
col_names = ['target'] + feature_cols

# The CSV is read without a header row; the first column is the label.
cancer_data = pd.read_csv('drive/MyDrive/data_augmented.csv', header=None, names=col_names)

# Shuffle the rows with a seeded generator. An unseeded shuffle here would make
# the random_state passed to train_test_split below meaningless, because the
# row order feeding the split would differ on every run.
shuffled_index = np.random.default_rng(RANDOM_SEED).permutation(cancer_data.index.to_numpy())
randomized_data = cancer_data.reindex(shuffled_index)

X = randomized_data[feature_cols]
y = randomized_data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_SEED) # 70% training and 30% test

# Baseline decision tree: constructed with default settings and fitted on the
# training split (fit returns the estimator itself).
classification_tree = tree.DecisionTreeClassifier().fit(X_train, y_train)

# Predict the held-out test split and report the three metrics.
y_pred = classification_tree.predict(X_test)
for metric_label, metric_fn in (
    ("Accuracy:", metrics.accuracy_score),
    ("Precision:", metrics.precision_score),
    ("Recall:", metrics.recall_score),
):
    print(metric_label, metric_fn(y_test, y_pred))

# Export the fitted tree as Graphviz DOT text and render it under the name "dec_tree1".
dot_data = tree.export_graphviz(classification_tree, out_file=None)
graph = graphviz.Source(dot_data)
graph.render("dec_tree1")

# Second decision tree, limited to a maximum depth of 4 so it can be compared
# with the depth-unrestricted tree fitted earlier in this cell. The depth limit
# is passed to the constructor (the documented way to set hyper-parameters)
# rather than assigned to the attribute afterwards. Limiting depth stops tree
# growth early; no separate pruning step is run.
classification_tree = tree.DecisionTreeClassifier(max_depth=4)

# Train the depth-limited tree on the training split.
classification_tree = classification_tree.fit(X_train, y_train)

# Predict the held-out test split.
y_pred = classification_tree.predict(X_test)

# Export the fitted tree as Graphviz DOT text and render it under the name "dec_tree2".
dot_data = tree.export_graphviz(classification_tree, out_file=None)
graph = graphviz.Source(dot_data)
graph.render("dec_tree2")

print("\nAccuracy  with depth of 4:",metrics.accuracy_score(y_test, y_pred))
print("Precision with depth of 4:",metrics.precision_score(y_test,y_pred))
print("Recall with depth of 4:",metrics.recall_score(y_test,y_pred))

Accuracy: 0.935672514619883
Precision: 0.972972972972973
Recall: 0.9310344827586207

Accuracy  with depth of 4: 0.9298245614035088
Precision with depth of 4: 0.9727272727272728
Recall with depth of 4: 0.9224137931034483


# Random Forest and Gradient Boosting Optimizations

In [None]:
from numpy import mean
from numpy import std
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Random forest with trees capped at depth 5, scored on the full X / y with
# repeated stratified 10-fold cross-validation (3 repeats, seeded splitter).
model = RandomForestClassifier(max_depth=5)
cv = RepeatedStratifiedKFold(n_splits=10,n_repeats=3,random_state=1)

# One cross-validation run per metric; error_score='raise' surfaces any
# fitting failure instead of silently recording a placeholder score.
n_scores = cross_val_score(model, X, y, scoring= 'accuracy', cv=cv, n_jobs=-1,error_score='raise')
o_scores = cross_val_score(model, X, y, scoring= 'precision', cv=cv, n_jobs=-1, error_score='raise')
p_scores = cross_val_score(model, X, y, scoring = 'recall', cv=cv, n_jobs=-1, error_score='raise')

# Each line reports "mean (standard deviation)" of that metric's own scores.
print('Random Forest Accuracy: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))
# The spread shown here must come from the precision scores (o_scores), not
# from the accuracy scores.
print('Random Forest Precision: %.3f (%.3f)' % (mean(o_scores), std(o_scores)))
print('Random Forest Recall: %.3f (%.3f)' % (mean(p_scores), std(p_scores)))



# Gradient boosting classifier: 100 estimators, learning rate 1.0, depth-5
# trees, fixed random_state. Scored with the same `cv` splitter that the
# random forest above uses.
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=5, random_state=0)

# Arguments common to all three cross-validation runs.
shared_cv_args = dict(cv=cv, n_jobs=-1, error_score='raise')
q_scores = cross_val_score(clf, X, y, scoring='accuracy', **shared_cv_args)
r_scores = cross_val_score(clf, X, y, scoring='precision', **shared_cv_args)
s_scores = cross_val_score(clf, X, y, scoring='recall', **shared_cv_args)

# Report "mean (standard deviation)" for each metric.
for report_fmt, fold_scores in (
    ('\nGradient Boosting Classifier Accuracy: %.3f (%.3f)', q_scores),
    ('Gradient Boosting Classifier Precision: %.3f (%.3f)', r_scores),
    ('Gradient Boosting Classifier Recall: %.3f (%.3f)', s_scores),
):
    print(report_fmt % (mean(fold_scores), std(fold_scores)))

Random Forest Accuracy: 0.957 (0.024)
Random Forest Precision: 0.962 (0.024)
Random Forest Recall: 0.979 (0.024)

Gradient Boosting Classifier Accuracy: 0.958 (0.025)
Gradient Boosting Classifier Precision: 0.959 (0.033)
Gradient Boosting Classifier Recall: 0.976 (0.026)


# Deep Neural Network

In [None]:
from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential

# Carve a dev split (25%) out of the current training partition.
# NOTE: X_train / y_train are overwritten here, so re-running this cell without
# first re-running the cell that created them splits the already-reduced
# training set again.
X_train, X_dev, y_train, y_dev = train_test_split(
    X_train, y_train, test_size=0.25, random_state=1
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
n_features = X_train.shape[1]

# One hidden ReLU layer of 6 units feeding a single sigmoid output unit.
model = Sequential([
    Dense(6, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)),
    Dense(1, activation='sigmoid'),
])
opt = tf.keras.optimizers.RMSprop(learning_rate=0.01, momentum=0.9)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
)

# Train without progress output.
model.fit(X_train, y_train, epochs=150, batch_size=32, verbose=0)

# evaluate() returns the loss followed by the compiled metrics, in order.
dev_results = model.evaluate(X_dev, y_dev, verbose=0)
loss, acc, prec, recall = dev_results
print("Loss: ", loss, "Acc: ", acc, "Precision: ", prec, "Recall: ", recall)

test_results = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_acc, test_prec, test_recall = test_results
print("Test Loss: ", test_loss, "Test Accuracy: ", test_acc, "Test Precision: ", test_prec, "Test Recall: ", test_recall)

 



(298, 30) (171, 30) (298,) (171,)
Loss:  0.16466128826141357 Acc:  0.9599999785423279 Precision:  0.95652174949646 Recall:  0.9850746393203735
Test Loss:  0.13441075384616852 Test Accuracy:  0.9590643048286438 Test Precision:  0.9430894255638123 Test Recall:  1.0


# Deep Learning 2.0

In [None]:
# Second network: two hidden ReLU layers of 9 units each and a sigmoid output.
model2 = Sequential()
# first hidden layer
model2.add(Dense(units=9,kernel_initializer='he_uniform',activation='relu',input_shape=(n_features,)))
# second hidden layer
model2.add(Dense(units=9,kernel_initializer='he_uniform',activation='relu'))
# output layer
model2.add(Dense(units=1,kernel_initializer='glorot_uniform',activation='sigmoid'))

model2.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])

model2.fit(X_train, y_train, batch_size = 100, epochs = 150)

# Evaluate model2 itself. Calling evaluate on the earlier `model` would simply
# repeat the single-hidden-layer network's scores.
loss, acc, prec, recall = model2.evaluate(X_dev,y_dev,verbose=0)

print("Loss: ", loss, "Acc: ", acc, "Precision: ", prec, "Recall: ", recall)

test_loss, test_acc, test_prec, test_recall = model2.evaluate(X_test, y_test, verbose=0)
print("Test Loss: ", test_loss, "Test Accuracy: ", test_acc, "Test Precision: ", test_prec, "Test Recall: ", test_recall)


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

# Hyperparameter Tuning

In [None]:
!pip install -q -U keras-tuner

[?25l[K     |███▍                            | 10 kB 20.9 MB/s eta 0:00:01[K     |██████▊                         | 20 kB 26.2 MB/s eta 0:00:01[K     |██████████                      | 30 kB 29.1 MB/s eta 0:00:01[K     |█████████████▍                  | 40 kB 31.8 MB/s eta 0:00:01[K     |████████████████▊               | 51 kB 33.5 MB/s eta 0:00:01[K     |████████████████████            | 61 kB 36.7 MB/s eta 0:00:01[K     |███████████████████████▍        | 71 kB 29.7 MB/s eta 0:00:01[K     |██████████████████████████▊     | 81 kB 30.5 MB/s eta 0:00:01[K     |██████████████████████████████  | 92 kB 31.9 MB/s eta 0:00:01[K     |████████████████████████████████| 98 kB 5.9 MB/s 
[?25h

In [None]:
import kerastuner as kt
import tensorflow as tf

def build_model(hp):
  """Build and compile one candidate network for the KerasTuner search.

  Hyperparameters drawn from ``hp``:
    * ``units``: width of the hidden ReLU layer, 5 to 21 in steps of 2.
    * ``dropout``: whether a Dropout(0.25) layer follows the hidden layer.
    * ``learning_rate``: RMSprop learning rate, one of 1e-2, 1e-3, 1e-4.

  Reads the module-level ``n_features`` for the input shape.

  Returns:
    A compiled Sequential model with a single sigmoid output, trained with
    binary cross-entropy and tracking accuracy, precision and recall.
  """
  hidden_width = hp.Int('units', min_value = 5, max_value = 21, step =2)

  # Collect the layers first, then hand the whole stack to Sequential.
  stack = [
      Dense(units = hidden_width, activation = 'relu', kernel_initializer='he_normal', input_shape=(n_features,))
  ]

  # Tune whether to use dropout.
  if hp.Boolean("dropout"):
    stack.append(layers.Dropout(rate=0.25))

  stack.append(Dense(1,activation = 'sigmoid'))

  step_size = hp.Choice('learning_rate', values = [1e-2, 1e-3,1e-4])

  candidate = Sequential(stack)
  candidate.compile(
      optimizer = tf.keras.optimizers.RMSprop(learning_rate=step_size, momentum=0.9),
      loss = 'binary_crossentropy',
      metrics = ['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
  )
  return candidate

# Random search over build_model's hyperparameters: up to 20 trials, ranked by
# validation accuracy measured on the dev split.
# NOTE(review): trial results are kept under random_search_starter/intro_to_kt.
# The captured output shows an existing project being reloaded rather than a
# fresh search; delete that directory (or pass overwrite=True to the tuner)
# to force a new search -- confirm which behavior is wanted.
tuner = kt.RandomSearch(build_model,
                        objective = 'val_accuracy',
                        max_trials = 20,
                        directory = 'random_search_starter',
                        project_name = 'intro_to_kt')

tuner.search(X_train, y_train, epochs = 100, validation_data = (X_dev, y_dev))

# Best trial's model and the hyperparameter values that produced it.
best_model = tuner.get_best_models(1)[0]

best_hyperparameters = tuner.get_best_hyperparameters(1)[0]
print("Best hyperparameters: ", best_hyperparameters.values)

# Score the tuned model on the test split. Evaluating the global `model` here
# would only repeat the first network's results.
test_loss, test_acc, test_prec, test_recall = best_model.evaluate(X_test, y_test, verbose=0)
print("Test Loss: ", test_loss, "Test Accuracy: ", test_acc, "Test Precision: ", test_prec, "Test Recall: ", test_recall)




INFO:tensorflow:Reloading Oracle from existing project random_search_starter/intro_to_kt/oracle.json
INFO:tensorflow:Reloading Tuner from random_search_starter/intro_to_kt/tuner0.json
INFO:tensorflow:Oracle triggered exit
Test Loss:  0.13441075384616852 Test Accuracy:  0.9590643048286438 Test Precision:  0.9430894255638123 Test Recall:  1.0
