# CS6830 Project 8
Seth Beckett and Jasper Swenson

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import random
from sklearn.tree import export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support
from IPython.display import SVG
# You may need to install the Python graphviz library. At the command line:
#   pip install graphviz
# You will also need to install the graphviz executables. You can use apt,
# macports, or other installer for your system.
from graphviz import Source
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

## Decision Trees

In [None]:
# Load the glass dataset and create the per-model metric stores that the
# later cells fill in and plot.
df = pd.read_csv('glass.csv')
model_precision = {"Max Depth 3": 0, "Max Depth 5": 0}
model_recall = {"Max Depth 3": 0, "Max Depth 5": 0}
model_fscore = {"Max Depth 3": 0, "Max Depth 5": 0}
display(df)

# Predictor columns and the target column.
feature_cols = ["RI", "Na", "Mg", "Al", "Si", "K", "Ca", "Ba", "Fe"]
df_x = df[feature_cols]
df_y = df[["Type"]]

# The double-bracket selection above keeps y as a single-column 2-D array;
# cells that need a flat vector call y.ravel().
x = df_x.values
y = df_y.values
print(x[0])
print(y[0])

# Seed the split (the classifiers below are seeded with random_state=1 too)
# so the reported metrics are reproducible from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.25, random_state=1
)
print(xtrain)

In [None]:
# Fit one decision tree per depth limit, score it on the held-out split, and
# record a summary of each metric for the comparison charts.
#
# precision_recall_fscore_support returns one value per class here (no
# `average` is given). The summary stored is the mean of the first two
# classes' scores, wrapped in a one-element list so that a DataFrame can be
# built from the dict later.
# NOTE(review): only classes at index 0 and 1 are averaged; confirm that this
# is intended rather than a mean over all classes.

# --- max_depth = 3 ---
treeclf1 = DecisionTreeClassifier(max_depth=3, random_state=1)
treeclf1.fit(xtrain, ytrain)
y_pred = treeclf1.predict(xtest)
p, r, f, s = precision_recall_fscore_support(ytest, y_pred)
# Parenthesize the sum: `a + b / 2` would halve only the second term.
model_fscore["Max Depth 3"] = [(f[0] + f[1]) / 2]
model_recall["Max Depth 3"] = [(r[0] + r[1]) / 2]
model_precision["Max Depth 3"] = [(p[0] + p[1]) / 2]
display('Depth 3 precision = {}'.format(p))
display('Depth 3 recall = {}'.format(r))
display('Depth 3 f-score = {}'.format(f))

# --- max_depth = 5 ---
treeclf2 = DecisionTreeClassifier(max_depth=5, random_state=1)
treeclf2.fit(xtrain, ytrain)
y_pred = treeclf2.predict(xtest)
p, r, f, s = precision_recall_fscore_support(ytest, y_pred)
model_fscore["Max Depth 5"] = [(f[0] + f[1]) / 2]
# Use the same "Max Depth 5" key as the other two stores so the recall chart
# gets the real value instead of the initial 0 plus a stray extra column.
model_recall["Max Depth 5"] = [(r[0] + r[1]) / 2]
model_precision["Max Depth 5"] = [(p[0] + p[1]) / 2]
display('Depth 5 precision = {}'.format(p))
display('Depth 5 recall = {}'.format(r))
display('Max Depth 5 f-score = {}'.format(f))

In [None]:
# Render both fitted trees as inline SVG diagrams.
#
# export_graphviz assigns class_names positionally to the classifier's sorted
# classes, so the names are taken from each tree's own `classes_`. A fixed
# '1'..'7' list would mislabel nodes whenever a label is absent from the
# training data.
graph1 = Source(tree.export_graphviz(
    treeclf1,
    out_file=None,
    feature_names=feature_cols,
    class_names=[str(label) for label in treeclf1.classes_],
    filled=True,
))
display(SVG(graph1.pipe(format='svg')))

graph2 = Source(tree.export_graphviz(
    treeclf2,
    out_file=None,
    feature_names=feature_cols,
    class_names=[str(label) for label in treeclf2.classes_],
    filled=True,
))
display(SVG(graph2.pipe(format='svg')))



In [None]:
# Tabulate and chart the feature importances of both trees.
importance1 = pd.DataFrame({'feature': feature_cols, 'importance': treeclf1.feature_importances_})
importance2 = pd.DataFrame({'feature': feature_cols, 'importance': treeclf2.feature_importances_})

for importance, depth in ((importance1, 3), (importance2, 5)):
    display(importance)
    # Plot against the 'feature' column so each bar is labelled with its
    # feature name rather than the frame's integer row index.
    importance.plot(kind='barh', x='feature', y='importance', figsize=(9, 7))
    plt.title('Feature Significance For DecisionTreeClassifier With Max Depth {}'.format(depth))
    # Reference line at zero importance.
    plt.axvline(x=0, color='.5')
    # Widen the left margin so the tick labels are not clipped.
    plt.subplots_adjust(left=.3)

In [None]:
# Turn each metric store into a DataFrame, show the table, then show a bar
# chart of it. display() is given the object returned by set_title().

# Precision
precision_df = pd.DataFrame.from_dict(model_precision)
display(precision_df)
precision_axes = precision_df.plot(kind='bar')
display(precision_axes.set_title("DecisionTreeClassifier Model Precision Score"))

# F-score
fscore_df = pd.DataFrame.from_dict(model_fscore)
display(fscore_df)
fscore_axes = fscore_df.plot(kind='bar')
display(fscore_axes.set_title("Model FScore"))

# Recall
recall_df = pd.DataFrame.from_dict(model_recall)
display(recall_df)
recall_axes = recall_df.plot(kind='bar')
display(recall_axes.set_title("Model Recall Score"))

## Neural Networks

In [None]:
# get necessary libraries
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, cross_validate, cross_val_predict
from sklearn.metrics import classification_report,confusion_matrix, ConfusionMatrixDisplay
import networkx as nx
import colorsys

In [None]:
# Standardize the full feature matrix; the fitted scaler is kept in `scaler`
# and the transformed features in `x_scaled`.
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)


In [None]:
# First hyper-parameter search for the MLP classifier: every two-layer
# architecture with 50 or 100 units in the first hidden layer, crossed with
# all four activations and three solvers.
mlp = MLPClassifier(max_iter=5000)
param_grid = dict(
    hidden_layer_sizes=[
        (first, second)
        for first in (50, 100)
        for second in (10, 25, 50, 100)
    ],
    activation=['identity', 'logistic', 'tanh', 'relu'],
    solver=['sgd', 'adam', 'lbfgs'],
)
grid = GridSearchCV(estimator=mlp, param_grid=param_grid, n_jobs=-1)
grid.fit(x_scaled, y.ravel())

# Report the winning parameters, the refit estimator and its CV score.
print(grid.best_params_, grid.best_estimator_, grid.best_score_, sep='\n')

In [None]:
# Second, wider search: smaller networks of one to three hidden layers, plus
# a sweep over the `alpha` parameter. `mlp`, `param_grid` and `grid` from the
# previous search are overwritten.
mlp = MLPClassifier(max_iter=1000)
param_grid = dict(
    hidden_layer_sizes=[
        # one hidden layer
        *[(n,) for n in range(10, 60, 10)],
        # two hidden layers of equal width
        *[(n, n) for n in range(10, 60, 10)],
        # two hidden layers of different widths
        (10, 20), (20, 10), (20, 30), (30, 20), (30, 40), (40, 30),
        # three hidden layers of equal width
        *[(n, n, n) for n in range(10, 60, 10)],
        # three hidden layers of different widths
        (10, 20, 30), (30, 20, 10), (10, 30, 50), (50, 30, 10),
    ],
    activation=['identity', 'logistic', 'tanh', 'relu'],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.001, 0.01, 0.05, 0.1],
)

grid = GridSearchCV(estimator=mlp, param_grid=param_grid, n_jobs=-1)
grid.fit(x_scaled, y.ravel())

# Report the winning parameters, the refit estimator and its CV score.
print(grid.best_params_, grid.best_estimator_, grid.best_score_, sep='\n')

In [None]:
# Build the MLP classifier with hard-coded hyper-parameters (presumably
# copied from a run of the grid search above -- confirm against its output).
mlp = MLPClassifier(hidden_layer_sizes=(10, 20), activation='tanh', solver='adam', max_iter=5000, alpha=0.05)

# The grid searches ran on standardized features, so this model must see
# standardized features too. The scaler is fit on the training rows only so
# that no statistics from the test rows leak into training.
split_scaler = StandardScaler().fit(xtrain)
mlp.fit(split_scaler.transform(xtrain), ytrain.ravel())
predictions = mlp.predict(split_scaler.transform(xtest))

# print classification report
print(classification_report(ytest, predictions))

In [None]:
# With so few samples a single hold-out split is noisy, so collect
# out-of-fold predictions for every row (7 folds) and report on all of them.
predictions = cross_val_predict(
    estimator=mlp,
    X=x_scaled,
    y=y.ravel(),
    cv=7,
)
print(classification_report(y_true=y, y_pred=predictions))

In [None]:
# visualize nn
def show_ann(mlp):
    """Draw a fitted multi-layer perceptron as a layered directed graph.

    Each neuron becomes a node named ``Layer<layer>_<index>`` and each
    connection becomes an edge carrying the corresponding entry of
    ``mlp.coefs_`` as its ``weight``. The input layer is placed on the top
    row and each following layer one row lower. Edges with a negative weight
    are drawn red, all others blue, and the line width is twice the absolute
    weight.

    Args:
        mlp: A fitted estimator exposing ``coefs_``, a sequence of 2-D weight
            matrices indexed as ``coefs_[layer][source][target]``.

    Returns:
        None. The graph is drawn with ``nx.draw`` on the current matplotlib
        figure.
    """
    coefs = mlp.coefs_

    # Neurons per layer: the rows of the first matrix are the inputs, and the
    # columns of every matrix are the neurons of the layer it feeds. Deriving
    # all sizes this way lets a single loop handle hidden and output layers
    # alike, including a network with no hidden layers at all.
    layer_sizes = [len(coefs[0])] + [len(matrix[0]) for matrix in coefs]
    top_row = len(layer_sizes) - 1

    G = nx.DiGraph()
    pos = {}

    # Position the input neurons; they enter the graph when their edges are
    # added below.
    for i in range(layer_sizes[0]):
        pos['Layer0_{}'.format(i)] = (i, top_row)

    # Position every later layer and connect it to the layer before it.
    for prev_layer, matrix in enumerate(coefs):
        cur_layer = prev_layer + 1
        for i in range(layer_sizes[cur_layer]):
            target = 'Layer{}_{}'.format(cur_layer, i)
            pos[target] = (i, top_row - cur_layer)
            for k in range(layer_sizes[prev_layer]):
                source = 'Layer{}_{}'.format(prev_layer, k)
                G.add_edge(source, target, weight=matrix[k][i])

    # Read the weights once, in edge order, so colors and widths line up
    # with the edges that nx.draw iterates over.
    edge_weights = [w for _, _, w in G.edges(data='weight')]
    colors = [colorsys.hsv_to_rgb(0 if w < 0 else 0.65, 1, 1)
              for w in edge_weights]
    widths = [abs(w) * 2 for w in edge_weights]

    nx.draw(G, pos, node_color='y', node_size=450, width=widths, edge_color=colors)
    
# Draw the network held in `mlp`; show_ann reads its fitted `coefs_` weights.
show_ann(mlp)