# NeuralNets
This notebook will contain my attempts using NeuralNetworks

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam, SGD, Adadelta
import keras.backend as K

from collections import defaultdict

from sklearn import warnings
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import recall_score, precision_score, f1_score
from sklearn.metrics import accuracy_score, auc, roc_auc_score, make_scorer
from sklearn.svm import SVC, LinearSVC, NuSVC

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## eBoss Data

Reading in the eboss data

In [3]:
raw_data_eBoss = pd.read_csv("../../../Data/Astronomy.csv")

Taking out the id column

In [3]:
trainable_data = raw_data_eBoss.iloc[:, 1:].copy(deep=True)

Seperating the X, and Y variables, and doing needed preprocessing.

In [4]:
X = trainable_data.iloc[:, :-1].copy(deep=True)
Y = trainable_data.iloc[:, -1:].copy(deep=True)
X = X.apply(pd.to_numeric, args={'errors': 'coerce'})
X = X.fillna('0')
Y = Y.Hits.map({'bad': 0, 'good': 1})

In [5]:
ss = MinMaxScaler()
df_scaled = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
X = df_scaled

Making the model starting with a simple 3 layer model with 20

In [6]:
def precision_no(y_true, y_pred):
    return precision_score(y_true, y_pred, pos_label=0)
def recall_no(y_true, y_pred):
    return recall_score(y_true, y_pred, pos_label=0)

In [18]:
def create_model(lr=.01):
    model = Sequential()
    model.add(Dense(96, input_dim=96, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr), metrics=['accuracy'])
    return model


metrics=[precision_no, precision_score, recall_no, recall_score, accuracy_score,  f1_score, roc_auc_score]

kfold = KFold(n_splits=10, shuffle=True)
cvscores = defaultdict(list)
for train, test in kfold.split(X.values, Y.values):
    model = create_model(lr=.001)
    
    # Fit the model
    model.fit(X.values[train], Y.values[train], epochs=100, batch_size=8, verbose=0)
    
    # evaluate the model
    #scores = model.evaluate(X.values[test], Y.values[test], verbose=0)
    
    y_preds = model.predict_classes(X.values[test])
    y_true = Y.values[test]
    
    for metric in metrics:
        cvscores[metric.__name__].append(metric(y_true, y_preds))
        
    cvscores['roc_auc_real'].append(roc_auc_score(y_true, model.predict(X.values[test])))

In [19]:
for k,v in cvscores.items():
    print(k, ": ", np.mean(v))

roc_auc_real :  0.9687925170068027
precision_no :  0.9325396825396826
precision_score :  0.9379871577697665
recall_no :  0.7807539682539683
recall_score :  0.975909090909091
accuracy_score :  0.9326354679802955
f1_score :  0.9560583620401619
roc_auc_score :  0.8783315295815296


# Manga Data:

In [4]:
raw_data_Manga = pd.read_csv("../../../Data/Astronomy20000_Original.csv")

Seperating the X, and Y variables, and doing needed preprocessing.

In [137]:
X = raw_data_Manga.iloc[:, 1:-1].copy(deep=True)
Y = raw_data_Manga.iloc[:, [-1]].copy(deep=True)
X = X.fillna('0')
Y = Y.Hits.map({'bad': 0, 'good': 1})
X = X.iloc[:, np.r_[2:14, 26:len(X.columns)]].copy(deep=True)
ss = MinMaxScaler()
df_scaled = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
X = df_scaled

In [170]:
def create_model(lr=.01):
    model = Sequential()
    model.add(Dense(108, input_dim=108, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr), metrics=['accuracy'])
    return model

In [177]:
mdl = create_model(lr=.001)
mdl.fit(X.values, Y.values, epochs=100, batch_size=128)

In [174]:
metrics=[accuracy_score, precision_score, precision_no, recall_score, recall_no, f1_score, roc_auc_score]

kfold = KFold(n_splits=10, shuffle=True)
cvscores = defaultdict(list)
ix = 1
for train, test in kfold.split(X.values, Y.values):
    print("Starting Fold: {}!".format(ix))
    ix += 1
    model = create_model()
    
    # Fit the model
    model.fit(X.values[train], Y.values[train], epochs=100, batch_size=128, verbose=0)
    
    # evaluate the model
    #scores = model.evaluate(X.values[test], Y.values[test], verbose=0)
    
    y_preds = model.predict_classes(X.values[test])
    y_true = Y.values[test]
    
    for metric in metrics:
        cvscores[metric.__name__].append(metric(y_true, y_preds))

Starting Fold: 1!
Starting Fold: 2!
Starting Fold: 3!
Starting Fold: 4!
Starting Fold: 5!
Starting Fold: 6!
Starting Fold: 7!
Starting Fold: 8!
Starting Fold: 9!
Starting Fold: 10!


In [176]:
for k,v in cvscores.items():
    print(k, ": ", np.mean(v))

accuracy_score :  0.8636025548525549
precision_score :  0.7962976241250492
precision_no :  0.8811265490050605
recall_score :  0.6151358123424362
recall_no :  0.9460719784443532
f1_score :  0.6917783117492088
roc_auc_score :  0.7806038953933946
