In [None]:
import numpy as np
import matplotlib.pylab as plt

import lit_ml_tools as lit

# This will reload modules that have been edited
%load_ext autoreload
%autoreload 2

### Things to Test
- neural net arg for colormap (line 198,355,380,396)
        - 380: weights
        - 396: biases
- look at MLPClassifier
- read about Shapley values
- start testing NNs
- be able to explain *everything* - notes after each code block


In [None]:
# Manual Variables
# nentries and nfeatures are handed to gen_original_data below; nfeatures is
# also reused by later cells when choosing hidden-layer sizes.
nentries = 10000
nfeatures = 5

# Datasets
# dtype args: 'normal', 'squared', 'relativity'
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal')
# dataset2 is derived from dataset1 by lit.shuffle_dataset (see the notes on the plots below).
dataset2= lit.shuffle_dataset(dataset1)

# NOTE(review): presumably combines both datasets and labels each row by its origin -- confirm in lit_ml_tools.
alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=False)

# Original data.
# sumfunc: a histogram of the sum of each row for nentries rows. Each row sums to one.
lit.sumfunc(dataset1);
# histfunc: histograms of each feature.
lit.histfunc(dataset1);

# Shuffled data.
# Because each feature was shuffled, the histogram of the sum of each row now
# forms a gaussian peaking at approximately one.
lit.sumfunc(dataset2);
# Each feature was shuffled within itself (column of feature one shuffled,
# column of feature two shuffled, etc.), so the histogram of each feature stays
# the same: it is the same data.
lit.histfunc(dataset2);

In [None]:
#lit.correlations(dataset1, dataset2, label=0, colormap= plt.cm.Greens, wantplots=True, ax1=None)
#lit.correlations(dataset1, dataset2, label=1, colormap= plt.cm.Greens, wantplots=True, ax1=None)

In [None]:
# Train one network on dataset1 vs. dataset2 and draw it.
# neuralnet returns three values, unpacked here as w, b and roc; later cells
# pass roc to ROC_data, which grades it as an AUC.
# NOTE(review): w and b are presumably the weights and biases (draw_network
# takes them in the order b, w) -- confirm against lit_ml_tools.
w, b, roc = lit.neuralnet(dataset1, dataset2, num_hidden_layers=(nfeatures, nfeatures+2, nfeatures, nfeatures-2), wantplots=True)
lit.draw_network(b, w, figsize=(6, 6), colormap=plt.cm.Greens)

# [ROC classifications](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2935260/#:~:text=The%20area%20under%20the%20ROC,AUC%20values%20between%200.5%2D0.6.)
#### *how efficient the NN is*

We extract the AUC (area under the ROC curve) and grade it on the following scale:

excellent: 0.9-1
good:      0.8-0.9
fair:      0.7-0.8
poor:      0.6-0.7
failed:    0.5-0.6



In [None]:
def ROC_data(roc):
    """Grade one ROC AUC value on the notebook's five-level scale.

    Parameters
    ----------
    roc : float
        Area under the ROC curve for a single trained network.

    Returns
    -------
    tuple of (list, list)
        ``([grade], [roc])`` where ``grade`` is 'excellent' (>= 0.9),
        'good' (0.8 to < 0.9), 'fair' (0.7 to < 0.8), 'poor' (0.6 to < 0.7)
        or 'failed' (< 0.6). Both lists are empty when ``roc`` satisfies
        none of these comparisons (for example NaN).
    """
    # Lower bounds in descending order: the first one roc reaches decides the grade.
    grade_floors = ((0.9, 'excellent'), (0.8, 'good'), (0.7, 'fair'), (0.6, 'poor'))

    for floor, grade in grade_floors:
        if roc >= floor:
            return [grade], [roc]

    if roc < 0.6:
        return ['failed'], [roc]

    # No comparison held (e.g. NaN), so nothing is recorded.
    return [], []

In [None]:
## Sweep the last entry of num_hidden_layers from 1 to n_arb nodes, one network per subplot.
## (n_arb should eventually be renamed to something like nnodes)
n_arb= 8
roc_scores, roc_values = [], []

fig = plt.figure(figsize=(16,12))
for i in range(n_arb):
    # Subplot slots are 1-based; the 4x4 grid leaves room for up to 16 networks.
    plt.subplot(4, 4, i + 1)

    w, b, roc = lit.neuralnet(
        dataset1,
        dataset2,
        num_hidden_layers=(nfeatures, i + 1),
        wantplots=False,
    )
    lit.draw_network(b, w, ax=plt.gca(), colormap=plt.cm.Greens)

    # Grade the AUC once and keep both the label and the raw value.
    score, value = ROC_data(roc)
    roc_scores.append(score)
    roc_values.append(value)


In [None]:
# Show the grade labels and the raw AUC values collected so far.
# TODO: could each grade be printed on its own network diagram instead?
print(roc_scores)
print(roc_values)

In [None]:
## Repeat the node-count sweep for several trials.
## roc_scores[j][i] / roc_values[j][i] hold the grade / AUC of trial j for the
## network whose last num_hidden_layers entry is i+1.
n_arb= 5
ntrials= 4

roc_scores= []
roc_values= []

# One figure for the whole study: row j is trial j, column i is node count i+1.
# (Opening a new figure per trial while still asking for an ntrials x n_arb
# grid left every figure with only its first row filled.)
fig = plt.figure(figsize=(ntrials*n_arb,n_arb+n_arb))

for j in range(ntrials):
    # Register the per-trial lists up front so partial results are kept
    # even if a later training call fails.
    roc_scores_pertrial=[]
    roc_values_pertrial=[]
    roc_scores.append(roc_scores_pertrial)
    roc_values.append(roc_values_pertrial)

    for i in range(n_arb):
        # Subplot indices are 1-based and run row by row across the grid.
        plt.subplot(ntrials,n_arb,j*n_arb+i+1)

        w,b,roc= lit.neuralnet(dataset1, dataset2, num_hidden_layers=(nfeatures, i+1), wantplots=False)
        lit.draw_network(b,w, ax=plt.gca(), colormap=plt.cm.Greens)

        # Grade the AUC once and store both the label and the value.
        score, value = ROC_data(roc)
        roc_scores_pertrial.append(score)
        roc_values_pertrial.append(value)

In [None]:
# Node counts tried in the sweep: 1 .. n_arb.
narb_list = list(range(1, n_arb + 1))

print('Number of Nodes:',narb_list)
# One line of grade labels per trial, in the same order as narb_list.
for trial in range(ntrials):
    print(roc_scores[trial])


In [None]:
## Scan every (hl1, hl2) pair of node counts from 1..n_arb for the last two
## num_hidden_layers entries, repeated ntrials times.
## HL_nodes[j][k] is the [hl1, hl2] pair whose grade is roc_scores[j][k].
n_arb= 5
ntrials= 2

roc_scores, roc_values, HL_nodes = [], [], []

for j in range(ntrials):

    # Per-trial lists are registered before they are filled, so whatever was
    # collected stays reachable if a training call fails part-way through.
    roc_scores_pertrial, roc_values_pertrial, HL_nodes_pertrial = [], [], []
    roc_scores.append(roc_scores_pertrial)
    roc_values.append(roc_values_pertrial)
    HL_nodes.append(HL_nodes_pertrial)

    for n_hl1 in range(1, n_arb + 1):
        for n_hl2 in range(1, n_arb + 1):
            # Each network gets its own small figure.
            plt.figure(figsize=(7,4))

            w, b, roc = lit.neuralnet(
                dataset1,
                dataset2,
                num_hidden_layers=(nfeatures, n_hl1, n_hl2),
                wantplots=False,
            )
            lit.draw_network(b, w, ax=plt.gca(), colormap=plt.cm.Greens)

            # Grade the AUC once; record label, value and the node pattern together.
            score, value = ROC_data(roc)
            roc_scores_pertrial.append(score)
            roc_values_pertrial.append(value)
            HL_nodes_pertrial.append([n_hl1, n_hl2])

# ROC Analysis

In [None]:
## Bucket every node pattern by the grade its network received
excellent, good, fair, poor, failed = [], [], [], [], []
scores_and_nodes= [excellent,good,fair,poor,failed]

# Grade label paired with the bucket that collects its node patterns.
grade_buckets = (
    ('excellent', excellent),
    ('good', good),
    ('fair', fair),
    ('poor', poor),
    ('failed', failed),
)

for t in range(ntrials):
    for i, nodes in enumerate(HL_nodes[t]):
        score = roc_scores[t][i]
        # Scores are stored as one-element lists, e.g. ['good'];
        # an entry matching no label is left out of every bucket.
        for grade, bucket in grade_buckets:
            if score == [grade]:
                bucket.append(nodes)
                break

# Sanity check: every pattern from every trial should have landed in a bucket.
print('Should be Equal:',sum(len(bucket) for bucket in scores_and_nodes),',', len(HL_nodes[0]*ntrials))

[How I found the duplicates below](https://www.trainingint.com/how-to-find-duplicates-in-a-python-list.html)

In [None]:
## Split each grade's node patterns into first sightings and repeats
# Unique lists: the first occurrence of each pattern within a grade
u_excellent, u_good, u_fair, u_poor, u_failed = [], [], [], [], []

# Repeat lists: every further occurrence of a pattern already seen in that grade
r_excellent, r_good, r_fair, r_poor, r_failed = [], [], [], [], []

# Same order as scores_and_nodes: excellent, good, fair, poor, failed.
unique_lists = [u_excellent, u_good, u_fair, u_poor, u_failed]
repeat_lists = [r_excellent, r_good, r_fair, r_poor, r_failed]

for patterns, seen, repeats in zip(scores_and_nodes, unique_lists, repeat_lists):
    for pattern in patterns:
        if pattern in seen:
            # A pattern seen k times in one grade is recorded k-1 times here.
            repeats.append(pattern)
        else:
            seen.append(pattern)


In [None]:
## Node patterns that landed in the same grade more than once
## (r_* lists hold every occurrence after the first within a grade)
#print(excellent)
print('best of best:',r_excellent,'\n')
#print(failed)
print('worst of worst:',r_failed)
