In [None]:
import numpy as np
import matplotlib.pylab as plt

import lit_ml_tools as lit

# This will reload modules that have been edited
%load_ext autoreload
%autoreload 2

### Things to Test
- neural net arg for colormap (line 198,355,380,396)
        - 380: weights
        - 396: biases
- look at MLPClassifier
- read about Shapley values
- start testing NNs
- be able to explain *everything* - notes after each code block


In [None]:
# Datasets
# Entry and feature counts handed to lit.gen_original_data below.
nentries = 10000
nfeatures = 5

# dataset1 is the generated sample; dataset2 is produced from it by lit.shuffle_dataset.
# NOTE(review): what exactly gets shuffled is defined in lit_ml_tools — confirm there.
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)


In [None]:
# Echo the generated dataset as the cell output for a quick visual check.
dataset1

In [None]:
# Additional Plots
# The plotting calls below are disabled by wrapping them in a string literal;
# remove the surrounding triple quotes to run them. Because the string is the
# last expression of the cell, the notebook echoes it as the cell output.
'''
alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=True)

lit.sumfunc(dataset1)
#lit.histfunc(dataset1)
lit.sumfunc(dataset2)
#lit.histfunc(dataset2)

lit.correlations(dataset1, dataset2, label=0, colormap= plt.cm.Greens, wantplots=True, ax1=None)
lit.correlations(dataset1, dataset2, label=1, colormap= plt.cm.Greens, wantplots=True, ax1=None)
'''

In [None]:
# Settings forwarded to lit.best_node_pattern.
nHL= 2    # number of HLs (only up to 2 for now)
nnode= 4  # formerly n_arb
ntrials= 2

print(f'Best Node Pattern(s) for {nHL} Hidden Layers:')
# NOTE(review): nnode and ntrials are presumably the per-layer node limit and the
# number of repeats per pattern — confirm against lit_ml_tools.
lit.best_node_pattern(dataset1,dataset2,nentries,nfeatures,nnode,ntrials,nHL, wantplots= True) ## only works for up to 2 HLs

# Flattening Approach

In [None]:
def excellent_training(dataset1, dataset2, num_hidden_layers= (nfeatures,3,8,4),
                       min_auc=0.9, max_attempts=None):
    """Retrain the network until its AUC reaches ``min_auc``, then draw it.

    Parameters
    ----------
    dataset1, dataset2
        The two datasets, passed unchanged to ``lit.neuralnet``.
    num_hidden_layers : tuple
        Layer layout forwarded to ``lit.neuralnet``. The default is evaluated
        once, when this cell is run, so it captures the value of the global
        ``nfeatures`` at that moment; later reassignments are not picked up.
    min_auc : float, optional
        AUC a training must reach to be accepted (default 0.9, the value that
        was previously hard-coded).
    max_attempts : int or None, optional
        Upper bound on the number of trainings. ``None`` (default) retries
        without limit, which is the original behaviour.

    Returns
    -------
    tuple
        ``(w, b)`` — the weights and biases returned by the accepted training.

    Raises
    ------
    RuntimeError
        If ``max_attempts`` trainings all finish below ``min_auc``.
    """
    attempts = 0

    while True:
        w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers)
        attempts += 1

        print(f"auc: {auc}")

        # Same acceptance test as the original `while auc < 0.9` loop.
        if not auc < min_auc:
            break

        # Optional escape hatch so a dataset that never trains well cannot
        # hang the notebook forever.
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                f"AUC stayed below {min_auc} after {attempts} training attempts"
            )

    # Draws NN after finding excellent training approach (on the current axes)
    lit.draw_network(b, w, ax=plt.gca(), colormap=plt.cm.Greens)
    return w,b

In [None]:
# Bellis edits
def flatten_2d_lists(arr2d):
    """Flatten a 2D sequence into a flat Python list, row by row.

    Parameters
    ----------
    arr2d : sequence of sequences
        Rows to flatten (nested lists or a 2D array). Rows may have different
        lengths and the outer sequence may be empty.

    Returns
    -------
    list
        Every element of every row, in row-major order.
    """
    # Iterating each row directly (instead of sizing every row from row 0)
    # makes empty input return [] and keeps all elements of ragged rows.
    return [value for row in arr2d for value in row]

# Collapse every weight matrix returned by one training into a single flat list
def flatten_many_weights(weights):
    """Concatenate the flattened form of each 2D weight matrix in ``weights``.

    Each matrix is flattened with ``flatten_2d_lists`` and the results are
    joined in the order the matrices appear.
    """
    return [value for matrix in weights for value in flatten_2d_lists(matrix)]

# Collapse every bias vector into a single flat list
def flatten_many_biases(biases):
    """Concatenate the elements of each 1D sequence in ``biases``.

    Elements are read by index, in order, and the sequences are joined in the
    order they appear in ``biases``.
    """
    # Each entry of `biases` is expected to be a 1D list
    return [layer[idx] for layer in biases for idx in range(len(layer))]

def merge_all_weights_and_biases(w,b):
    """Return one flat list: all flattened weights followed by all flattened biases.

    ``w`` is flattened with ``flatten_many_weights`` and ``b`` with
    ``flatten_many_biases``; the weights come first in the result.
    """
    return flatten_many_weights(w) + flatten_many_biases(b)


In [None]:
#def list_to_features(list):
#    return [[el] for el in list]

In [None]:
## in a function
   #dataset= []
 # for loop
   # excellent_training - gives w,b
   # merge_all_weights_and_biases - list with len 121
   # list_to_features - list with 121 features
   # dataset.append(list_to_features)

In [None]:
def get_wb_dataset(dataset1, dataset2, nentries_wb=10, num_hidden_layers= (nfeatures,3,8,4)):
    """Train ``nentries_wb`` networks and collect their flattened weights and biases.

    Every accepted training (see ``excellent_training``) is drawn into its own
    panel of one large figure, which is saved as ``NN_grid_of_<nentries_wb>``.

    Parameters
    ----------
    dataset1, dataset2
        The two datasets, passed unchanged to ``excellent_training``.
    nentries_wb : int, optional
        Number of networks to train, i.e. number of rows in the result.
    num_hidden_layers : tuple, optional
        Layer layout forwarded to ``excellent_training``. The default is
        evaluated once, when this cell is run, using the value of the global
        ``nfeatures`` at that moment.

    Returns
    -------
    list of list
        One flat list of weights followed by biases per trained network.
    """
    ncols = 5
    # Enough rows for every network. The grid used to be fixed at 20x5, which
    # made plt.subplot fail for more than 100 entries; 100 entries still
    # produce exactly that 20x5 layout.
    nrows = max(1, (nentries_wb + ncols - 1) // ncols)

    # 10 x 5 inches per panel reproduces the original 50 x 100 canvas when
    # nentries_wb is 100.
    fig = plt.figure(figsize=(10 * ncols, 5 * nrows))

    # Kept from the original code: this changes the default size of figures
    # created later in the notebook, not the grid figure created above.
    plt.rcParams["figure.figsize"] = [4, 3]

    dataset = []
    for i in range(nentries_wb):
        # excellent_training draws on the current axes, so select the panel first.
        plt.subplot(nrows, ncols, i + 1)

        w, b = excellent_training(dataset1, dataset2, num_hidden_layers)
        dataset.append(merge_all_weights_and_biases(w, b))

    fig.savefig(f"NN_grid_of_{nentries_wb}")
    return dataset

In [None]:
# Variables for everything
# Re-declares the sizes used by the data-generation cells that follow, so this
# section does not depend on values left over from earlier cells.
nentries = 10000
nfeatures = 5

In [None]:
## SUM TO ONE
# Generate the 'normal' data and its shuffled counterpart, then train 100
# networks on the pair and keep each network's flattened weights and biases.
dataset1 = lit.gen_original_data(nentries, nfeatures, dtype='normal')  #dtype args: 'normal', 'squared', 'relativity'
dataset2 = lit.shuffle_dataset(dataset1)

# Slow cell: every one of the 100 entries is retrained until it is accepted.
dataset_normal= get_wb_dataset(dataset1, dataset2, nentries_wb=100, num_hidden_layers= (nfeatures,3,8,4))

In [None]:
## RELATIVITY
# Same procedure as the previous cell, but on 'relativity' data. Note that
# dataset1/dataset2 are overwritten here.
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='relativity') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)

# NOTE: get_wb_dataset names its saved figure from nentries_wb only, so this
# run writes to the same file name as the previous cell's run.
dataset_relativity= get_wb_dataset(dataset1, dataset2, nentries_wb=100, num_hidden_layers= (nfeatures,3,8,4))

In [None]:
# Marker cell: its output shows that the long-running cells above have finished.
print('done')

In [None]:
# Compare one raw data entry with the start of one weight/bias entry.
# When the cells are run in order, dataset1 holds the 'relativity' data at this
# point (it was reassigned in the cell that built dataset_relativity).
print(dataset1[0])
print(dataset_normal[0][0:5])

In [None]:
# Size check of the two weight/bias datasets: number of entries (trained
# networks) and number of values in the first entry of each.
print('nentries:',len(dataset_normal),len(dataset_relativity))
print('nfeatures:',len(dataset_normal[0]),len(dataset_relativity[0]))

In [None]:
#print('shape of entry 1')
#print(np.array(dataset_normal[0]).shape)
#print(np.array(dataset_relativity[0]).shape)

#print('how it should look')
#print(dataset1[0].shape)

In [None]:
## it doesn't need fixing
#print('fixed shape of entry 1')
#print(np.array([dataset_normal[0]]).T.shape)

In [None]:
# Compare the array shape of the raw data with the shapes of the weight/bias
# datasets (converted from nested lists to arrays just for the check).
print('nentries, nfeatures')
print(dataset1.shape)
print(np.array(dataset_normal).shape)
print(np.array(dataset_relativity).shape)
print()
# Same comparison for a single entry of each dataset.
print('shape of entry 1')
print(dataset1[0].shape)
print(np.array(dataset_normal[0]).shape)
print(np.array(dataset_relativity[0]).shape)

In [None]:
# Train networks on the weight/bias vectors themselves, with dataset_normal
# and dataset_relativity as the two input datasets.
# The input width gets its own name so the global `nfeatures` used by the
# data-generation cells is not overwritten.
n_wb_features = len(dataset_normal[0])

# Convert the nested lists once instead of on every iteration.
wb_normal = np.array(dataset_normal)
wb_relativity = np.array(dataset_relativity)

for i in range(15):
    print(i)
    excellent_training(wb_normal, wb_relativity, num_hidden_layers=(n_wb_features, 80, 50, 10))

# Gabby making things really complicated Approach

In [None]:
# Variables for everything
# Sizes for the data generated in this section, plus the number of trainings
# collected per data type.
nentries = 10000
nfeatures = 5
ntrials= 3

In [None]:
## Sum to One
print('Sum to One')

# Collect the biases returned by ntrials accepted trainings on 'normal' data.
biases1= []
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal')
dataset2= lit.shuffle_dataset(dataset1)

for i in range(ntrials):
    w, b= excellent_training(dataset1, dataset2)
    biases1.append(b)
# Transpose trials x positions: b1_lists[k] groups the k-th element of `b`
# from every trial (presumably one bias vector per layer — TODO confirm).
b1_lists= list(zip(*biases1))

## Relativity
print('Relativity')

# Same collection on 'relativity' data; dataset1/dataset2 are overwritten.
biases2= []
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='relativity')
dataset2= lit.shuffle_dataset(dataset1)

for i in range(ntrials):
    w, b= excellent_training(dataset1, dataset2)
    biases2.append(b)
# Same per-position grouping as b1_lists.
b2_lists= list(zip(*biases2))

## NN
#for i in range(nfeatures):



In [None]:
## NN
#for i in range(nfeatures):
    #print(i)
    #print(b1_lists[i])
    #print()
    #excellent_training(b1_lists[i], b2_lists[i])


In [None]:
# Sum to One
# One accepted training on 'normal' data; keeps its weights and biases.
print('Sum to One')
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal')
dataset2= lit.shuffle_dataset(dataset1)
w1, b1= excellent_training(dataset1, dataset2)

# Relativity
# One accepted training on 'relativity' data; dataset1/dataset2 are
# overwritten, so they hold the relativity data after this cell.
print('Relativity')
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='relativity')
dataset2= lit.shuffle_dataset(dataset1)
w2, b2= excellent_training(dataset1, dataset2)

In [None]:
# Only the last expression of a cell is echoed, so the first two values are
# printed explicitly; as bare expressions they were silently discarded.
print(b1_lists[0][0])
print(b2_lists[0][0])
# Slicing ten rows and then taking index 0 yields just the first row, i.e. the
# same value as dataset1[0].
# NOTE(review): if the first column of the first ten rows was intended, use
# dataset1[0:10, 0] instead — confirm.
dataset1[0:10][0]

In [None]:
#excellent_training(np.array(b1_lists[0]), np.array(b2_lists[0]))

# Making sure we get a good training set

In [None]:
# Manual Variables
# Note: this overwrites the notebook-wide nentries/nfeatures (nfeatures is 4 here).
nentries = 10000
nfeatures = 4

# Datasets
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)

# alldata/labels are not referenced again in this cell.
alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=False)

# Start below the threshold so the loop trains at least once.
auc = 0
w,b = None,None

# Retrain until a training reaches an AUC of at least 0.9. There is no retry
# limit, so this loops for as long as trainings stay below the threshold.
while auc < 0.9:

    w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers=(nfeatures, 3, 8, 4), wantplots=False)

    print(f"auc: {auc}")

# Only draw the network after it finds a good training approach
lit.draw_network(b, w, figsize=(6, 6), colormap=plt.cm.Greens)



In [None]:
# Manual Variables
# Note: this overwrites the notebook-wide nentries/nfeatures (nfeatures is 4 here).
nentries = 10000
nfeatures = 4

# Datasets
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='relativity') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)

# alldata/labels are not referenced again in this cell.
alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=False)

# Start below the threshold so the loop trains at least once.
auc = 0
w,b = None,None

# Retrain until a training reaches an AUC of at least 0.9. There is no retry
# limit, so this loops for as long as trainings stay below the threshold.
while auc < 0.9:

    w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers=(nfeatures, 3, 8, 4), wantplots=False)

    print(f"auc: {auc}")

# Only draw the network after it finds a good training approach
lit.draw_network(b, w, figsize=(6, 6), colormap=plt.cm.Greens)

