In [None]:
import numpy as np
import matplotlib.pylab as plt

import lit_ml_tools as lit

# This will reload modules that have been edited
%load_ext autoreload
%autoreload 2

### Things to Test
- neural net arg for colormap (line 198,355,380,396)
        - 380: weights
        - 396: biases
- look at MLPClassifier
- read about Shapley values
- start testing NNs
- be able to explain *everything* - notes after each code block


In [None]:
# Datasets
nentries = 10000
nfeatures = 5

dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)


In [None]:
dataset1

In [None]:
# Additional Plots
'''
alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=True)

lit.sumfunc(dataset1)
#lit.histfunc(dataset1)
lit.sumfunc(dataset2)
#lit.histfunc(dataset2)

lit.correlations(dataset1, dataset2, label=0, colormap= plt.cm.Greens, wantplots=True, ax1=None)
lit.correlations(dataset1, dataset2, label=1, colormap= plt.cm.Greens, wantplots=True, ax1=None)
'''

In [None]:
nHL= 2    # number of HLs (only up to 2 for now)
nnode= 4  # formerly n_arb
ntrials= 2

print(f'Best Node Pattern(s) for {nHL} Hidden Layers:')
lit.best_node_pattern(dataset1,dataset2,nentries,nfeatures,nnode,ntrials,nHL, wantplots= True) ## only works for up to 2 HLs

# Flattening Approach

In [None]:
def excellent_training(dataset1, dataset2, num_hidden_layers= (nfeatures,3,8,4), colormap=plt.cm.Greens):
    """Retrain until a network reaches an AUC of at least 0.9, then draw it.

    ``lit.neuralnet(dataset1, dataset2, num_hidden_layers)`` is called
    repeatedly and the AUC of every attempt is printed. The first attempt
    whose AUC is not below 0.9 is accepted and drawn on the current
    matplotlib axes with ``lit.draw_network``.

    Parameters
    ----------
    dataset1, dataset2
        Forwarded unchanged to ``lit.neuralnet``.
    num_hidden_layers
        Layer-size tuple forwarded to ``lit.neuralnet``. The default is built
        once, from the value the notebook-level ``nfeatures`` had when this
        cell was executed.
    colormap
        Forwarded to ``lit.draw_network``.

    Returns
    -------
    tuple
        ``(w, b)`` - the first two values returned by ``lit.neuralnet`` for
        the accepted attempt.

    Notes
    -----
    There is no retry limit: the call does not return until an attempt
    reaches the threshold.
    """
    while True:
        w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers)
        print(f"auc: {auc}")

        # Written as `not auc < 0.9` so that a NaN AUC ends the loop exactly
        # as a `while auc < 0.9` condition would.
        if not auc < 0.9:
            break

    # Draw only the accepted network, on whatever axes are current.
    lit.draw_network(b, w, ax=plt.gca(), colormap=colormap)
    return w,b

In [None]:
# Bellis edits
def flatten_2d_lists(arr2d):
    """Flatten a 2-D sequence into a single list, row by row.

    Parameters
    ----------
    arr2d
        Sequence of rows (nested lists or a 2-D numpy array).

    Returns
    -------
    list
        All elements in row-major order. An empty input gives ``[]``.

    Notes
    -----
    Every row contributes all of its own elements, so the function no longer
    fails on empty input and no longer drops (or crashes on) rows whose
    length differs from the first row.
    """
    return [value for row in arr2d for value in row]

# Flatten all the weights returned by a training 
def flatten_many_weights(weights):
    """Flatten a sequence of 2-D weight matrices into one flat list.

    Each matrix is flattened with ``flatten_2d_lists`` and the results are
    concatenated in the order the matrices appear in ``weights``.
    """
    flat = []
    for matrix in weights:
        flat.extend(flatten_2d_lists(matrix))
    return flat

# Flatten all the biases together
def flatten_many_biases(biases):
    """Concatenate a sequence of 1-D bias vectors into one flat list.

    Elements keep their order: all entries of the first vector, then all
    entries of the second, and so on.
    """
    # Each element of `biases` is expected to be a 1-D, indexable sequence.
    return [vector[k] for vector in biases for k in range(len(vector))]

def merge_all_weights_and_biases(w,b):
    """Return one flat list: every weight first, then every bias.

    ``w`` is flattened with ``flatten_many_weights`` and ``b`` with
    ``flatten_many_biases``; the two flat lists are concatenated.
    """
    return flatten_many_weights(w) + flatten_many_biases(b)


In [None]:
def get_wb_dataset(dataset1, dataset2, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (100,10), gridsubplot= (50,20), nentries_wb=10):
    """Train ``nentries_wb`` accepted networks and collect their flattened w,b.

    For each entry a subplot of one shared figure is selected, a network is
    trained and drawn by ``excellent_training``, and its weights and biases
    are flattened with ``merge_all_weights_and_biases``. The figure is saved
    as ``NN_grid_of_<nentries_wb>``.

    Parameters
    ----------
    dataset1, dataset2
        Forwarded to ``excellent_training``.
    colormap
        Forwarded to ``excellent_training``.
    num_hidden_layers
        Forwarded to ``excellent_training``; the default tuple is built once,
        when this cell is executed.
    gridfigsize
        ``figsize`` of the figure that holds the grid.
    gridsubplot
        ``(nrows, ncols)`` of the subplot grid.
    nentries_wb
        Number of networks to train, i.e. number of rows in the result.

    Returns
    -------
    list
        One flat list (weights followed by biases) per trained network.
    """
    wb_rows = []
    grid_fig = plt.figure(figsize=gridfigsize)

    for panel in range(1, nentries_wb + 1):
        grid_rows, grid_cols = gridsubplot
        plt.rcParams["figure.figsize"] = [4, 3]
        # Make the next grid cell current so the network is drawn into it.
        plt.subplot(grid_rows, grid_cols, panel)

        w, b = excellent_training(dataset1, dataset2, num_hidden_layers, colormap=colormap)
        wb_rows.append(merge_all_weights_and_biases(w, b))

    grid_fig.savefig(f"NN_grid_of_{nentries_wb}")
    return wb_rows

In [None]:
def whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= False):
    """Generate an original dataset and its shuffled counterpart.

    Parameters
    ----------
    nentries
        Number of entries (rows) to generate.
    nfeatures
        Number of features, forwarded to ``lit.gen_original_data``.
    dtype
        Forwarded to ``lit.gen_original_data``; the notebook uses
        'normal', 'squared' and 'relativity'.
    diff_per_iter
        Falsy: one call to ``lit.gen_original_data`` produces all entries.
        Truthy: every entry comes from its own single-entry call.

    Returns
    -------
    tuple
        ``(dataset1, dataset2)`` where ``dataset2`` is
        ``lit.shuffle_dataset(dataset1)``.

    Notes
    -----
    The flag is tested for truthiness. The earlier ``== False`` / ``== True``
    chain silently returned two empty lists for any other value (e.g. None).
    """
    if diff_per_iter:
        # One independent single-entry generation per row.
        rows = [
            lit.gen_original_data(1, nfeatures, dtype)[0].tolist()
            for _ in range(nentries)
        ]
        dataset1 = np.array(rows)
    else:
        dataset1 = lit.gen_original_data(nentries, nfeatures, dtype)

    dataset2 = lit.shuffle_dataset(dataset1)
    return dataset1, dataset2


In [None]:
#def list_to_features(list):
#    return [[el] for el in list]

In [None]:
## in a function
   #dataset= []
 # for loop
   # excellent_training - gives w,b
   # merge_all_weights_and_biases - list with len 121
   # list_to_features - list with 121 features
   # dataset.append(list_to_features)

In [None]:
'''
def get_wb_dataset(dataset1, dataset2, nentries_wb=10, num_hidden_layers= (nfeatures,3,8,4)):
    dataset= []
    
    #fig= plt.figure(figsize=(nentries_wb*2, nentries_wb/2))
    fig= plt.figure(figsize=(50,100))  
    for i in range(nentries_wb):
        plt.rcParams["figure.figsize"] = [4, 3]
        
        plt.subplot(50,10,i+1)
        #plt.subplot(2,3,i+1)

        w,b= excellent_training(dataset1, dataset2, num_hidden_layers)
        vals = merge_all_weights_and_biases(w,b)
        #entry= list_to_features(vals)
        #dataset.append(entry)
        dataset.append(vals)
    fig.savefig(f"NN_grid_of_{nentries_wb}")
    #plt.close()
    return dataset
'''

In [None]:
# Variables for everything
nentries = 10000
nfeatures = 5

In [None]:
## SUM TO ONE
dataset1 = lit.gen_original_data(nentries, nfeatures, dtype='normal')  #dtype args: 'normal', 'squared', 'relativity'
dataset2 = lit.shuffle_dataset(dataset1)

dataset_normal= get_wb_dataset(dataset1, dataset2, nentries_wb=1, num_hidden_layers= (nfeatures,3,8,4))
dataset_normal= get_wb_dataset(dataset1, dataset2, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (100,10), gridsubplot= (50,20), nentries_wb=10)

In [None]:
## RELATIVITY
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='relativity') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)

dataset_relativity= get_wb_dataset(dataset1, dataset2, nentries_wb=100, num_hidden_layers= (nfeatures,3,8,4))

In [None]:
print('done')

In [None]:
print(dataset1[0])
print(dataset_normal[0][0:5])

In [None]:
print('nentries:',len(dataset_normal),len(dataset_relativity))
print('nfeatures:',len(dataset_normal[0]),len(dataset_relativity[0]))

In [None]:
#print('shape of entry 1')
#print(np.array(dataset_normal[0]).shape)
#print(np.array(dataset_relativity[0]).shape)

#print('how it should look')
#print(dataset1[0].shape)

In [None]:
## it doesn't need fixing
#print('fixed shape of entry 1')
#print(np.array([dataset_normal[0]]).T.shape)

In [None]:
print('nentries, nfeatures')
print(dataset1.shape)
print(np.array(dataset_normal).shape)
print(np.array(dataset_relativity).shape)
print()
print('shape of entry 1')
print(dataset1[0].shape)
print(np.array(dataset_normal[0]).shape)
print(np.array(dataset_relativity[0]).shape)

In [None]:
# Width of one flattened w,b vector. Kept under its own name so the
# notebook-wide `nfeatures` (feature count of the original datasets) is not
# overwritten for the cells that run after this one.
n_wb_features = len(dataset_normal[0])
for i in range(15):
    print(i)
    #plt.figure()
    excellent_training(np.array(dataset_normal), np.array(dataset_relativity), num_hidden_layers= (n_wb_features,80,50,10))

# Adding to get_wb_dataset

In [None]:
## want subplot size to be an arg
## want option to use same dataset or diff dataset for each iteration

##need to put data generation into get_wb_dataset

In [None]:
nrows,ncols=(50,10)
print(nrows)
print(ncols)

In [None]:
def get_wb_dataset(nentries, nfeatures, dtype= 'normal', num_hidden_layers= (nfeatures,3,8,4), colormap=plt.cm.Greens, gridfigsize= (100,10), gridsubplot= (50,20), nentries_wb=10, diff_per_iter= False):
    """Train ``nentries_wb`` accepted networks and collect their flattened w,b.

    Parameters
    ----------
    nentries, nfeatures
        Normally the size of the original dataset to generate with
        ``lit.gen_original_data``. For compatibility with cells that call
        ``get_wb_dataset(dataset1, dataset2, ...)``, an array-like first
        argument is treated as an already generated ``dataset1`` and the
        second argument as its ``dataset2``; nothing is generated then.
    dtype
        Forwarded to ``lit.gen_original_data``.
    num_hidden_layers
        Forwarded to ``excellent_training``. The default tuple is built once,
        from the notebook-level ``nfeatures`` at the time this cell runs, not
        from the ``nfeatures`` argument.
    colormap
        Forwarded to ``excellent_training``.
    gridfigsize
        ``figsize`` of the figure that holds the grid of drawn networks.
    gridsubplot
        ``(nrows, ncols)`` of the subplot grid.
    nentries_wb
        Number of networks to train, i.e. number of rows in the result.
    diff_per_iter
        Falsy: every network is trained on the same dataset pair.
        Truthy: a fresh dataset pair is generated for each network (not
        possible, and therefore skipped, when datasets were passed in).

    Returns
    -------
    list
        One flat list (weights followed by biases) per trained network.

    Notes
    -----
    The flag is tested for truthiness; with the earlier ``== False`` /
    ``== True`` chain any other value left the result unbound at ``return``.
    """
    # Several cells in this notebook pass the two datasets positionally
    # instead of (nentries, nfeatures); detect that by dimensionality.
    data_supplied = np.ndim(nentries) > 0
    regenerate = bool(diff_per_iter) and not data_supplied

    if data_supplied:
        dataset1, dataset2 = nentries, nfeatures
    elif not regenerate:
        dataset1 = lit.gen_original_data(nentries, nfeatures, dtype)
        dataset2 = lit.shuffle_dataset(dataset1)

    nrows, ncols = gridsubplot
    dataset = []  # one flattened [weights..., biases...] list per network
    fig = plt.figure(figsize=gridfigsize)

    for i in range(nentries_wb):
        if regenerate:
            dataset1 = lit.gen_original_data(nentries, nfeatures, dtype)
            dataset2 = lit.shuffle_dataset(dataset1)

        # Kept from the original: updates matplotlib's rcParams figure size.
        plt.rcParams["figure.figsize"] = [4, 3]
        # Make the next grid cell current so the network is drawn into it.
        plt.subplot(nrows, ncols, i+1)

        w, b = excellent_training(dataset1, dataset2, num_hidden_layers, colormap=colormap)
        dataset.append(merge_all_weights_and_biases(w, b))

    # Output file names are unchanged from the two original branches.
    if diff_per_iter:
        fig.savefig(f"NN_grid_of_{nentries_wb}_{num_hidden_layers}")
    else:
        fig.savefig(f"NN_grid_of_{nentries_wb}")

    return dataset

In [None]:
def whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= False):
    """Generate an original dataset and its shuffled counterpart.

    This cell re-defines ``whatdata``; once executed, this is the definition
    the rest of the notebook uses.

    Parameters
    ----------
    nentries
        Number of entries (rows) to generate.
    nfeatures
        Number of features, forwarded to ``lit.gen_original_data``.
    dtype
        Forwarded to ``lit.gen_original_data``; the notebook uses
        'normal', 'squared' and 'relativity'.
    diff_per_iter
        Falsy: one call to ``lit.gen_original_data`` produces all entries.
        Truthy: every entry comes from its own single-entry call.

    Returns
    -------
    tuple
        ``(dataset1, dataset2)`` where ``dataset2`` is
        ``lit.shuffle_dataset(dataset1)``.

    Notes
    -----
    The flag is tested for truthiness. The earlier ``== False`` / ``== True``
    chain silently returned two empty lists for any other value (e.g. None).
    """
    if diff_per_iter:
        # One independent single-entry generation per row.
        rows = [
            lit.gen_original_data(1, nfeatures, dtype)[0].tolist()
            for _ in range(nentries)
        ]
        dataset1 = np.array(rows)
    else:
        dataset1 = lit.gen_original_data(nentries, nfeatures, dtype)

    dataset2 = lit.shuffle_dataset(dataset1)
    return dataset1, dataset2


In [None]:
# Variables for everything
nentries = 10000
nfeatures = 5

In [None]:
wn_same= get_wb_dataset(nentries, nfeatures, dtype= 'normal', num_hidden_layers= (nfeatures,8,5,2), colormap=plt.cm.Greens, gridfigsize= (5,5), gridsubplot= (1,1), nentries_wb=1, diff_per_iter= False)

In [None]:
len(wn_same[0])

In [None]:
wn_diff= get_wb_dataset(nentries, nfeatures, dtype= 'normal', num_hidden_layers= (nfeatures,3,8,4), colormap=plt.cm.Greens, gridfigsize= (5,5), gridsubplot= (1,1), nentries_wb=1, diff_per_iter= True)


In [None]:
len(wn_diff[0])

In [None]:
## Testing wb datasets against each other
# Width of one flattened w,b vector. Kept under its own name so the
# notebook-wide `nfeatures` is still the original feature count when the
# following cells pass it to whatdata() and get_wb_dataset().
n_wb_features = len(wn_same[0])
for i in range(15):
    print(i)
    #plt.figure()
    excellent_training(np.array(wn_same), np.array(wn_diff), num_hidden_layers= (n_wb_features,80,50,10))


In [None]:
# Generating datasets
dataset1_n, dataset2_n= whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= False)
#dataset1_r, dataset2_r= whatdata(nentries, nfeatures, dtype= 'relativity', diff_per_iter= False)
#dataset1_s, dataset2_s= whatdata(nentries, nfeatures, dtype= 'squared', diff_per_iter= False)

In [None]:
# Generating the grids and w,b datasets
wb_dataset_n= get_wb_dataset(dataset1_n, dataset2_n, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (20,10), gridsubplot= (2,5), nentries_wb=10,colormap=plt.cm.Greens)
print('wb_dataset_n done')
#wb_dataset_r= get_wb_dataset(dataset1_r, dataset2_r, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)
#print('wb_dataset_r done')
#wb_dataset_s= get_wb_dataset(dataset1_s, dataset2_s, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)
#print('wb_dataset_s done')

In [None]:
## Testing wb datasets against each other
# Width of one flattened w,b vector. Kept under its own name so the
# notebook-wide `nfeatures` (feature count of the original datasets) is not
# overwritten for the cells that run after this one.
n_wb_features = len(dataset_normal[0])
for i in range(15):
    print(i)
    #plt.figure()
    excellent_training(np.array(dataset_normal), np.array(dataset_relativity), num_hidden_layers= (n_wb_features,80,50,10))


# Making Grids

In [None]:
#excellent_training(dataset1, dataset2, num_hidden_layers= (nfeatures,3,8,4), colormap=plt.cm.Greens)

In [None]:
# Parameters
nentries = 10000
nfeatures = 5

In [None]:
# Normal / Same 
dataset1_ns, dataset2_ns= whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= False)
get_wb_dataset(dataset1_ns, dataset2_ns, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (30,30), gridsubplot= (6,6), nentries_wb=36)



In [None]:
# Normal / Diff
dataset1_nd, dataset2_nd= whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= True)
get_wb_dataset(dataset1_nd, dataset2_nd, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (100,10), gridsubplot= (50,20), nentries_wb=10)




In [None]:
# Relativity / Same
dataset1_rs, dataset2_rs= whatdata(nentries, nfeatures, dtype= 'relativity', diff_per_iter= False)
get_wb_dataset(dataset1_rs, dataset2_rs, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (100,10), gridsubplot= (50,20), nentries_wb=10)




In [None]:
# Relativity / Diff
dataset1_rd, dataset2_rd= whatdata(nentries, nfeatures, dtype= 'relativity', diff_per_iter= True)
get_wb_dataset(dataset1_rd, dataset2_rd, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (100,10), gridsubplot= (50,20), nentries_wb=10)




In [None]:
# Squared / Same
dataset1_ss, dataset2_ss= whatdata(nentries, nfeatures, dtype= 'squared', diff_per_iter= False)
#wb_ss= get_wb_dataset(dataset1_ss, dataset2_ss, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,5,4,2), gridfigsize= (30,30), gridsubplot= (9,3), nentries_wb=9)

nHL= 2    # number of HLs (only up to 2 for now)
nnode= 8  # formerly n_arb
ntrials= 2

print(f'Best Node Pattern(s) for {nHL} Hidden Layers:')
lit.best_node_pattern(dataset1_ss,dataset2_ss,nentries,nfeatures,nnode,ntrials,nHL, wantplots= True) ## only works for up to 2 HLs


In [None]:
wb_ss= get_wb_dataset(dataset1_ss, dataset2_ss, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,8,5), gridfigsize= (30,30), gridsubplot= (9,3), nentries_wb=9)

# Generating and Saving BIG data

### Original Datasets

In [None]:
import pickle

In [None]:
## Same Dataset per Iteration
dataset1_ns, dataset2_ns= whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= False)
dataset1_rs, dataset2_rs= whatdata(nentries, nfeatures, dtype= 'relativity', diff_per_iter= False)
dataset1_ss, dataset2_ss= whatdata(nentries, nfeatures, dtype= 'squared', diff_per_iter= False)

##########################################################################################################

## Diff Dataset per Iteration
dataset1_nd, dataset2_nd= whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= True)
dataset1_rd, dataset2_rd= whatdata(nentries, nfeatures, dtype= 'relativity', diff_per_iter= True)
dataset1_sd, dataset2_sd= whatdata(nentries, nfeatures, dtype= 'squared', diff_per_iter= True)


In [None]:
## testing
dataset1_t, dataset2_t= whatdata(nentries, nfeatures, dtype= 'normal', diff_per_iter= False)

wb_test= get_wb_dataset(dataset1_t, dataset2_t, nentries_wb=10, num_hidden_layers= (nfeatures,3,8,4))
# The context manager closes the file even if pickling raises.
with open('wb_normal_test.pkl', 'wb') as file:
    pickle.dump(wb_test, file)

In [None]:
print(dataset1_t)
print(dataset2_t)

In [None]:
wb_test

In [None]:
# Read the pickle back; the context manager closes the file handle, which
# the bare `pickle.load(open(...))` form never did.
with open('wb_normal_test.pkl', 'rb') as file:
    wb_testout = pickle.load(file)


In [None]:
wb_testout

In [None]:
# NOTE(review): every other call in this notebook passes (biases, weights) to
# lit.draw_network; here the two raw datasets are passed instead - confirm
# that this is intended.
lit.draw_network(dataset1_t, dataset2_t, figsize=(6, 6), colormap=plt.cm.Greens)

### WB Datasets

In [None]:
## 3x2 Grid 

## Same Dataset per Iteration
'''
wb_ns= get_wb_dataset(dataset1_ns, dataset2_ns, nentries_wb=1000, num_hidden_layers= (nfeatures,3,8,4))
file = open('wb_normal_same.pkl', 'wb')
pickle.dump(wb_ns, file)
file.close()
print('1 done')
'''

In [None]:
'''
wb_rs= get_wb_dataset(dataset1_rs, dataset2_rs, nentries_wb=1000, num_hidden_layers= (nfeatures,3,8,4))
file = open('wb_relativity_same.pkl', 'wb')
pickle.dump(wb_rs, file)
file.close()
print('2 done')
'''

In [None]:
# running second to last
wb_ss= get_wb_dataset(dataset1_ss, dataset2_ss, nentries_wb=1000, num_hidden_layers= (nfeatures,3,8,4))
# The context manager closes the file even if pickling raises.
with open('wb_squared_same.pkl', 'wb') as file:
    pickle.dump(wb_ss, file)
print('3 done')

In [None]:
## Diff Dataset per Iteration
wb_nd= get_wb_dataset(dataset1_nd, dataset2_nd, nentries_wb=1000, num_hidden_layers= (nfeatures,3,8,4))
# The context manager closes the file even if pickling raises.
with open('wb_normal_diff.pkl', 'wb') as file:
    pickle.dump(wb_nd, file)
print('4 done')

In [None]:
wb_rd= get_wb_dataset(dataset1_rd, dataset2_rd, nentries_wb=1000, num_hidden_layers= (nfeatures,3,8,4))
# The context manager closes the file even if pickling raises.
with open('wb_relativity_diff.pkl', 'wb') as file:
    pickle.dump(wb_rd, file)
print('5 done')

In [None]:
## running last
wb_sd= get_wb_dataset(dataset1_sd, dataset2_sd, nentries_wb=1000, num_hidden_layers= (nfeatures,3,8,4))
# The context manager closes the file even if pickling raises.
with open('wb_squared_diff.pkl', 'wb') as file:
    pickle.dump(wb_sd, file)
print('6 done')

In [None]:
print('completely done')

##########################################################################################################







### WB against WB (3x2 grid)

In [None]:
''' 
##this should be excellent_training with same datasets as below
## Same Dataset per Iteration
# normal - relativity
get_wb_dataset(wb_ns, wb_rs, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)

# relativity - squared
get_wb_dataset(wb_rs, wb_ss, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)

# squared - normal
get_wb_dataset(wb_ss, wb_ns, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)

##########################################################################################################

## Diff Dataset per Iteration
# normal - relativity
get_wb_dataset(wb_nd, wb_rd, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)

# relativity - squared
get_wb_dataset(wb_rd, wb_sd, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)

# squared - normal
get_wb_dataset(wb_sd, wb_nd, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10)
'''

# Gabby Experimenting

In [None]:
def get_wb_dataset_09(dataset1, dataset2, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10):
    """Collect flattened w,b from ``nentries_wb`` runs of ``excellent_training_09``.

    Each run is drawn into its own subplot of one shared figure, which is
    saved as ``NN_grid_of_<nentries_wb>`` once all runs are done.

    Parameters
    ----------
    dataset1, dataset2, num_hidden_layers, colormap
        Forwarded to ``excellent_training_09``.
    gridfigsize
        ``figsize`` of the figure that holds the grid.
    gridsubplot
        ``(nrows, ncols)`` of the subplot grid.
    nentries_wb
        Number of networks to train, i.e. number of rows in the result.

    Returns
    -------
    list
        One flat list (weights followed by biases) per trained network.
    """
    collected = []
    grid_fig = plt.figure(figsize=gridfigsize)

    for panel in range(1, nentries_wb + 1):
        grid_rows, grid_cols = gridsubplot
        plt.rcParams["figure.figsize"] = [4, 3]
        # Make the next grid cell current so the network is drawn into it.
        plt.subplot(grid_rows, grid_cols, panel)

        w, b = excellent_training_09(dataset1, dataset2, num_hidden_layers, colormap=colormap)
        collected.append(merge_all_weights_and_biases(w, b))

    grid_fig.savefig(f"NN_grid_of_{nentries_wb}")
    return collected


def get_wb_dataset_06(dataset1, dataset2, colormap=plt.cm.Greens, num_hidden_layers= (nfeatures,3,8,4), gridfigsize= (50,100), gridsubplot= (50,10), nentries_wb=10):
    """Collect flattened w,b from ``nentries_wb`` runs of ``excellent_training_06``.

    Each run is drawn into its own subplot of one shared figure, which is
    saved as ``NN_grid_of_<nentries_wb>`` once all runs are done.

    Parameters
    ----------
    dataset1, dataset2, num_hidden_layers, colormap
        Forwarded to ``excellent_training_06``.
    gridfigsize
        ``figsize`` of the figure that holds the grid.
    gridsubplot
        ``(nrows, ncols)`` of the subplot grid.
    nentries_wb
        Number of networks to train, i.e. number of rows in the result.

    Returns
    -------
    list
        One flat list (weights followed by biases) per trained network.
    """
    collected = []
    grid_fig = plt.figure(figsize=gridfigsize)

    for panel in range(1, nentries_wb + 1):
        grid_rows, grid_cols = gridsubplot
        plt.rcParams["figure.figsize"] = [4, 3]
        # Make the next grid cell current so the network is drawn into it.
        plt.subplot(grid_rows, grid_cols, panel)

        w, b = excellent_training_06(dataset1, dataset2, num_hidden_layers, colormap=colormap)
        collected.append(merge_all_weights_and_biases(w, b))

    grid_fig.savefig(f"NN_grid_of_{nentries_wb}")
    return collected

In [None]:
def excellent_training_09(dataset1, dataset2, num_hidden_layers= (nfeatures,3,8,4), colormap=plt.cm.Greens):
    """Retrain until a network reaches an AUC of at least 0.9, then draw it.

    ``lit.neuralnet(dataset1, dataset2, num_hidden_layers)`` is called
    repeatedly and the AUC of every attempt is printed. The first attempt
    whose AUC is not below 0.9 is accepted and drawn on the current
    matplotlib axes with ``lit.draw_network``.

    Returns
    -------
    tuple
        ``(w, b)`` - the first two values returned by ``lit.neuralnet`` for
        the accepted attempt.

    Notes
    -----
    There is no retry limit: the call does not return until an attempt
    reaches the threshold.
    """
    while True:
        w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers)
        print(f"auc: {auc}")

        # Written as `not auc < 0.9` so that a NaN AUC ends the loop exactly
        # as a `while auc < 0.9` condition would.
        if not auc < 0.9:
            break

    # Draw only the accepted network, on whatever axes are current.
    lit.draw_network(b, w, ax=plt.gca(), colormap=colormap)
    return w,b

def excellent_training_06(dataset1, dataset2, num_hidden_layers= (nfeatures,3,8,4), colormap=plt.cm.Greens):
    """Retrain until a network's AUC is no longer above 0.6, then draw it.

    This is the low-AUC counterpart of ``excellent_training_09``:
    ``lit.neuralnet(dataset1, dataset2, num_hidden_layers)`` is called
    repeatedly, the AUC of every attempt is printed, and the first attempt
    whose AUC is not above 0.6 is accepted and drawn on the current
    matplotlib axes with ``lit.draw_network``.

    Parameters
    ----------
    dataset1, dataset2
        Forwarded unchanged to ``lit.neuralnet``.
    num_hidden_layers
        Layer-size tuple forwarded to ``lit.neuralnet``. The default is built
        once, when this cell is executed.
    colormap
        Forwarded to ``lit.draw_network``.

    Returns
    -------
    tuple
        ``(w, b)`` - the first two values returned by ``lit.neuralnet`` for
        the accepted attempt.

    Notes
    -----
    The previous version started from ``auc = 0`` and looped
    ``while auc > 0.6``, so the loop body never ran: no network was trained
    and ``(None, None)`` was drawn and returned. Training now always happens
    at least once. There is no retry limit.
    """
    while True:
        w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers)
        print(f"auc: {auc}")

        # Same stop condition as the original `while auc > 0.6`, evaluated
        # after each attempt instead of before the first one.
        if not auc > 0.6:
            break

    # Draw only the accepted network, on whatever axes are current.
    lit.draw_network(b, w, ax=plt.gca(), colormap=colormap)
    return w,b

In [None]:
# Variables for everything
nentries = 10000
nfeatures = 5

In [None]:
## SUM TO ONE (1)
dataset1 = lit.gen_original_data(nentries, nfeatures, dtype='normal')  #dtype args: 'normal', 'squared', 'relativity'
dataset2 = lit.shuffle_dataset(dataset1)

dataset_normal09= get_wb_dataset_09(dataset1, dataset2, nentries_wb=500, num_hidden_layers= (nfeatures,3,8,4))

In [None]:
## SUM TO ONE (2)
dataset1 = lit.gen_original_data(nentries, nfeatures, dtype='normal')  #dtype args: 'normal', 'squared', 'relativity'
dataset2 = lit.shuffle_dataset(dataset1)

dataset_normal06= get_wb_dataset_06(dataset1, dataset2, nentries_wb=500, num_hidden_layers= (nfeatures,3,8,4))

In [None]:
## can NN distinguish between WB from sumtoone auc<0.6 and auc>0.9 ??

# Width of one flattened w,b vector. Kept under its own name so the
# notebook-wide `nfeatures` (feature count of the original datasets) is not
# overwritten for the cells that run after this one.
n_wb_features = len(dataset_normal09[0])
for i in range(15):
    print(i)
    #plt.figure()
    excellent_training(np.array(dataset_normal09), np.array(dataset_normal06), num_hidden_layers= (n_wb_features,80,50,10))

In [None]:
print('done')

In [None]:
## can NN distinguish between WB from same sumtoone and diff sumtoone??





# Writing output to a .csv file

In [None]:
# NOTE(review): `dataset_normal1` is not assigned in any cell visible here -
# confirm which w,b dataset this cell is meant to export before running it
# on a fresh kernel.
print(len(dataset_normal1))
print(len(dataset_normal1[0]))

outfilename = "dset_normal1.csv"

# One comma-separated line per entry, each terminated by a newline.
nrows = len(dataset_normal1)
output = "".join(
    ",".join(np.array(dataset_normal1[i]).astype(str)) + "\n"
    for i in range(nrows)
)

# The context manager closes the file even if the write raises.
with open(outfilename,"w") as outfile:
    outfile.write(output)


In [None]:
x = np.loadtxt(outfilename,delimiter=',', skiprows=0, dtype=float, unpack=True)

In [None]:
len(x)

# Save some stuff as pickle

In [None]:
import pickle


In [None]:
# Assuming we have a w and a b
# NOTE(review): in the cells visible above, `w` and `b` are only returned by
# the training helpers, never assigned at notebook level - confirm they exist
# before running this cell on a fresh kernel.
print(w)
print(b)

# Open the file where you want to store the data and dump the information
# into it; the context manager closes the file even if pickling raises.
with open('pickle_test.pkl', 'wb') as file:
    pickle.dump([w,b], file)

In [None]:
# Read back in the pickle
# The context manager closes the file handle, which the bare
# `pickle.load(open(...))` form never did.
with open('pickle_test.pkl', 'rb') as file:
    wnew,bnew = pickle.load(file)

In [None]:
print(wnew)
print(bnew)

lit.draw_network(bnew, wnew, figsize=(6, 6), colormap=plt.cm.Greens)


In [None]:
print('done')

# Making sure we get a good training set

In [None]:
# Manual Variables
nentries = 10000
nfeatures = 4

# Datasets
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='normal') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)

alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=False)

auc = 0
w,b = None,None

while auc < 0.9:

    w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers=(nfeatures, 3, 8, 4), wantplots=False)

    print(f"auc: {auc}")

# Only draw the network after it finds a good training approach
lit.draw_network(b, w, figsize=(6, 6), colormap=plt.cm.Greens)



In [None]:
# Manual Variables
nentries = 10000
nfeatures = 4

# Datasets
dataset1= lit.gen_original_data(nentries, nfeatures, dtype='relativity') #dtype args: 'normal', 'squared', 'relativity'
dataset2= lit.shuffle_dataset(dataset1)

alldata,labels= lit.concat_dataset(dataset1, dataset2, wantplots=False)

auc = 0
w,b = None,None

while auc < 0.9:

    w, b, auc = lit.neuralnet(dataset1, dataset2, num_hidden_layers=(nfeatures, 3, 8, 4), wantplots=False)

    print(f"auc: {auc}")

# Only draw the network after it finds a good training approach
lit.draw_network(b, w, figsize=(6, 6), colormap=plt.cm.Greens)

