In [26]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np
import pandas as pd
import pickle
from sklearn import metrics, tree, svm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold,cross_val_score,train_test_split,LeaveOneOut
from sklearn.naive_bayes import MultinomialNB

from data_load import *
from dictionaries import *

# Define the Model Parameters

model_naming_convention = `model-type_encoding_family_data-set_data-balance_column-set_data-balance-multiple`


Example: `nb_le_f0_d0_b0_c0_v0` is a Naive Bayes (`nb`) model which is label encoded (`le`), belongs to the family of multi-label classifiers (`f0`), and uses data set `d0` (the basic untreated data set), no data balancing (`b0`), column set `c0` and data balance multiple `v0`.

The model-type code (e.g. `nb`, `lrr`, `svm`) is prepended to the front of the model name as the experiment is run through each of the classifier sections below.

Link to model building log: https://docs.google.com/spreadsheets/d/1py4RVZ0er_JDeJo-oxY29QT6__EWHIeU6zBgp-q8Wog/edit?usp=sharing

### Define Directory

In [27]:
# Relative path to the CSV for data set code d0; the experiment setup cell
# assigns it to `directory`.
d0 = 'data/d0.csv'

### Define Columns

In [28]:
# Candidate column sets for an experiment. Every set contains 'program',
# which the model cells below split off as the target column.

# c0: the widest set defined in this cell.
c0 = [
    'problem_type', 'creative', 'outdoors', 'career', 'group_work',
    'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
    'alternate_degree', 'expensive_equipment', 'drawing', 'essay',
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology', 'program',
]

# c1: the architecture ... technology columns plus the target.
c1 = [
    'architecture', 'automotive', 'business', 'construction', 'health',
    'environment', 'manufacturing', 'technology', 'program',
]

# c2: c0 without 'programming'.
c2 = [name for name in c0 if name != 'programming']

# c3: c1 followed by a selection of the remaining columns.
c3 = c1 + [
    'outdoors', 'career', 'liked_courses', 'disliked_courses', 'join_clubs',
    'not_clubs', 'liked_projects', 'disliked_projects', 'drawing',
]

# c4: 'alternate_degree' followed by c1.
c4 = ['alternate_degree'] + c1

c5 = [
    'outdoors', 'career', 'programming', 'join_clubs', 'not_clubs',
    'liked_projects', 'disliked_projects', 'alternate_degree', 'program',
]

# c29: c2 without 'tv_shows'.
c29 = [name for name in c2 if name != 'tv_shows']

# c36: c29 without 'problem_type' and 'expensive_equipment'.
c36 = [
    name for name in c29
    if name not in ('problem_type', 'expensive_equipment')
]

### Define Data Balance Dictionary

In [29]:
# b0 disables balancing; only relevant when the untreated data (code d0) is used.
b0 = False

# b1: the same value (100) for every program code.
b1 = dict.fromkeys(
    [
        'mech', 'bmed', 'swe', 'tron', 'cive', 'chem', 'syde', 'msci',
        'ce', 'elec', 'nano', 'geo', 'env', 'arch-e', 'arch',
    ],
    100,
)

# b4: identical to b1 except that 'swe' is set to 30.
b4 = dict(b1, swe=30)


### Define Data Balance Multiple

In [30]:
# Ratio of other programs to program in binary classifier. 2 means double of other programs, 0.5 means half
# v0 is the code for a ratio of 1; the experiment setup cell assigns it to
# data_balance_multiple.
v0 = 1

 # <font color='red'> Set Up Parameters for the Current Experiment</font> 

In [31]:
# Naming schemes, for reference:
#   model_name            = 'model-type_encoding_directory_datastructure_column-set_version'
#   experiment_model_name = 'dataSet_dataBalance_columnSet_dataBalanceMultiple'
experiment_model_name = 'd0_b0_c36_v0'

# The selections below are made by hand; keep them in step with the codes
# spelled out in experiment_model_name.
directory, data_balance, column_list = d0, b0, c36

# Ratio of other programs to program in binary classifier.
# 2 means double of other programs, 0.5 means half.
data_balance_multiple = v0

# One all-zero row with len(column_list) - 1 entries, shaped (1, n) so it can be
# passed straight to model.predict in the label-encoded cells.
test_vector = np.array([0] * (len(column_list) - 1)).reshape(1, -1)

### Define Encoding

 For each new type of encoding defined (other than the default label encoding) we need to define a new list of variables which are to be one hot encoded. This list name should match the encoding code that you will place in the dictionary in the model building google sheet.

 For each new type of encoding created, a new code block needs to be added under each model under each classifier family. Then, copy the code for the one hot encoded models and change the one_hot_encode list to the new list you created for this type of encoding. Once all the code blocks are added, you can run those cells!

In [32]:
# Column lists passed as `one_hot_encode` in the model cells. Each list keeps
# only the candidates that are present in the active column_list, in the
# order the candidates are written here.

# ohe: every candidate column for one-hot encoding.
ohe = [
    column
    for column in [
        'problem_type', 'creative', 'outdoors', 'career', 'group_work',
        'liked_courses', 'disliked_courses', 'programming', 'join_clubs',
        'not_clubs', 'liked_projects', 'disliked_projects', 'tv_shows',
        'alternate_degree', 'expensive_equipment', 'drawing', 'essay',
    ]
    if column in column_list
]

# m0 has the same candidates as ohe, so it is an independent copy of the
# filtered result.
m0 = list(ohe)

# m2: all four of career / liked_courses / join_clubs / alternate_degree.
m2 = [column for column in ['career', 'liked_courses', 'join_clubs', 'alternate_degree'] if column in column_list]

# m3-m6: one of the four columns each.
m3 = [column for column in ['career'] if column in column_list]
m4 = [column for column in ['liked_courses'] if column in column_list]
m5 = [column for column in ['join_clubs'] if column in column_list]
m6 = [column for column in ['alternate_degree'] if column in column_list]

# m7-m12: every pair of the four columns.
m7 = [column for column in ['career', 'liked_courses'] if column in column_list]
m8 = [column for column in ['career', 'join_clubs'] if column in column_list]
m9 = [column for column in ['career', 'alternate_degree'] if column in column_list]
m10 = [column for column in ['liked_courses', 'join_clubs'] if column in column_list]
m11 = [column for column in ['liked_courses', 'alternate_degree'] if column in column_list]
m12 = [column for column in ['join_clubs', 'alternate_degree'] if column in column_list]

# m13-m16: every triple of the four columns.
m13 = [column for column in ['career', 'liked_courses', 'join_clubs'] if column in column_list]
m14 = [column for column in ['career', 'join_clubs', 'alternate_degree'] if column in column_list]
m15 = [column for column in ['career', 'liked_courses', 'alternate_degree'] if column in column_list]
m16 = [column for column in ['liked_courses', 'join_clubs', 'alternate_degree'] if column in column_list]

## Multilabel Classifiers

### Naive Bayes - Label Encoded

In [115]:
# Multinomial Naive Bayes on the label-encoded data.
model_name = 'nb_le_f0_' + experiment_model_name
# Only element 0 of the value returned by get_label_encoded_data is used here.
data = get_label_encoded_data(
    directory, model_name, column_list, 'H', data_balance=data_balance
)[0]

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

# Spot check: predict for the all-zero test_vector and print the mapped program.
print(INV_INDEX_PROGRAM[model.predict(test_vector)[0]])

save_model(data, model, cat, model_name)
test_model(model_name, test_vector)

tron
nb_le_f0_d0_b0_c36_v0 created..
Loading CAT file...
Loading model...
Results:
{'arch': 0.0455, 'arch-e': 0.0335, 'bmed': 0.0623, 'ce': 0.1038, 'chem': 0.0583, 'cive': 0.0623, 'elec': 0.0599, 'env': 0.0575, 'geo': 0.0351, 'mech': 0.115, 'msci': 0.0807, 'nano': 0.0599, 'swe': 0.0511, 'syde': 0.0559, 'tron': 0.119}


### Naive Bayes - One Hot Encoded

In [116]:
# Multinomial Naive Bayes with the `ohe` columns passed as one_hot_encode.
model_name = 'nb_ohe_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_ohe_f0_d0_b0_c36_v0 created..


### Naive Bayes - m2

In [117]:
# Multinomial Naive Bayes with the `m2` columns passed as one_hot_encode.
model_name = 'nb_m2_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m2,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m2_f0_d0_b0_c36_v0 created..


### Naive Bayes - m3

In [118]:
# Multinomial Naive Bayes with the `m3` columns passed as one_hot_encode.
model_name = 'nb_m3_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m3,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m3_f0_d0_b0_c36_v0 created..


### Naive Bayes - m4

In [119]:
# Multinomial Naive Bayes with the `m4` columns passed as one_hot_encode.
model_name = 'nb_m4_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m4,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m4_f0_d0_b0_c36_v0 created..


### Naive Bayes - m5

In [120]:
# Multinomial Naive Bayes with the `m5` columns passed as one_hot_encode.
model_name = 'nb_m5_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m5,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m5_f0_d0_b0_c36_v0 created..


### Naive Bayes - m6

In [121]:
# Multinomial Naive Bayes with the `m6` columns passed as one_hot_encode.
model_name = 'nb_m6_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m6,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m6_f0_d0_b0_c36_v0 created..


### Naive Bayes - m7

In [122]:
# Multinomial Naive Bayes with the `m7` columns passed as one_hot_encode.
model_name = 'nb_m7_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m7,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m7_f0_d0_b0_c36_v0 created..


### Naive Bayes - m8

In [123]:
# Multinomial Naive Bayes with the `m8` columns passed as one_hot_encode.
model_name = 'nb_m8_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m8,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m8_f0_d0_b0_c36_v0 created..


### Naive Bayes - m9

In [124]:
# Multinomial Naive Bayes with the `m9` columns passed as one_hot_encode.
model_name = 'nb_m9_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m9,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m9_f0_d0_b0_c36_v0 created..


### Naive Bayes - m10

In [125]:
# Multinomial Naive Bayes with the `m10` columns passed as one_hot_encode.
model_name = 'nb_m10_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m10,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m10_f0_d0_b0_c36_v0 created..


### Naive Bayes - m11

In [126]:
# Multinomial Naive Bayes with the `m11` columns passed as one_hot_encode.
model_name = 'nb_m11_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m11,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m11_f0_d0_b0_c36_v0 created..


### Naive Bayes - m12

In [127]:
# Multinomial Naive Bayes with the `m12` columns passed as one_hot_encode.
model_name = 'nb_m12_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m12,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m12_f0_d0_b0_c36_v0 created..


### Naive Bayes - m13

In [128]:
# Multinomial Naive Bayes with the `m13` columns passed as one_hot_encode.
model_name = 'nb_m13_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m13,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m13_f0_d0_b0_c36_v0 created..


### Naive Bayes - m14

In [129]:
# Multinomial Naive Bayes with the `m14` columns passed as one_hot_encode.
model_name = 'nb_m14_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m14,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m14_f0_d0_b0_c36_v0 created..


### Naive Bayes - m15

In [130]:
# Multinomial Naive Bayes with the `m15` columns passed as one_hot_encode.
model_name = 'nb_m15_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m15,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m15_f0_d0_b0_c36_v0 created..


### Naive Bayes - m16

In [131]:
# Multinomial Naive Bayes with the `m16` columns passed as one_hot_encode.
model_name = 'nb_m16_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m16,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_m16_f0_d0_b0_c36_v0 created..


### Logistic Regression - Label Encoded

In [132]:
# Multinomial logistic regression on the label-encoded data.
model_name = 'lrr_le_f0_' + experiment_model_name
# Only element 0 of the value returned by get_label_encoded_data is used here.
data = get_label_encoded_data(
    directory, model_name, column_list, 'H', data_balance=data_balance
)[0]

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

# Spot check: predict for the all-zero test_vector and print the mapped program.
print(INV_INDEX_PROGRAM[model.predict(test_vector)[0]])

save_model(data, model, cat, model_name)
test_model(model_name, test_vector)

msci
lrr_le_f0_d0_b0_c36_v0 created..
Loading CAT file...
Loading model...
Results:
{'arch': 0.0449, 'arch-e': 0.0523, 'bmed': 0.0288, 'ce': 0.0197, 'chem': 0.1369, 'cive': 0.1388, 'elec': 0.0237, 'env': 0.0263, 'geo': 0.0208, 'mech': 0.1142, 'msci': 0.1412, 'nano': 0.1099, 'swe': 0.0513, 'syde': 0.0305, 'tron': 0.0607}




### Logistic Regression - One Hot Encoded

In [133]:
# Multinomial logistic regression with the `ohe` columns passed as one_hot_encode.
model_name = 'lrr_ohe_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_ohe_f0_d0_b0_c36_v0 created..




### Logistic Regression - m2

In [134]:
# Multinomial logistic regression with the `m2` columns passed as one_hot_encode.
model_name = 'lrr_m2_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m2,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m2_f0_d0_b0_c36_v0 created..




### Logistic Regression - m3

In [135]:
# Multinomial logistic regression with the `m3` columns passed as one_hot_encode.
model_name = 'lrr_m3_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m3,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m3_f0_d0_b0_c36_v0 created..




### Logistic Regression - m4

In [136]:
# Multinomial logistic regression with the `m4` columns passed as one_hot_encode.
model_name = 'lrr_m4_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m4,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m4_f0_d0_b0_c36_v0 created..




### Logistic Regression - m5

In [137]:
# Multinomial logistic regression with the `m5` columns passed as one_hot_encode.
model_name = 'lrr_m5_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m5,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m5_f0_d0_b0_c36_v0 created..




### Logistic Regression - m6

In [138]:
# Multinomial logistic regression with the `m6` columns passed as one_hot_encode.
model_name = 'lrr_m6_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m6,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m6_f0_d0_b0_c36_v0 created..




### Logistic Regression - m7

In [139]:
# Multinomial logistic regression with the `m7` columns passed as one_hot_encode.
model_name = 'lrr_m7_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m7,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m7_f0_d0_b0_c36_v0 created..




### Logistic Regression - m8

In [140]:
# Multinomial logistic regression with the `m8` columns passed as one_hot_encode.
model_name = 'lrr_m8_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m8,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m8_f0_d0_b0_c36_v0 created..




### Logistic Regression - m9

In [141]:
# Multinomial logistic regression with the `m9` columns passed as one_hot_encode.
model_name = 'lrr_m9_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m9,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m9_f0_d0_b0_c36_v0 created..




### Logistic Regression - m10

In [142]:
# Multinomial logistic regression with the `m10` columns passed as one_hot_encode.
model_name = 'lrr_m10_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m10,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m10_f0_d0_b0_c36_v0 created..




### Logistic Regression - m11

In [143]:
# Multinomial logistic regression with the `m11` columns passed as one_hot_encode.
model_name = 'lrr_m11_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m11,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m11_f0_d0_b0_c36_v0 created..




### Logistic Regression - m12

In [144]:
# Multinomial logistic regression with the `m12` columns passed as one_hot_encode.
model_name = 'lrr_m12_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m12,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m12_f0_d0_b0_c36_v0 created..




### Logistic Regression - m13

In [145]:
# Multinomial logistic regression with the `m13` columns passed as one_hot_encode.
model_name = 'lrr_m13_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m13,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m13_f0_d0_b0_c36_v0 created..




### Logistic Regression - m14

In [146]:
# Multinomial logistic regression with the `m14` columns passed as one_hot_encode.
model_name = 'lrr_m14_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m14,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m14_f0_d0_b0_c36_v0 created..




### Logistic Regression - m15

In [147]:
# Multinomial logistic regression with the `m15` columns passed as one_hot_encode.
model_name = 'lrr_m15_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m15,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Features are every column except "program"; "program" is the target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

# The model is fitted on all rows; no hold-out split is made in this cell.
LRR = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
model = LRR.fit(X, Y)

# Feature column name -> positional index of that column.
cat = {column: index for index, column in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m15_f0_d0_b0_c36_v0 created..




### Logistic Regression - m16

In [148]:
# Multinomial logistic regression fitted on the m16 merged encoding.
model_name = 'lrr_m16_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m16,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

LRR = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=0)
model = LRR.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

lrr_m16_f0_d0_b0_c36_v0 created..




### Support Vector Machine - Label Encoded

In [149]:
# Support vector classifier fitted on the label-encoded data set.
model_name = 'svm_le_f0_' + experiment_model_name
data = get_label_encoded_data(
    directory,
    model_name,
    column_list,
    'H',
    data_balance=data_balance,
)[0]

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

# Quick sanity check: predicted program for the notebook's test vector.
print(INV_INDEX_PROGRAM[model.predict(test_vector)[0]])

save_model(data, model, cat, model_name)
test_model(model_name, test_vector)



mech
svm_le_f0_d0_b0_c36_v0 created..
Loading CAT file...
Loading model...
Results:
{'arch': 0.039, 'arch-e': 0.0323, 'bmed': 0.0875, 'ce': 0.0542, 'chem': 0.0624, 'cive': 0.0905, 'elec': 0.0611, 'env': 0.0289, 'geo': 0.018, 'mech': 0.1601, 'msci': 0.077, 'nano': 0.0727, 'swe': 0.019, 'syde': 0.0652, 'tron': 0.1321}


### Support Vector Machine - One Hot Encoded

In [150]:
# Support vector classifier fitted on the one-hot (ohe) merged encoding.
model_name = 'svm_ohe_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_ohe_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m2

In [151]:
# Support vector classifier fitted on the m2 merged encoding.
model_name = 'svm_m2_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m2,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m2_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m3

In [152]:
# Support vector classifier fitted on the m3 merged encoding.
model_name = 'svm_m3_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m3,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m3_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m4

In [153]:
# Support vector classifier fitted on the m4 merged encoding.
model_name = 'svm_m4_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m4,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m4_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m5

In [154]:
# Support vector classifier fitted on the m5 merged encoding.
model_name = 'svm_m5_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m5,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m5_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m6

In [155]:
# Support vector classifier fitted on the m6 merged encoding.
model_name = 'svm_m6_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m6,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m6_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m7

In [156]:
# Support vector classifier fitted on the m7 merged encoding.
model_name = 'svm_m7_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m7,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m7_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m8

In [157]:
# Support vector classifier fitted on the m8 merged encoding.
model_name = 'svm_m8_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m8,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m8_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m9

In [158]:
# Support vector classifier fitted on the m9 merged encoding.
model_name = 'svm_m9_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m9,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m9_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m10

In [159]:
# Support vector classifier fitted on the m10 merged encoding.
model_name = 'svm_m10_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m10,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m10_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m11

In [160]:
# Support vector classifier fitted on the m11 merged encoding.
model_name = 'svm_m11_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m11,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m11_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m12

In [161]:
# Support vector classifier fitted on the m12 merged encoding.
model_name = 'svm_m12_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m12,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m12_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m13

In [162]:
# Support vector classifier fitted on the m13 merged encoding.
model_name = 'svm_m13_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m13,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m13_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m14

In [163]:
# Support vector classifier fitted on the m14 merged encoding.
model_name = 'svm_m14_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m14,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m14_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m15

In [164]:
# Support vector classifier fitted on the m15 merged encoding.
model_name = 'svm_m15_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m15,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m15_f0_d0_b0_c36_v0 created..


### Support Vector Machine - m16

In [165]:
# Support vector classifier fitted on the m16 merged encoding.
model_name = 'svm_m16_f0_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=m16,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X = np.array(x_df)
Y = np.array(y_df)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the saved model and its predicted probabilities are reproducible.
SVM = svm.SVC(probability=True, random_state=0)
model = SVM.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)



svm_m16_f0_d0_b0_c36_v0 created..


## Binary Classifiers

### Naive Bayes - Label Encoded

In [14]:
# Binary-classifier family: multinomial Naive Bayes on the label-encoded data.
model_name = 'nb_le_f1_' + experiment_model_name
data = get_label_encoded_data(
    directory,
    model_name,
    column_list,
    'H',
    data_balance=data_balance,
)[0]

# Both names are bound to the same unfitted estimator instance.
model_type = MultinomialNB()
mnb = model_type

binary_classifier(data, model_name, data_balance_multiple, mnb)

nb_le_f1_d0_b4_c36_v0_mech created..
nb_le_f1_d0_b4_c36_v0_bmed created..
nb_le_f1_d0_b4_c36_v0_swe created..
nb_le_f1_d0_b4_c36_v0_ce created..
nb_le_f1_d0_b4_c36_v0_tron created..
nb_le_f1_d0_b4_c36_v0_cive created..
nb_le_f1_d0_b4_c36_v0_chem created..
nb_le_f1_d0_b4_c36_v0_syde created..
nb_le_f1_d0_b4_c36_v0_msci created..
nb_le_f1_d0_b4_c36_v0_elec created..
nb_le_f1_d0_b4_c36_v0_nano created..
nb_le_f1_d0_b4_c36_v0_geo created..
nb_le_f1_d0_b4_c36_v0_env created..
nb_le_f1_d0_b4_c36_v0_arch-e created..
nb_le_f1_d0_b4_c36_v0_arch created..


### Naive Bayes - One Hot Encoded

In [15]:
# Binary-classifier family: multinomial Naive Bayes on the one-hot (ohe) merged encoding.
model_name = 'nb_ohe_f1_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Both names are bound to the same unfitted estimator instance.
model_type = MultinomialNB()
mnb = model_type

binary_classifier(data, model_name, data_balance_multiple, mnb)

nb_ohe_f1_d0_b4_c36_v0_mech created..
nb_ohe_f1_d0_b4_c36_v0_bmed created..
nb_ohe_f1_d0_b4_c36_v0_swe created..
nb_ohe_f1_d0_b4_c36_v0_ce created..
nb_ohe_f1_d0_b4_c36_v0_tron created..
nb_ohe_f1_d0_b4_c36_v0_cive created..
nb_ohe_f1_d0_b4_c36_v0_chem created..
nb_ohe_f1_d0_b4_c36_v0_syde created..
nb_ohe_f1_d0_b4_c36_v0_msci created..
nb_ohe_f1_d0_b4_c36_v0_elec created..
nb_ohe_f1_d0_b4_c36_v0_nano created..
nb_ohe_f1_d0_b4_c36_v0_geo created..
nb_ohe_f1_d0_b4_c36_v0_env created..
nb_ohe_f1_d0_b4_c36_v0_arch-e created..
nb_ohe_f1_d0_b4_c36_v0_arch created..


### Logistic Regression - Label Encoded

In [16]:
# Binary-classifier family: logistic regression on the label-encoded data.
model_name = 'lrr_le_f1_' + experiment_model_name
data = get_label_encoded_data(
    directory,
    model_name,
    column_list,
    'H',
    data_balance=data_balance,
)[0]

LRR = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=0)

binary_classifier(data, model_name, data_balance_multiple, LRR)



lrr_le_f1_d0_b4_c36_v0_mech created..
lrr_le_f1_d0_b4_c36_v0_bmed created..
lrr_le_f1_d0_b4_c36_v0_swe created..
lrr_le_f1_d0_b4_c36_v0_ce created..
lrr_le_f1_d0_b4_c36_v0_tron created..
lrr_le_f1_d0_b4_c36_v0_cive created..




lrr_le_f1_d0_b4_c36_v0_chem created..
lrr_le_f1_d0_b4_c36_v0_syde created..
lrr_le_f1_d0_b4_c36_v0_msci created..
lrr_le_f1_d0_b4_c36_v0_elec created..
lrr_le_f1_d0_b4_c36_v0_nano created..
lrr_le_f1_d0_b4_c36_v0_geo created..
lrr_le_f1_d0_b4_c36_v0_env created..
lrr_le_f1_d0_b4_c36_v0_arch-e created..
lrr_le_f1_d0_b4_c36_v0_arch created..




### Logistic Regression - One Hot Encoded

In [17]:
# Binary-classifier family: logistic regression on the one-hot (ohe) merged encoding.
model_name = 'lrr_ohe_f1_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

LRR = LogisticRegression(multi_class='multinomial', solver='lbfgs', random_state=0)

binary_classifier(data, model_name, data_balance_multiple, LRR)

lrr_ohe_f1_d0_b4_c36_v0_mech created..
lrr_ohe_f1_d0_b4_c36_v0_bmed created..
lrr_ohe_f1_d0_b4_c36_v0_swe created..
lrr_ohe_f1_d0_b4_c36_v0_ce created..
lrr_ohe_f1_d0_b4_c36_v0_tron created..
lrr_ohe_f1_d0_b4_c36_v0_cive created..
lrr_ohe_f1_d0_b4_c36_v0_chem created..
lrr_ohe_f1_d0_b4_c36_v0_syde created..
lrr_ohe_f1_d0_b4_c36_v0_msci created..
lrr_ohe_f1_d0_b4_c36_v0_elec created..
lrr_ohe_f1_d0_b4_c36_v0_nano created..
lrr_ohe_f1_d0_b4_c36_v0_geo created..
lrr_ohe_f1_d0_b4_c36_v0_env created..
lrr_ohe_f1_d0_b4_c36_v0_arch-e created..
lrr_ohe_f1_d0_b4_c36_v0_arch created..


### Support Vector Machine - Label Encoded

In [18]:
# Binary-classifier family: support vector classifier on the label-encoded data.
model_name = 'svm_le_f1_' + experiment_model_name
data = get_label_encoded_data(
    directory,
    model_name,
    column_list,
    'H',
    data_balance=data_balance,
)[0]

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the resulting models are reproducible.
SVM = svm.SVC(probability=True, random_state=0)

binary_classifier(data, model_name, data_balance_multiple, SVM)



svm_le_f1_d0_b4_c36_v0_mech created..
svm_le_f1_d0_b4_c36_v0_bmed created..
svm_le_f1_d0_b4_c36_v0_swe created..
svm_le_f1_d0_b4_c36_v0_ce created..
svm_le_f1_d0_b4_c36_v0_tron created..
svm_le_f1_d0_b4_c36_v0_cive created..
svm_le_f1_d0_b4_c36_v0_chem created..
svm_le_f1_d0_b4_c36_v0_syde created..
svm_le_f1_d0_b4_c36_v0_msci created..
svm_le_f1_d0_b4_c36_v0_elec created..
svm_le_f1_d0_b4_c36_v0_nano created..
svm_le_f1_d0_b4_c36_v0_geo created..
svm_le_f1_d0_b4_c36_v0_env created..
svm_le_f1_d0_b4_c36_v0_arch-e created..
svm_le_f1_d0_b4_c36_v0_arch created..




### Support Vector Machine - One Hot Encoded

In [19]:
# Binary-classifier family: support vector classifier on the one-hot (ohe) merged encoding.
model_name = 'svm_ohe_f1_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# probability=True makes SVC calibrate probabilities with an internal, randomly
# shuffled cross-validation. Seed it (as the logistic regression cells do) so
# the resulting models are reproducible.
SVM = svm.SVC(probability=True, random_state=0)

binary_classifier(data, model_name, data_balance_multiple, SVM)



svm_ohe_f1_d0_b4_c36_v0_mech created..
svm_ohe_f1_d0_b4_c36_v0_bmed created..
svm_ohe_f1_d0_b4_c36_v0_swe created..
svm_ohe_f1_d0_b4_c36_v0_ce created..
svm_ohe_f1_d0_b4_c36_v0_tron created..




svm_ohe_f1_d0_b4_c36_v0_cive created..
svm_ohe_f1_d0_b4_c36_v0_chem created..
svm_ohe_f1_d0_b4_c36_v0_syde created..
svm_ohe_f1_d0_b4_c36_v0_msci created..




svm_ohe_f1_d0_b4_c36_v0_elec created..
svm_ohe_f1_d0_b4_c36_v0_nano created..
svm_ohe_f1_d0_b4_c36_v0_geo created..
svm_ohe_f1_d0_b4_c36_v0_env created..
svm_ohe_f1_d0_b4_c36_v0_arch-e created..
svm_ohe_f1_d0_b4_c36_v0_arch created..




### Decision Tree - Label Encoded

In [20]:
# Binary-classifier family: decision tree on the label-encoded data.
model_name = 'tree_le_f1_' + experiment_model_name
data = get_label_encoded_data(
    directory,
    model_name,
    column_list,
    'H',
    data_balance=data_balance,
)[0]

# Decision-tree fitting permutes features randomly at each split, so an
# unseeded tree can differ between runs; seed it (as the logistic regression
# cells do) to make the resulting models reproducible.
ent = tree.DecisionTreeClassifier(random_state=0)

binary_classifier(data, model_name, data_balance_multiple, ent)

tree_le_f1_d0_b4_c36_v0_mech created..
tree_le_f1_d0_b4_c36_v0_bmed created..
tree_le_f1_d0_b4_c36_v0_swe created..
tree_le_f1_d0_b4_c36_v0_ce created..
tree_le_f1_d0_b4_c36_v0_tron created..
tree_le_f1_d0_b4_c36_v0_cive created..
tree_le_f1_d0_b4_c36_v0_chem created..
tree_le_f1_d0_b4_c36_v0_syde created..
tree_le_f1_d0_b4_c36_v0_msci created..
tree_le_f1_d0_b4_c36_v0_elec created..
tree_le_f1_d0_b4_c36_v0_nano created..
tree_le_f1_d0_b4_c36_v0_geo created..
tree_le_f1_d0_b4_c36_v0_env created..
tree_le_f1_d0_b4_c36_v0_arch-e created..
tree_le_f1_d0_b4_c36_v0_arch created..


### Decision Tree - One Hot Encoded

In [21]:
# Binary-classifier family: decision tree on the one-hot (ohe) merged encoding.
model_name = 'tree_ohe_f1_' + experiment_model_name
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Decision-tree fitting permutes features randomly at each split, so an
# unseeded tree can differ between runs; seed it (as the logistic regression
# cells do) to make the resulting models reproducible.
ent = tree.DecisionTreeClassifier(random_state=0)

binary_classifier(data, model_name, data_balance_multiple, ent)

tree_ohe_f1_d0_b4_c36_v0_mech created..
tree_ohe_f1_d0_b4_c36_v0_bmed created..
tree_ohe_f1_d0_b4_c36_v0_swe created..
tree_ohe_f1_d0_b4_c36_v0_ce created..
tree_ohe_f1_d0_b4_c36_v0_tron created..
tree_ohe_f1_d0_b4_c36_v0_cive created..
tree_ohe_f1_d0_b4_c36_v0_chem created..
tree_ohe_f1_d0_b4_c36_v0_syde created..
tree_ohe_f1_d0_b4_c36_v0_msci created..
tree_ohe_f1_d0_b4_c36_v0_elec created..
tree_ohe_f1_d0_b4_c36_v0_nano created..
tree_ohe_f1_d0_b4_c36_v0_geo created..
tree_ohe_f1_d0_b4_c36_v0_env created..
tree_ohe_f1_d0_b4_c36_v0_arch-e created..
tree_ohe_f1_d0_b4_c36_v0_arch created..


# Leave One Out Validation 

In [8]:
# Hard-coded model name for this section; unlike the training cells above it is
# not derived from experiment_model_name.
model_name = 'nb_ohe_f0_d0_b1_c36_v0'

In [17]:

# Fit a multinomial Naive Bayes model on the one-hot (ohe) merged encoding,
# using the model_name set in the cell above.
data = get_merged_encoded_data(
    directory,
    model_name,
    one_hot_encode=ohe,
    column_list=column_list,
    drop_not_happy='H',
    data_balance=data_balance,
)

# Split the encoded frame into the feature columns and the "program" target.
x_df = data.drop(columns=["program"])
y_df = data["program"]
X, Y = np.array(x_df), np.array(y_df)

mnb = MultinomialNB()
model = mnb.fit(X, Y)

# Feature column name -> column position in X.
cat = {name: position for position, name in enumerate(x_df.columns)}

save_model(data, model, cat, model_name)

nb_ohe_f0_d0_b1_c36_v0 created..


In [33]:
def leave_one_out_score(directory,model_name,one_hot_encode=ohe,column_list = column_list,drop_not_happy='H',data_balance=data_balance):
    """Estimate multinomial Naive Bayes accuracy with leave-one-out cross-validation.

    The data set is loaded through ``get_merged_encoded_data`` and the
    "program" column is used as the target. For every row, a fresh
    ``MultinomialNB`` is fitted on all other rows and scored on the held-out
    row; the per-row scores are then averaged.

    Parameters
    ----------
    directory, model_name
        Forwarded unchanged to ``get_merged_encoded_data``.
    one_hot_encode, column_list, drop_not_happy, data_balance
        Forwarded as the keyword arguments of the same names. All four are
        honoured; the defaults are the notebook globals / literal captured
        when this cell is executed.

    Returns
    -------
    float
        Mean leave-one-out accuracy. A summary line with the mean and twice
        the standard deviation of the per-row scores is also printed.
    """
    # Pass the caller's arguments through instead of the notebook globals, so
    # that non-default encodings / filters are actually evaluated.
    data = get_merged_encoded_data(
        directory,
        model_name,
        one_hot_encode=one_hot_encode,
        column_list=column_list,
        drop_not_happy=drop_not_happy,
        data_balance=data_balance,
    )

    X = np.array(data.drop(columns=["program"]))
    y = np.array(data["program"])

    accuracy = []
    for train_index, test_index in LeaveOneOut().split(X):
        # A new estimator per fold so no state carries over between folds.
        mnb = MultinomialNB()
        mnb.fit(X[train_index], y[train_index])
        # Each fold holds out one row, so the score is either 0.0 or 1.0.
        accuracy.append(mnb.score(X[test_index], y[test_index]))

    scores = np.array(accuracy)
    mean = scores.mean()
    # Reported spread is two standard deviations of the 0/1 fold scores.
    two_std = scores.std() * 2
    print("LOO CV Accuracy: %0.2f (+/- %0.2f)" % (mean, two_std))
    return mean

LOO CV Accuracy: 0.46 (+/- 1.00)
