# Neural Network Method
by Zack Hudgins

In [1]:
import numpy as np
import pandas as pd

Preparing the dataset for training. 

In [2]:
# Import Dataset
data_og = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/00373/drug_consumption.data', sep=",", names=['Age','Gender','Education','Country','Ethnicity','Nscore','Escore','Oscore','Ascore','Cscore','Impulsive','SS','Alcohol','Amphet','Amyl','Benzos','Caff','Cannabis','Choc','Coke','Crack','Ecstasy','Heroin','Ketamine','Legalh','LSD','Meth','Mushrooms','Nicotine','Semer','VSA'])
data_og.head()

data = pd.read_csv('drugs.csv')

In [3]:
d = data_og
d.drop(d[d['Semer'] != 'CL0'].index, inplace = True)
d = d.drop('Semer', 1)
cannabis = d['Cannabis']
d = d.drop(['Cannabis'], 1)

  d = d.drop('Semer', 1)
  d = d.drop(['Cannabis'], 1)



Applying one-hot encoding for input data. The first model will utilize all of the provided data from the dataset in order to predict the recency of cannabis use, split into 7 classes, making this a multiclass classifier.

In [4]:

drug_labels = ['Alcohol','Amphet','Amyl','Benzos','Caff','Choc','Coke','Crack','Ecstasy','Heroin','Ketamine','Legalh','LSD','Meth','Mushrooms','Nicotine','VSA']
personality_labels = ['Nscore','Escore','Oscore','Ascore','Cscore','Impulsive','SS']
demographic_labels = ['Age','Gender','Education','Country','Ethnicity']
sch_labels = ['Sch1','Sch2','Sch3','Sch4','SchNA']
for drug in drug_labels:
    # not a user = 0, user = 1
    d[drug] = np.where(d[drug] == 'CL0', 0, 1)
    
    
x = d

#for p in personality_labels:
    #x[p] = x[p].astype('category').cat.codes.to_numpy()
x = pd.get_dummies(x, columns=(personality_labels))
    
#for d in demographic_labels:
#    x[d] = x[d].astype('category').cat.codes.to_numpy()
x = pd.get_dummies(x, columns=(demographic_labels))


Applying one-hot encoding of classes (CL0 - CL6) for target output.

In [5]:
targets = cannabis.astype('category').cat.codes.to_numpy()
y = np.zeros((7,len(targets)))
for m in range(len(targets)):
    y[targets[m], m] = 1


y = y.T

Split data into train, test, and validation.

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)

print(X_train.shape, X_test.shape, X_val.shape)
print(y_train.shape, y_test.shape, y_val.shape)

(1501, 277) (188, 277) (188, 277)
(1501, 7) (188, 7) (188, 7)


Creating and training the model, using a semi-arbitrary choice of 1 hidden layer of 12 nodes.

In [7]:
from sklearn.neural_network import MLPClassifier

print(X_train.shape[1], 'input nodes, ', y_train.shape[1], 'output nodes')
clf = MLPClassifier(solver='sgd', activation='relu',
                    hidden_layer_sizes=(12,), random_state=1, max_iter = 500)
clf.fit(X_train, y_train)

277 input nodes,  7 output nodes




MLPClassifier(hidden_layer_sizes=(12,), max_iter=500, random_state=1,
              solver='sgd')

Using .score results in 27.1% accuracy. However, this is a  multi-class classifier, so some of its predictions have multiple output neurons active and some have none. Using argmax of each probability prediction is a better indicator for our output with only a single class, which brings the accuracy up to 46.3%.

In [8]:
from sklearn.metrics import accuracy_score, confusion_matrix

accuracy = clf.score(X_test, y_test)
print("Accuracy: ",accuracy) 


Accuracy:  0.2712765957446808


In [9]:
probs = clf.predict_log_proba(X_test)

y_test_am = y_test.argmax(axis=1)
probs_am = probs.argmax(axis=1)
print("Confusion Matrix: \n", confusion_matrix(y_test_am, probs_am))

correct = 0
for i in range(len(y_test_am)):
    if y_test_am[i] == probs_am[i]:
        correct += 1
print("\nAccuracy: ", correct/probs.shape[0])


Confusion Matrix: 
 [[33  2  2  0  0  0  1]
 [10  3  4  0  0  0  4]
 [11  4  7  0  0  0 16]
 [ 0  0  3  0  0  0 13]
 [ 1  0  0  0  0  0  6]
 [ 2  0  0  0  0  0 16]
 [ 2  2  2  0  0  0 44]]

Accuracy:  0.4627659574468085


---------------
<b>Multiclass #1</b>

The following tests are run to determine accuracies of various hidden layer and number of node setups for this same classifier (above), as a sort of coarse tuning of hyper-paramters to determine the best choice for this model.

Thus, by choosing the best-performing hidden layer structure, the model was able to predict the correct classification with  <b>50.5% accuracy on the testing dataset</b>.

In [10]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)


#Outputs the argmax-accuracies of various hidden layer setups for the NN.
#The dimension of the array corresponds to the number of hidden layers used.
#The index relates to the number of nodes in a given hidden layer per the formula (2^index).
#For example, the accuracy at index (3, 5) indicates the accuracy of a model with 2 hidden layers of 8 and then 32 nodes.
def hidden_layer_accuracies(X_train, X_test, y_train, y_test):
    accuracies = []
    accuracies_am = []

    for power in range(7):
        clf = MLPClassifier(solver='sgd', activation='relu',
                            hidden_layer_sizes=(2**power,), random_state=1, max_iter = 500)
        clf.fit(X_train, y_train)

        accuracy = clf.score(X_test, y_test)
        #print("Accuracy: ",accuracy) 
        accuracies.append(accuracy)

        probs = clf.predict_log_proba(X_test)

        correct = 0
        y_test_am = y_test.argmax(axis=1)
        probs_am = probs.argmax(axis=1)
        for i in range(len(y_test_am)):
            if y_test_am[i] == probs_am[i]:
                correct += 1
        #print("Confusion Matrix: \n", confusion_matrix(y_test_am, probs_am))
        #print("\nAccuracy: ", correct/probs.shape[0])
        accuracies_am.append(correct/probs.shape[0])

    print("1 Hidden Layer")
    print("\nAccuracies:",accuracies)
    print("\nAccuracies with argmax applied:",accuracies_am)

    accuracies2 = np.zeros((7,7))
    accuracies2_am = np.zeros((7,7))
    for power1 in range(7):
        for power2 in range(7):
            clf = MLPClassifier(solver='sgd', activation='relu',
                                hidden_layer_sizes=(2**power1, 2**power2), random_state=1, max_iter = 500)
            clf.fit(X_train, y_train)

            accuracy = clf.score(X_test, y_test)
            #print("Accuracy: ",accuracy) 
            accuracies2[power1, power2] = accuracy

            probs = clf.predict_log_proba(X_test)

            correct = 0
            y_test_am = y_test.argmax(axis=1)
            probs_am = probs.argmax(axis=1)
            for i in range(len(y_test_am)):
                if y_test_am[i] == probs_am[i]:
                    correct += 1
            #print("Confusion Matrix: \n", confusion_matrix(y_test_am, probs_am))
            #print("\nAccuracy: ", correct/probs.shape[0])
            accuracies2_am[power1, power2] = correct/probs.shape[0]

    print("\n\n2 Hidden Layers")
    print("\nAccuracies:",accuracies2)
    print("\nAccuracies with argmax applied:",accuracies2_am)
    
hidden_layer_accuracies(X_train, X_test, y_train, y_test)



1 Hidden Layer

Accuracies: [0.0, 0.16489361702127658, 0.25, 0.2553191489361702, 0.2553191489361702, 0.25, 0.24468085106382978]

Accuracies with argmax applied: [0.28191489361702127, 0.48936170212765956, 0.4787234042553192, 0.4787234042553192, 0.48936170212765956, 0.4787234042553192, 0.48404255319148937]






2 Hidden Layers

Accuracies: [[0.         0.18085106 0.15425532 0.15425532 0.16489362 0.15425532
  0.        ]
 [0.         0.         0.         0.         0.2712766  0.2712766
  0.26595745]
 [0.         0.         0.09574468 0.25531915 0.25       0.24468085
  0.28191489]
 [0.         0.27659574 0.         0.25       0.25       0.30319149
  0.24468085]
 [0.         0.17021277 0.24468085 0.2606383  0.31914894 0.26595745
  0.27659574]
 [0.17553191 0.25531915 0.20744681 0.2287234  0.27659574 0.2712766
  0.25      ]
 [0.         0.         0.25       0.28723404 0.25531915 0.25
  0.23404255]]

Accuracies with argmax applied: [[0.28191489 0.4893617  0.48404255 0.4787234  0.47340426 0.4787234
  0.28191489]
 [0.42021277 0.28191489 0.27659574 0.28191489 0.46808511 0.46276596
  0.46808511]
 [0.28191489 0.28191489 0.48404255 0.44680851 0.46276596 0.4787234
  0.45212766]
 [0.28191489 0.48404255 0.28191489 0.45744681 0.46808511 0.49468085
  0.4893617 ]
 [0.28191489 0.48404255 0.46276596 0.478723



<b>Finally, this model recorded an accuracy of 42.6% on the validation set.</b>

In [16]:
clf = MLPClassifier(solver='sgd', activation='relu',
        hidden_layer_sizes=(2**4, 2**4), random_state=1, max_iter = 500)

clf.fit(X_train, y_train)


accuracy = clf.score(X_val, y_val)
print("Accuracy: ",accuracy) 

probs = clf.predict_log_proba(X_val)

correct = 0
y_val_am = y_val.argmax(axis=1)
probs_am = probs.argmax(axis=1)

for i in range(len(y_val_am)):
    if y_val_am[i] == probs_am[i]:
        correct += 1
        
print("Confusion Matrix: \n", confusion_matrix(y_val_am, probs_am))
print("\nAccuracy: ", correct/probs.shape[0])

Accuracy:  0.2872340425531915
Confusion Matrix: 
 [[33  3  5  0  0  0  1]
 [10  5  3  1  0  0  2]
 [ 4  3  8  0  0  0  5]
 [ 1  2 10  0  0  0  8]
 [ 0  0  1  1  0  0  9]
 [ 0  1  3  0  0  0 25]
 [ 1  3  6  0  0  0 34]]

Accuracy:  0.425531914893617




----------
<b>Multiclass #2</b>

The following model will also be a multiclass classifier, but will utilize only the input data relating to usage of other drugs besides cannabis.

For similar reasons to above, argmax was used and various hidden layer setups were tested. This resulted in a best prediction accuracy of <b>50.5% for the test set</b>.

In [19]:
x = d
for l in demographic_labels:
    x = x.drop(l, 1)
    
for l in personality_labels:
    x = x.drop(l, 1)
    
    
targets = cannabis.astype('category').cat.codes.to_numpy()
y = np.zeros((7,len(targets)))
for m in range(len(targets)):
    y[targets[m], m] = 1


y = y.T
print(x.shape, y.shape)


X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)


#HYPERPARAMETERS - deciding 1 or 2 hidden layers, number of nodes
hidden_layer_accuracies(X_train, X_test, y_train, y_test)


  x = x.drop(l, 1)
  x = x.drop(l, 1)


(1877, 17) (1877, 7)




1 Hidden Layer

Accuracies: [0.0, 0.11702127659574468, 0.12234042553191489, 0.20212765957446807, 0.19148936170212766, 0.19148936170212766, 0.18617021276595744]

Accuracies with argmax applied: [0.2553191489361702, 0.48404255319148937, 0.4787234042553192, 0.48404255319148937, 0.4574468085106383, 0.48404255319148937, 0.48936170212765956]






2 Hidden Layers

Accuracies: [[0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.13829787 0.0106383  0.19680851 0.13829787 0.15957447
  0.19680851]
 [0.         0.         0.12234043 0.12765957 0.13297872 0.13297872
  0.13297872]
 [0.20744681 0.03723404 0.12765957 0.13297872 0.19680851 0.19148936
  0.18085106]
 [0.19680851 0.         0.12765957 0.12765957 0.19680851 0.17021277
  0.17553191]
 [0.         0.11702128 0.17021277 0.19680851 0.18617021 0.18617021
  0.19680851]
 [0.         0.         0.13297872 0.18617021 0.19680851 0.19148936
  0.19148936]]

Accuracies with argmax applied: [[0.25531915 0.25531915 0.25531915 0.25531915 0.25531915 0.25531915
  0.25531915]
 [0.25531915 0.4893617  0.49468085 0.4893617  0.49468085 0.5
  0.5       ]
 [0.25531915 0.25531915 0.4893617  0.49468085 0.5        0.50531915
  0.5       ]
 [0.4893617  0.4893617  0.48404255 0.49468085 0.4787234  0.4893617
  0.48404255]
 [0.4893617  0.4787234  0.49468085 0.49468085 0.4



<b>This model recorded an accuracy of 45.2% on the validation set, which makes this the best performing multiclass classifier.</b>

In [25]:
clf = MLPClassifier(solver='sgd', activation='relu',
        hidden_layer_sizes=(2**2, 2**5), random_state=1, max_iter = 500)

clf.fit(X_train, y_train)


accuracy = clf.score(X_val, y_val)
print("Accuracy: ",accuracy) 

probs = clf.predict_log_proba(X_val)

correct = 0
y_val_am = y_val.argmax(axis=1)
probs_am = probs.argmax(axis=1)

for i in range(len(y_val_am)):
    if y_val_am[i] == probs_am[i]:
        correct += 1
        
print("Confusion Matrix: \n", confusion_matrix(y_val_am, probs_am))
print("\nAccuracy: ", correct/probs.shape[0])

Accuracy:  0.14361702127659576
Confusion Matrix: 
 [[34  0  0  0  0  0  1]
 [12  0  0  0  0  0  9]
 [19  0  1  0  0  0  8]
 [ 2  1  0  0  0  0 23]
 [ 0  0  1  0  0  0  9]
 [ 0  0  0  0  0  0 16]
 [ 2  0  0  0  0  0 50]]

Accuracy:  0.4521276595744681




-----------
<b>Multiclass #3</b>

The following model will also be a multiclass classifier, but will utilize only the input data relating to the schedule of other drugs used besides cannabis.

For similar reasons to above, argmax was used and various hidden layer setups were tested. This resulted in a prediction accuracy of <b>47.3% on the test set</b>.

In [28]:
x = data

for l in demographic_labels:
    x = x.drop(l, 1)
    
for l in personality_labels:
    x = x.drop(l, 1)
    
for l in drug_labels:
    x = x.drop(l, 1)
    
x = x.drop(['Cannabis'], 1)
    
targets = cannabis.astype('category').cat.codes.to_numpy()
y = np.zeros((7,len(targets)))
for m in range(len(targets)):
    y[targets[m], m] = 1


y = y.T

print(x.shape, y.shape)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)

#HYPERPARAMETERS - deciding 1 or 2 hidden layers, number of nodes
hidden_layer_accuracies(X_train, X_test, y_train, y_test)

  x = x.drop(l, 1)
  x = x.drop(l, 1)
  x = x.drop(l, 1)
  x = x.drop(['Cannabis'], 1)


(1877, 5) (1877, 7)
1 Hidden Layer

Accuracies: [0.0, 0.0, 0.18617021276595744, 0.18617021276595744, 0.21808510638297873, 0.18617021276595744, 0.21808510638297873]

Accuracies with argmax applied: [0.26063829787234044, 0.26063829787234044, 0.46808510638297873, 0.46808510638297873, 0.46808510638297873, 0.46808510638297873, 0.46808510638297873]






2 Hidden Layers

Accuracies: [[0.         0.         0.         0.         0.         0.
  0.        ]
 [0.         0.         0.         0.21808511 0.21808511 0.21808511
  0.18617021]
 [0.         0.         0.18617021 0.         0.21808511 0.18617021
  0.18617021]
 [0.         0.         0.21808511 0.18617021 0.18617021 0.18617021
  0.18617021]
 [0.         0.18617021 0.21808511 0.21808511 0.21808511 0.21808511
  0.21808511]
 [0.         0.         0.21808511 0.18617021 0.21808511 0.21808511
  0.21808511]
 [0.         0.         0.21808511 0.21808511 0.21808511 0.21808511
  0.21808511]]

Accuracies with argmax applied: [[0.2606383  0.2606383  0.2606383  0.2606383  0.2606383  0.2606383
  0.2606383 ]
 [0.2606383  0.2606383  0.2606383  0.46808511 0.46808511 0.46808511
  0.46808511]
 [0.2606383  0.2606383  0.46808511 0.47340426 0.46808511 0.46808511
  0.46808511]
 [0.2606383  0.2606383  0.46808511 0.46808511 0.46808511 0.46808511
  0.46808511]
 [0.2606383  0.46808511 0.46808511 0.46808

<b>This model recorded an accuracy of 42.6% on the validation set.</b>

In [29]:
clf = MLPClassifier(solver='sgd', activation='relu',
        hidden_layer_sizes=(2**2, 2**3), random_state=1, max_iter = 500)

clf.fit(X_train, y_train)


accuracy = clf.score(X_val, y_val)
print("Accuracy: ",accuracy) 

probs = clf.predict_log_proba(X_val)

correct = 0
y_val_am = y_val.argmax(axis=1)
probs_am = probs.argmax(axis=1)

for i in range(len(y_val_am)):
    if y_val_am[i] == probs_am[i]:
        correct += 1
        
print("Confusion Matrix: \n", confusion_matrix(y_val_am, probs_am))
print("\nAccuracy: ", correct/probs.shape[0])

Accuracy:  0.0
Confusion Matrix: 
 [[30  0  0  0  0  0  3]
 [ 8  0  0  0  0  0 11]
 [ 8  0  0  0  0  0 21]
 [ 4  0  0  0  0  0 22]
 [ 1  0  0  0  0  0 10]
 [ 0  0  0  0  0  0 19]
 [ 1  0  0  0  0  0 50]]

Accuracy:  0.425531914893617




 ----------------------
 
The following models will now instead be used as binary classifiers, determing whether an individual is a user of cannabis or not. We are no longer attempting to classify the recency of use with these models.

---------
<b>Binary classification #1</b>

The first model uses all provided input data to predict this binary classification.

Just as previously done, argmax of each probability prediction was used, which brings the accuracy up slightly from 86.2% to 87.8%. This jump is expected to be only slight due to the greatly reduced number of output classifications available, and so the backwards propogation doesn't reward a non-classification as much in this scenario.

In [31]:
x = d

#for p in personality_labels:
    #x[p] = x[p].astype('category').cat.codes.to_numpy()
x = pd.get_dummies(x, columns=(personality_labels))
    
#for d in demographic_labels:
#    x[d] = x[d].astype('category').cat.codes.to_numpy()
x = pd.get_dummies(x, columns=(demographic_labels))

newtargets = np.where(targets == 0, 0, 1)

y = np.zeros((2,len(newtargets)))
for m in range(len(newtargets)):
    y[newtargets[m], m] = 1
y = y.T


X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)

#HYPERPARAMETERS - deciding 1 or 2 hidden layers, number of nodes
hidden_layer_accuracies(X_train, X_test, y_train, y_test)



1 Hidden Layer

Accuracies: [0.8404255319148937, 0.8563829787234043, 0.851063829787234, 0.8404255319148937, 0.8563829787234043, 0.8457446808510638, 0.8351063829787234]

Accuracies with argmax applied: [0.851063829787234, 0.8563829787234043, 0.851063829787234, 0.851063829787234, 0.8563829787234043, 0.851063829787234, 0.8563829787234043]








2 Hidden Layers

Accuracies: [[0.83510638 0.86702128 0.7606383  0.85638298 0.84574468 0.7606383
  0.7606383 ]
 [0.7606383  0.7606383  0.85638298 0.85106383 0.84574468 0.84574468
  0.7606383 ]
 [0.7606383  0.7606383  0.84574468 0.86702128 0.85638298 0.86702128
  0.85106383]
 [0.86170213 0.84574468 0.84574468 0.84574468 0.85638298 0.84042553
  0.84042553]
 [0.7606383  0.7606383  0.83510638 0.82978723 0.84042553 0.84042553
  0.85638298]
 [0.85638298 0.85106383 0.83510638 0.86170213 0.85106383 0.85106383
  0.85638298]
 [0.85106383 0.84574468 0.84574468 0.84042553 0.85638298 0.86170213
  0.83510638]]

Accuracies with argmax applied: [[0.84574468 0.86702128 0.7606383  0.85638298 0.84574468 0.7606383
  0.7606383 ]
 [0.7606383  0.7606383  0.85638298 0.85106383 0.84574468 0.84574468
  0.7606383 ]
 [0.7606383  0.7606383  0.84574468 0.86702128 0.87234043 0.86702128
  0.85106383]
 [0.86170213 0.85106383 0.85106383 0.84574468 0.85638298 0.84042553
  0.84042553]
 [0.7606383  0.7606383  0.83510638 



<b>This model recorded an accuracy of 89.9% on the validation set.</b>

In [32]:
clf = MLPClassifier(solver='sgd', activation='relu',
        hidden_layer_sizes=(2**5, 2**3), random_state=1, max_iter = 500)

clf.fit(X_train, y_train)


accuracy = clf.score(X_val, y_val)
print("Accuracy: ",accuracy) 

probs = clf.predict_log_proba(X_val)

correct = 0
y_val_am = y_val.argmax(axis=1)
probs_am = probs.argmax(axis=1)

for i in range(len(y_val_am)):
    if y_val_am[i] == probs_am[i]:
        correct += 1
        
print("Confusion Matrix: \n", confusion_matrix(y_val_am, probs_am))
print("\nAccuracy: ", correct/probs.shape[0])

Accuracy:  0.8936170212765957
Confusion Matrix: 
 [[ 33   9]
 [ 10 136]]

Accuracy:  0.898936170212766




------------------
<b>Binary classification #2</b>

The following model uses only usage of drugs besides cannabis as input variables (no demographics or personality types). 
#This ended up being the most accurate prediction model. 

For similar reasons to above, argmax was used and various hidden layer setups were tested. This resulted in a prediction accuracy of <b>89.9% on the test set</b>.

In [36]:
x = d
for l in demographic_labels:
    x = x.drop(l, 1)
    
for l in personality_labels:
    x = x.drop(l, 1)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)

hidden_layer_accuracies(X_train, X_test, y_train, y_test)

  x = x.drop(l, 1)
  x = x.drop(l, 1)


1 Hidden Layer

Accuracies: [0.7393617021276596, 0.8882978723404256, 0.8936170212765957, 0.8829787234042553, 0.8882978723404256, 0.8829787234042553, 0.8829787234042553]

Accuracies with argmax applied: [0.7393617021276596, 0.8882978723404256, 0.8936170212765957, 0.8882978723404256, 0.8882978723404256, 0.8829787234042553, 0.8829787234042553]






2 Hidden Layers

Accuracies: [[0.7393617  0.7393617  0.7393617  0.7393617  0.7393617  0.7393617
  0.7393617 ]
 [0.89361702 0.7393617  0.89361702 0.89361702 0.87765957 0.89361702
  0.89361702]
 [0.88829787 0.89361702 0.87765957 0.88829787 0.88297872 0.88297872
  0.88829787]
 [0.7393617  0.7393617  0.89361702 0.88829787 0.88297872 0.88297872
  0.88829787]
 [0.7393617  0.89361702 0.89361702 0.89361702 0.88829787 0.88829787
  0.87765957]
 [0.7393617  0.89361702 0.89893617 0.88829787 0.88297872 0.88829787
  0.88297872]
 [0.88829787 0.88829787 0.88297872 0.88829787 0.88297872 0.88829787
  0.88829787]]

Accuracies with argmax applied: [[0.7393617  0.7393617  0.7393617  0.7393617  0.7393617  0.7393617
  0.7393617 ]
 [0.89893617 0.7393617  0.89361702 0.89361702 0.87765957 0.89361702
  0.89361702]
 [0.88829787 0.89361702 0.88297872 0.88829787 0.88829787 0.88297872
  0.88829787]
 [0.7393617  0.7393617  0.89361702 0.88829787 0.88829787 0.89361702
  0.88829787]
 [0.7393617  0.89361702 0.89361702 



<b>This model recorded an accuracy of 91.5% on the validation set, which is the strongest performance of these classifiers.</b>

In [37]:
clf = MLPClassifier(solver='sgd', activation='relu',
        hidden_layer_sizes=(2**1, 2**0), random_state=1, max_iter = 500)

clf.fit(X_train, y_train)


accuracy = clf.score(X_val, y_val)
print("Accuracy: ",accuracy) 

probs = clf.predict_log_proba(X_val)

correct = 0
y_val_am = y_val.argmax(axis=1)
probs_am = probs.argmax(axis=1)

for i in range(len(y_val_am)):
    if y_val_am[i] == probs_am[i]:
        correct += 1
        
print("Confusion Matrix: \n", confusion_matrix(y_val_am, probs_am))
print("\nAccuracy: ", correct/probs.shape[0])

Accuracy:  0.9148936170212766
Confusion Matrix: 
 [[ 17  12]
 [  4 155]]

Accuracy:  0.9148936170212766




-------------------------
<b>Binary classification #3</b>

Binary classification of the output using only drug classifications as input variables (no demographics, personality types, or usage of individual drugs)

For similar reasons to above, argmax was used and various hidden layer setups were tested. This resulted in a prediction accuracy of <b>87.8% on the test set</b>.

In [38]:
x = data

for l in demographic_labels:
    x = x.drop(l, 1)
    
for l in personality_labels:
    x = x.drop(l, 1)
    
for l in drug_labels:
    x = x.drop(l, 1)
    
x = x.drop(['Cannabis'], 1)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5)

hidden_layer_accuracies(X_train, X_test, y_train, y_test)

  x = x.drop(l, 1)
  x = x.drop(l, 1)
  x = x.drop(l, 1)
  x = x.drop(['Cannabis'], 1)


1 Hidden Layer

Accuracies: [0.776595744680851, 0.8404255319148937, 0.8404255319148937, 0.8404255319148937, 0.8138297872340425, 0.8404255319148937, 0.8404255319148937]

Accuracies with argmax applied: [0.776595744680851, 0.8404255319148937, 0.8404255319148937, 0.8404255319148937, 0.8776595744680851, 0.8404255319148937, 0.8404255319148937]






2 Hidden Layers

Accuracies: [[0.77659574 0.77659574 0.77659574 0.77659574 0.77659574 0.77659574
  0.77659574]
 [0.77659574 0.77659574 0.87765957 0.84042553 0.81382979 0.87765957
  0.84042553]
 [0.87765957 0.87765957 0.87765957 0.84042553 0.84042553 0.84042553
  0.84042553]
 [0.77659574 0.77659574 0.84042553 0.84042553 0.84042553 0.84042553
  0.84042553]
 [0.87765957 0.77659574 0.81382979 0.81382979 0.81382979 0.87765957
  0.84042553]
 [0.77659574 0.81382979 0.87765957 0.84042553 0.84042553 0.84042553
  0.84042553]
 [0.77659574 0.77659574 0.87765957 0.81382979 0.84042553 0.84042553
  0.84042553]]

Accuracies with argmax applied: [[0.77659574 0.77659574 0.77659574 0.77659574 0.77659574 0.77659574
  0.77659574]
 [0.77659574 0.77659574 0.87765957 0.84042553 0.84042553 0.87765957
  0.84042553]
 [0.87765957 0.87765957 0.87765957 0.84042553 0.84042553 0.84042553
  0.84042553]
 [0.77659574 0.77659574 0.84042553 0.84042553 0.84042553 0.84042553
  0.84042553]
 [0.87765957 0.77659574 0.8404255

<b>This model recorded an accuracy of 86.2% on the validation set.</b>

In [39]:
clf = MLPClassifier(solver='sgd', activation='relu',
        hidden_layer_sizes=(2**1, 2**2), random_state=1, max_iter = 500)

clf.fit(X_train, y_train)


accuracy = clf.score(X_val, y_val)
print("Accuracy: ",accuracy) 

probs = clf.predict_log_proba(X_val)

correct = 0
y_val_am = y_val.argmax(axis=1)
probs_am = probs.argmax(axis=1)

for i in range(len(y_val_am)):
    if y_val_am[i] == probs_am[i]:
        correct += 1
        
print("Confusion Matrix: \n", confusion_matrix(y_val_am, probs_am))
print("\nAccuracy: ", correct/probs.shape[0])

Accuracy:  0.8617021276595744
Confusion Matrix: 
 [[ 42   3]
 [ 23 120]]

Accuracy:  0.8617021276595744
