In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix

df = pd.read_csv("drug_consumption.csv", 
                  names=["Age", "Gender", "Education", "Country", "Ethnicity", "Nscore", "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS", "Alcohol", "Amphet", "Amyl", "Benzos", "Caff", "Cannabis", "Choc", "Coke", "Crack", "Ecstasy", "Heroin", "Ketamine", "Legalh", "LSD", "Meth", "Mushrooms", "Nicotine", "Semer", "VSA"])

In [2]:
arr = ["Alcohol", "Amphet", "Amyl", "Benzos", "Caff", "Cannabis", "Choc", "Coke", "Crack", "Ecstasy", "Heroin", "Ketamine", "Legalh", "LSD", "Meth", "Mushrooms", "Nicotine", "Semer", "VSA"]

# remove CL and convert strings to ints
# CL0   - Never used  : 0
# CL1-2 - Former user : 1
# CL3-6 - User        : 2
for i in range(len(arr)):
    df[arr[i]].replace({"CL0": "0", "CL1": "1", "CL2": "1", "CL3": "2", "CL4": "2", "CL5": "2", "CL6": "2"}, inplace=True)
    df[arr[i]] = pd.to_numeric(df[arr[i]])

df

Unnamed: 0,Age,Gender,Education,Country,Ethnicity,Nscore,Escore,Oscore,Ascore,Cscore,...,Ecstasy,Heroin,Ketamine,Legalh,LSD,Meth,Mushrooms,Nicotine,Semer,VSA
1,0.49788,0.48246,-0.05921,0.96082,0.12600,0.31287,-0.57545,-0.58331,-0.91699,-0.00665,...,0,0,0,0,0,0,0,1,0,0
2,-0.07854,-0.48246,1.98437,0.96082,-0.31685,-0.67825,1.93886,1.43533,0.76096,-0.14277,...,2,0,1,0,1,2,0,2,0,0
3,0.49788,-0.48246,-0.05921,0.96082,-0.31685,-0.46725,0.80523,-0.84732,-1.62090,-1.01450,...,0,0,0,0,0,0,1,0,0,0
4,-0.95197,0.48246,1.16365,0.96082,-0.31685,-0.14882,-0.80615,-0.01928,0.59042,0.58489,...,0,0,1,0,0,0,0,1,0,0
5,0.49788,0.48246,1.98437,0.96082,-0.31685,0.73545,-1.63340,-0.45174,-0.30172,1.30612,...,1,0,0,1,0,0,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1884,-0.95197,0.48246,-0.61113,-0.57009,-0.31685,-1.19430,1.74091,1.88511,0.76096,-1.13788,...,0,0,0,2,2,0,0,0,0,2
1885,-0.95197,-0.48246,-0.61113,-0.57009,-0.31685,-0.24649,1.74091,0.58331,0.76096,-1.51840,...,1,0,0,2,2,2,2,2,0,0
1886,-0.07854,0.48246,0.45468,-0.57009,-0.31685,1.13281,-1.37639,-1.27553,-1.77200,-1.38502,...,2,0,1,0,1,0,1,2,0,0
1887,-0.95197,0.48246,-0.61113,-0.57009,-0.31685,0.91093,-1.92173,0.29338,-1.62090,-2.57309,...,2,0,0,2,2,0,2,2,0,0


In [12]:
mlp = MLPClassifier(solver = 'sgd', 
                    random_state = 1, 
                    activation = 'logistic', 
                    learning_rate_init = 0.3, 
                    batch_size = 100, 
                    hidden_layer_sizes = (8, 3), 
                    max_iter = 500)
X = df[["Age", "Gender", "Education", "Country", "Ethnicity", "Nscore", "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS"]]

# X = df[["Age", "Gender", "Education", "Country", "Ethnicity"]]
selected_substances = ["Nscore", "Escore", "Oscore", "Ascore", "Cscore", "Impulsive", "SS"]

conf_matrices = []  # List of of confusion matrices, one for each substance
# selected_substances = ['Alcohol', 'Amphet', 'Cannabis', 'Coke', 'Ecstasy', 'LSD', 'Meth', 'Mushrooms']
df[selected_substances]

Unnamed: 0,Nscore,Escore,Oscore,Ascore,Cscore,Impulsive,SS
1,0.31287,-0.57545,-0.58331,-0.91699,-0.00665,-0.21712,-1.18084
2,-0.67825,1.93886,1.43533,0.76096,-0.14277,-0.71126,-0.21575
3,-0.46725,0.80523,-0.84732,-1.62090,-1.01450,-1.37983,0.40148
4,-0.14882,-0.80615,-0.01928,0.59042,0.58489,-1.37983,-1.18084
5,0.73545,-1.63340,-0.45174,-0.30172,1.30612,-0.21712,-0.21575
...,...,...,...,...,...,...,...
1884,-1.19430,1.74091,1.88511,0.76096,-1.13788,0.88113,1.92173
1885,-0.24649,1.74091,0.58331,0.76096,-1.51840,0.88113,0.76540
1886,1.13281,-1.37639,-1.27553,-1.77200,-1.38502,0.52975,-0.52593
1887,0.91093,-1.92173,0.29338,-1.62090,-2.57309,1.29221,1.22470


In [13]:
for substance in selected_substances:
    y = df[substance]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
    
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)
    
    print("--", substance, "--")
    print("Accuracy : ", accuracy_score(y_test, y_pred))
    print("Mean Square Error : ", mean_squared_error(y_test, y_pred))
    conf_matrices.append(confusion_matrix(y_test, y_pred))

ValueError: Unknown label type: (array([-0.05188, -1.05308, -0.14882, ..., -0.58016, -0.34799,  1.49158]),)