# Machine Learning Assignment
## Depression classification model

Import all the modules and libraries

In [13]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import pickle as pk

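The code below configures TensorFlow's GPU memory handling: it enables memory growth so that GPU memory is allocated on demand instead of being reserved all at once. Note that the scikit-learn SVM trained later in this notebook runs on the CPU and is not affected by this setting.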

In [14]:
# Enable memory growth on every visible GPU so TensorFlow allocates GPU memory
# on demand instead of reserving it all up front.
# Iterating the list (rather than indexing [0]) makes this cell a no-op on a
# CPU-only machine, where the list is empty, instead of raising IndexError.
# It also keeps the setting identical across GPUs when more than one is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

Read data in the csv file

In [29]:
# Load the dataset into a DataFrame; the path is relative to the notebook's
# working directory.
data = pd.read_csv(filepath_or_buffer="datasetDepress.csv")

Pre-process the data: drop rows that contain missing values, then separate the `Condition` label column from the feature columns.

In [30]:
# Drop incomplete rows first so that the features and the label stay aligned,
# then split the frame into the label series `y` and the feature frame `data`.
# Explicit reassignment is used instead of `inplace=True`, so each step yields
# a new frame rather than mutating the loaded one behind the reader's back.
data = data.dropna()
y = data['Condition']
data = data.drop(columns='Condition')

Inspect summary statistics of the features, then convert every column to float32.

In [31]:
# Show summary statistics (count, mean, std, min, quartiles, max) for the
# columns left in `data`.
data.describe()

Unnamed: 0,Jantina (0:Perempuan. 1:Lelaki),"Bangsa (0:Melayu, 1:Cina, 2:India, 3:Lain-lain)",Umur,"Status perkahwinan (0:Bujang, 1:Berkahwin, 2:Bercerai, 3:Kematian pasangan)","Kategori Jumlah anak (0:0, 1:1-2, 2:3-4, 3:5+)","Tahap pendidikan (0:Sekolah Menengah, 1:Diploma atau Sijil, 2:Ijazah Sarjana Muda, 3:Ijazah Sarjana atau Doktor Falsafah)","Status kerja (0:Sepenuh masa atau tetap, 1:Separuh masa, 2:Sambung belajar, 3:Suri rumah, 4:Tidak bekerja)",Tahap kesihatan,1 Kesedihan,2 Pesimis,...,11 Sakit hati,12 Hilang minat,13 Sukar buat keputusan,14 Tak berguna,15 Hilang tenaga,16 Perubahan tidur,17 Terganggu,18 Perubahan selera,19 Masalah berat badan,20 Risau keadaan fizikal
count,320.0,320.0,320.0,320.0,320.0,320.0,320.0,320.0,320.0,320.0,...,320.0,320.0,320.0,320.0,320.0,320.0,320.0,320.0,320.0,320.0
mean,0.1,0.084375,24.33125,0.175,0.203125,1.521875,1.721875,2.665625,1.16875,1.2625,...,1.325,1.371875,1.128125,1.521875,1.25625,0.809375,1.2875,0.728125,0.215625,0.76875
std,0.30047,0.470849,5.483771,0.419733,0.612812,0.733965,1.510929,0.522898,0.832128,0.930079,...,1.035725,1.02132,0.979007,1.195402,0.861921,0.901708,0.959281,0.758126,0.543188,0.727618
min,0.0,0.0,15.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,20.75,0.0,0.0,1.0,0.0,2.0,1.0,1.0,...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
50%,0.0,0.0,23.0,0.0,0.0,2.0,2.0,3.0,1.0,1.0,...,1.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,0.0,1.0
75%,0.0,0.0,26.25,0.0,0.0,2.0,3.0,3.0,2.0,2.0,...,2.0,2.0,2.0,3.0,2.0,1.0,2.0,1.0,0.0,1.0
max,1.0,3.0,51.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,...,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0


In [32]:
# Cast every column to float32 in a single vectorized call.
# `dict.fromkeys` builds a {column: 'float32'} mapping, which DataFrame.astype
# applies to all columns at once instead of looping and reassigning one by one.
column_names = data.columns
data = data.astype(dict.fromkeys(column_names, 'float32'))

In [33]:
# Preview the first five rows of the feature frame after the dtype conversion.
data.head(n=5)

Unnamed: 0,Jantina (0:Perempuan. 1:Lelaki),"Bangsa (0:Melayu, 1:Cina, 2:India, 3:Lain-lain)",Umur,"Status perkahwinan (0:Bujang, 1:Berkahwin, 2:Bercerai, 3:Kematian pasangan)","Kategori Jumlah anak (0:0, 1:1-2, 2:3-4, 3:5+)","Tahap pendidikan (0:Sekolah Menengah, 1:Diploma atau Sijil, 2:Ijazah Sarjana Muda, 3:Ijazah Sarjana atau Doktor Falsafah)","Status kerja (0:Sepenuh masa atau tetap, 1:Separuh masa, 2:Sambung belajar, 3:Suri rumah, 4:Tidak bekerja)",Tahap kesihatan,1 Kesedihan,2 Pesimis,...,11 Sakit hati,12 Hilang minat,13 Sukar buat keputusan,14 Tak berguna,15 Hilang tenaga,16 Perubahan tidur,17 Terganggu,18 Perubahan selera,19 Masalah berat badan,20 Risau keadaan fizikal
0,0.0,3.0,20.0,0.0,0.0,1.0,2.0,1.0,2.0,3.0,...,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,0.0,0.0
1,0.0,0.0,20.0,0.0,0.0,2.0,2.0,3.0,1.0,2.0,...,3.0,2.0,0.0,3.0,1.0,1.0,0.0,2.0,0.0,0.0
2,0.0,0.0,26.0,0.0,0.0,2.0,0.0,3.0,2.0,1.0,...,1.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,0.0,0.0
3,0.0,0.0,27.0,0.0,0.0,1.0,0.0,2.0,1.0,1.0,...,1.0,0.0,1.0,0.0,2.0,3.0,2.0,2.0,2.0,2.0
4,0.0,0.0,18.0,0.0,0.0,1.0,2.0,2.0,3.0,3.0,...,3.0,3.0,2.0,3.0,2.0,3.0,3.0,3.0,2.0,0.0


Split the data into training set and testing set

In [35]:
# Hold out 33% of the rows for testing. The fixed random_state makes the
# shuffled split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    y,
    test_size=0.33,
    random_state=42,
    shuffle=True,
)

Train the SVM classifier on the training set

In [36]:
# Build the support-vector classifier and fit it on the training split in one
# step; `fit` returns the estimator itself. Only `decision_function_shape` is
# set explicitly, every other hyper-parameter is left at its library default.
# NOTE(review): the features are passed in unscaled (e.g. age next to 0-3
# ordinal scores); consider standardising them before fitting.
model = svm.SVC(decision_function_shape='ovo').fit(X_train, y_train)
model

Predict the labels of the test set and report the accuracy

In [37]:
# Predict labels for the held-out test set, then score them.
# The predictions are stored as `y_pred` (they are labels, not an accuracy),
# and accuracy_score is called in its documented (y_true, y_pred) order.
y_pred = model.predict(X_test)
metrics.accuracy_score(y_test, y_pred)


0.6886792452830188

Export the trained model to a `.sav` file (pickle format)

In [38]:
# Persist the fitted classifier with pickle.
# The `with` block guarantees the file handle is flushed and closed, even if
# `dump` raises; a bare `open(...)` passed inline is never explicitly closed.
# NOTE: pickle files can execute arbitrary code when loaded, so only unpickle
# this file from a trusted source.
filename = "depression_model.sav"
with open(filename, 'wb') as model_file:
    pk.dump(model, model_file)