In [1]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import os

In [3]:
depression_df = pd.read_csv(os.path.join("dep2.csv"))
depression_df.head()

Unnamed: 0,sex,age,marital_status,children,household_size,years_of_edu,hh_children,cons_nondurable,asset_durable,asset_phone,...,fs_meat,fs_enoughtom,fs_sleephun,med_sickdays_hhave,med_u5_deaths,ed_expenses,ed_schoolattend,durable_investment,nondurable_investment,depressed
0,1,21.0,0,3,4,10,3,357.81769,208.19981,40.038425,...,3.0,0.0,1.0,1.0,0.0,0.0,0.0,569.85034,48.166222,0.0
1,1,44.0,1,6,8,6,6,233.44154,11.69122,0.0,...,5.0,0.0,0.0,2.75,0.0,18.257523,0.8,252.6501,14.711897,0.0
2,1,23.0,1,1,3,7,1,171.97076,120.91605,56.053795,...,2.0,0.0,1.0,2.666667,0.0,8.6483,1.0,141.73602,0.720692,0.0
3,1,67.0,0,0,1,1,0,37.013428,32.831509,0.0,...,1.0,0.0,1.0,3.0,0.0,0.0,0.0,58.287693,4.804611,1.0
4,1,28.0,1,4,6,10,0,0.0,0.0,0.0,...,3.068881,0.0,0.0,1.438596,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
target = depression_df["depressed"]
target_names = ["Not Depressed", "Depressed"]

In [5]:
data = depression_df.drop("depressed", axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,sex,age,marital_status,children,household_size,years_of_edu,hh_children,cons_nondurable,asset_durable,asset_phone,...,fs_chwholed_often,fs_meat,fs_enoughtom,fs_sleephun,med_sickdays_hhave,med_u5_deaths,ed_expenses,ed_schoolattend,durable_investment,nondurable_investment
0,1,21.0,0,3,4,10,3,357.81769,208.19981,40.038425,...,0.0,3.0,0.0,1.0,1.0,0.0,0.0,0.0,569.85034,48.166222
1,1,44.0,1,6,8,6,6,233.44154,11.69122,0.0,...,0.0,5.0,0.0,0.0,2.75,0.0,18.257523,0.8,252.6501,14.711897
2,1,23.0,1,1,3,7,1,171.97076,120.91605,56.053795,...,0.0,2.0,0.0,1.0,2.666667,0.0,8.6483,1.0,141.73602,0.720692
3,1,67.0,0,0,1,1,0,37.013428,32.831509,0.0,...,0.0,1.0,0.0,1.0,3.0,0.0,0.0,0.0,58.287693,4.804611
4,1,28.0,1,4,6,10,0,0.0,0.0,0.0,...,0.504098,3.068881,0.0,0.0,1.438596,0.0,0.0,0.0,0.0,0.0


In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [7]:
# Support vector machine linear classifier
from sklearn.svm import SVC 
model = SVC(kernel='linear')

In [8]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Model Accuracy
model.fit(X_train_scaled, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [10]:
training_score = model.score(X_train_scaled, y_train)

In [11]:
testing_score = model.score(X_test_scaled, y_test)

In [12]:
print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

Training Score: 0.8372093023255814
Testing Score: 0.8118466898954704


In [13]:
# Calculate classification report
from sklearn.metrics import classification_report
predictions = model.predict(X_test)
print(classification_report(y_test, predictions,
                            target_names=target_names))

               precision    recall  f1-score   support

Not Depressed       0.81      0.99      0.89       233
    Depressed       0.33      0.02      0.04        54

     accuracy                           0.81       287
    macro avg       0.57      0.50      0.46       287
 weighted avg       0.72      0.81      0.73       287



In [14]:
predictions = model.predict(X_test_scaled)
predictions

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [15]:
predictionvsactual = pd.DataFrame({"Prediction": predictions, "Actual": y_test})

In [21]:
predictionvsactual.head(59)

Unnamed: 0,Prediction,Actual
290,0.0,0.0
1112,0.0,0.0
852,0.0,0.0
413,0.0,0.0
1138,0.0,0.0
107,0.0,0.0
830,0.0,0.0
1125,0.0,0.0
844,0.0,0.0
590,0.0,0.0


In [22]:
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 
  
actual = y_test
predicted = predictions
results = confusion_matrix(predicted, actual)

In [23]:
results

array([[233,  54],
       [  0,   0]], dtype=int64)

In [24]:
sensitivity1 = results[1,1]/(results[0,1]+results[1,1])
print('Sensitivity : ', sensitivity1 )

specificity1 = results[0,0]/(results[0,0]+results[1,0])
print('Specificity : ', specificity1)

Sensitivity :  0.0
Specificity :  1.0
