## Import Required Libraries

In [1]:
import pandas as pd
import pickle as pickle    #used for serializing and de-serializing a Python object structure
#import lightgbm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import  LabelEncoder
from lightgbm import LGBMClassifier     # lightgbm is a gradient boosting framework that uses tree based learning algorithms.
from sklearn.metrics import accuracy_score, f1_score

## Load dataset with pandas

In [2]:
dataset = pd.read_csv('E:\ML\PROJECTS\DEPRESSION\dataset_sd.csv')

## View top-5 rows of dataset

In [17]:
dataset.head(5)

Unnamed: 0,Branch,Year,College,Gender,Do you feel Depressed?,When do you get up in the morning?,What is your sleeping posture?,You start your day with?,How do you work out everyday?,How much time do you spend in exercising daily?,...,Have you often felt like crying for no reason?,How often does your grades and college work suffer because of the amount of time you spend online?,How do you feel after seeing post on success of people on social sites?,How often do you get angry at small things ?,Do you often feel restless and can't keep still ?,Do you feel happy?,Have you blamed yourself unnecessarily when things went wrong?,"Do you prefer to stay at home,rather than going out and doing new things?",Are you satisfied with your life?,Do you think about suicide ?
0,IT,2,BIT,Male,Several days,6 AM- 7AM,Facing right side,Taking tea,,,...,Not at all,A little of time,Neutral,Some of the time,Not at all,Most of the time,Some of the time,Some of the time,Some of the time,No
1,IT,2,BIT,Female,Several days,7AM-8AM,Facing right side,Drinking cold water,Gymnasium,> half an hour,...,Some of the time,Most of the time,Happy and motivated,Some of the time,A little of time,Most of the time,Some of the time,A little of time,A little of time,No
2,IT,2,BIT,Male,Not at all,After 8AM,Facing left sie,Drinking cold water,football,> half an hour,...,Some of the time,Some of the time,Happy and motivated,Most of the time,Not at all,Most of the time,Not at all,Most of the time,Most of the time,No
3,IT,2,BIT,Male,Several days,7AM-8AM,Back,Drinking cold water,Gymnasium,15 min,...,A little of time,A little of time,Neutral,Some of the time,A little of time,Most of the time,Some of the time,A little of time,Most of the time,No
4,IT,2,BIT,Male,Several days,6 AM- 7AM,Back,Drinking cold water,Gymnasium,> half an hour,...,Not at all,Some of the time,Jealous,Some of the time,A little of time,Some of the time,Not very often,A little of time,Some of the time,No


## Get List of all columns

In [3]:
print('Columns of dataset \n',dataset.columns)

Columns of dataset 
 Index(['Branch', 'Year', 'College', 'Gender', 'Do you feel Depressed?',
       'When do you get up in the morning?', 'What is your sleeping posture?',
       'You start your day with?', 'How do you work out everyday?',
       'How much time do you spend in exercising daily?',
       'What is first diet you have after waking up?',
       'Do you take afternoon nap?', 'How do you feel after afternoon nap?',
       'What is average usage duration of electronic gadgets by you?',
       'What is your sleeping pattern?',
       'Do you have trouble in falling asleep?',
       'Do you feel that you are useful and needed?',
       'Moving or speaking so slowly that other people could have noticed',
       'Poor Appetite or overeating',
       'Do you have gradual loss or gain in weight?',
       'Do you feel more irritable than usual?',
       'Do you get tired for no reason?',
       'Do you have any of the following general health conditions?',
       'Have you had nause

## Get Unique value counts of each column in dataset

In [4]:
print('column Unique_column_count')
for col in dataset:
    print(len(dataset[col].unique()), col)

column Unique_column_count
8 Branch
4 Year
2 College
2 Gender
4 Do you feel Depressed?
4 When do you get up in the morning?
5 What is your sleeping posture?
109 You start your day with?
79 How do you work out everyday?
4 How much time do you spend in exercising daily?
161 What is first diet you have after waking up?
2 Do you take afternoon nap?
56 How do you feel after afternoon nap?
4 What is average usage duration of electronic gadgets by you?
4 What is your sleeping pattern?
7 Do you have trouble in falling asleep?
4 Do you feel that you are useful and needed?
4 Moving or speaking so slowly that other people could have noticed
4 Poor Appetite or overeating
3 Do you have gradual loss or gain in weight?
4 Do you feel more irritable than usual?
4 Do you get tired for no reason?
63 Do you have any of the following general health conditions?
8 Have you had nausea?
4 Do you feel that life isn't worth living?
4 Things have been getting on top of me
4 Do you frequently smoke or drink?
4 Do 

## Remove columns with too many unique values (to avoid overfitting)

In [5]:
dropping_cols = ['What is first diet you have after waking up?','How do you work out everyday?','You start your day with?','How do you feel after afternoon nap?', 'Do you have any of the following general health conditions?']
dataset.drop(dropping_cols, axis=1, inplace=True)

## Unique Values of Dataset columns after removing irrelevant columns

In [6]:
print('column Unique_column_values')
for col in dataset:
    print( dataset[col].unique(), col)

column Unique_column_values
['IT' 'CSE' 'Mech' 'ETC' 'Civil' 'EE' 'EEE' 'Others'] Branch
[2 1 3 4] Year
['BIT' 'Others'] College
['Male' 'Female'] Gender
['Several days' 'Not at all' 'Nearly every day'
 'More than half of  the days'] Do you feel Depressed?
['6 AM- 7AM' '7AM-8AM' 'After 8AM' 'Before 6 am'] When do you get up in the morning?
['Facing right side' 'Facing left sie' 'Back' 'Straight'
 'Facing left side'] What is your sleeping posture?
['None' '> half an hour' '15 min' 'half an hour'] How much time do you spend in exercising daily?
['No' 'Yes'] Do you take afternoon nap?
['2-3hours' '> 3hours' '1-2hours' '1 hour'] What is average usage duration of electronic gadgets by you?
['Sound sleep' 'Broken sleep' 'Insomania' 'Sleep with scary or bad dreams'] What is your sleeping pattern?
['Not at all' 'Every day' 'Few days' 'HALF OF A WEEK' 'Situation depends'
 'Very few days' 'Sometimes when i am too tired '] Do you have trouble in falling asleep?
['Some of the time' 'Most of time' 

## Split dataset into features & target dataset

In [7]:
dataset_features = dataset.drop(['Do you think about suicide ?'], axis=1)
dataset_target = pd.DataFrame(dataset['Do you think about suicide ?'])

## Encode Features into incremental numeric values

In [8]:
le = LabelEncoder()

In [9]:
for col in dataset_features:
    dataset_features[col] = le.fit_transform(dataset_features[col])

## Split dataset into train-set & test-set (75% trainset, 25% test-set)

In [28]:
x_train, x_test, y_train, y_test = train_test_split(dataset_features, dataset_target, test_size=0.25, random_state=42)

In [31]:
print ('Train-set shape',x_train.shape, y_train.shape)
print ('Test-set shape', x_test.shape, y_test.shape)

Train-set shape (594, 35) (594, 1)
Test-set shape (198, 35) (198, 1)


## Generate the prediction model with train-set

In [32]:
model = LGBMClassifier(n_estimators=20)

In [33]:
model_fit = model.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


## Save the model

In [34]:
model_file = 'sd_model.pkl'

In [35]:
with open(model_file, 'wb') as fp:
     pickle.dump(model_fit, fp)

## Load the Model

In [36]:
model_file = 'sd_model.pkl'

In [37]:
with open(model_file, 'rb') as fp:
     model_fit = pickle.load(fp)

## Prediction on test-set

In [39]:
y_predict = model_fit.predict(x_test)
print (y_predict)

['No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'Suicidal ideas and gestures but no attempt' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'Yes, Attempted' 'No' 'No' 'No' 'No' 'No' 'No

  if diff:


## Prediction Metrics

In [41]:
print ('Accuracy:', accuracy_score(y_test, y_predict))
print ('F1-score:', f1_score(y_test, y_predict, average='macro'))

Accuracy: 0.8484848484848485
F1-score: 0.4790114835505898
