## Import Required Libraries

In [57]:
import pandas as pd
import cPickle as pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import  LabelEncoder
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, f1_score

## Load dataset with pandas

In [2]:
# Read dataset_sd.csv into a DataFrame; the path is relative, so the file
# must sit in the notebook's working directory.
dataset = pd.read_csv('dataset_sd.csv')

## View top-5 rows of dataset

In [3]:
# Preview the first five rows (rich display as the cell's last expression).
dataset.head(n=5)

Unnamed: 0,Branch,Year,College,Gender,Do you feel Depressed?,When do you get up in the morning?,What is your sleeping posture?,You start your day with?,How do you work out everyday?,How much time do you spend in exercising daily?,...,Have you often felt like crying for no reason?,How often does your grades and college work suffer because of the amount of time you spend online?,How do you feel after seeing post on success of people on social sites?,How often do you get angry at small things ?,Do you often feel restless and can't keep still ?,Do you feel happy?,Have you blamed yourself unnecessarily when things went wrong?,"Do you prefer to stay at home,rather than going out and doing new things?",Are you satisfied with your life?,Do you think about suicide ?
0,IT,2,BIT,Male,Several days,6 AM- 7AM,Facing right side,Taking tea,,,...,Not at all,A little of time,Neutral,Some of the time,Not at all,Most of the time,Some of the time,Some of the time,Some of the time,No
1,IT,2,BIT,Female,Several days,7AM-8AM,Facing right side,Drinking cold water,Gymnasium,> half an hour,...,Some of the time,Most of the time,Happy and motivated,Some of the time,A little of time,Most of the time,Some of the time,A little of time,A little of time,No
2,IT,2,BIT,Male,Not at all,After 8AM,Facing left sie,Drinking cold water,football,> half an hour,...,Some of the time,Some of the time,Happy and motivated,Most of the time,Not at all,Most of the time,Not at all,Most of the time,Most of the time,No
3,IT,2,BIT,Male,Several days,7AM-8AM,Back,Drinking cold water,Gymnasium,15 min,...,A little of time,A little of time,Neutral,Some of the time,A little of time,Most of the time,Some of the time,A little of time,Most of the time,No
4,IT,2,BIT,Male,Several days,6 AM- 7AM,Back,Drinking cold water,Gymnasium,> half an hour,...,Not at all,Some of the time,Jealous,Some of the time,A little of time,Some of the time,Not very often,A little of time,Some of the time,No


## Get List of all columns

In [66]:
# Single-argument print(...) is valid on both Python 2 and Python 3 and
# produces the same text as the former print statement.
print('Columns of dataset \n%s' % (dataset.columns,))

Columns of dataset 
Index([u'Branch', u'Year', u'College', u'Gender', u'Do you feel Depressed?',
       u'When do you get up in the morning?',
       u'What is your sleeping posture?',
       u'How much time do you spend in exercising daily?',
       u'Do you take afternoon nap?',
       u'What is average usage duration of electronic gadgets by you?',
       u'What is your sleeping pattern?',
       u'Do you have trouble in falling asleep?',
       u'Do you feel that you are useful and needed?',
       u'Moving or speaking so slowly that other people could have noticed',
       u'Poor Appetite or overeating',
       u'Do you have gradual loss or gain in weight?',
       u'Do you feel more irritable than usual?',
       u'Do you get tired for no reason?', u'Have you had nausea?',
       u'Do you feel that life isn't worth living?',
       u'Things have been getting on top of me',
       u'Do you frequently smoke or drink?',
       u'Do you often suffer from constipation?', u'Relationshi

## Get Unique value counts of each column in dataset

In [63]:
# Report how many distinct values each column holds.
# print(...) with one pre-formatted string works on Python 2 and 3 alike.
print('column Unique_column_count')
for col in dataset.columns:
    # dropna=False counts NaN as a value, matching len(Series.unique()).
    print('%s %d' % (col, dataset[col].nunique(dropna=False)))

column Unique_column_count
Branch 8
Year 4
College 2
Gender 2
Do you feel Depressed? 4
When do you get up in the morning? 4
What is your sleeping posture? 5
How much time do you spend in exercising daily? 4
Do you take afternoon nap? 2
What is average usage duration of electronic gadgets by you? 4
What is your sleeping pattern? 4
Do you have trouble in falling asleep? 7
Do you feel that you are useful and needed? 4
Moving or speaking so slowly that other people could have noticed 4
Poor Appetite or overeating 4
Do you have gradual loss or gain in weight? 3
Do you feel more irritable than usual? 4
Do you get tired for no reason? 4
Have you had nausea? 8
Do you feel that life isn't worth living? 4
Things have been getting on top of me 4
Do you frequently smoke or drink? 4
Do you often suffer from constipation? 4
Relationship status 3
Do you worry about the past? 4
Do you feel Nostalgic? 4
Have you often felt like crying for no reason? 4
How often does your grades and college work suffer 

## Remove columns with too many unique values (to avoid overfitting)

In [22]:
# Free-text / high-cardinality columns to exclude from the feature set.
dropping_cols = [
    'What is first diet you have after waking up?',
    'How do you work out everyday?',
    'You start your day with?',
    'How do you feel after afternoon nap?',
    'Do you have any of the following general health conditions?',
]
# Rebind instead of mutating in place, and ignore columns that are already
# gone, so re-running this cell does not raise KeyError.
# NOTE: errors='ignore' also hides misspelled names; keep the list in sync
# with the CSV header.
dataset = dataset.drop(dropping_cols, axis=1, errors='ignore')

## Unique values of each dataset column after removing the high-cardinality columns

In [64]:
# List the distinct values found in every remaining column.
# print(...) with one pre-formatted string works on Python 2 and 3 alike.
print('column Unique_column_values')
for col in dataset.columns:
    # The tuple wrapper keeps the array from being unpacked by the % operator.
    print('%s %s' % (col, dataset[col].unique()))

column Unique_column_values
Branch ['IT' 'CSE' 'Mech' 'ETC' 'Civil' 'EE' 'EEE' 'Others']
Year [2 1 3 4]
College ['BIT' 'Others']
Gender ['Male' 'Female']
Do you feel Depressed? ['Several days' 'Not at all' 'Nearly every day'
 'More than half of  the days']
When do you get up in the morning? ['6 AM- 7AM' '7AM-8AM' 'After 8AM' 'Before 6 am']
What is your sleeping posture? ['Facing right side' 'Facing left sie' 'Back' 'Straight' 'Facing left side']
How much time do you spend in exercising daily? ['None' '> half an hour' '15 min' 'half an hour']
Do you take afternoon nap? ['No' 'Yes']
What is average usage duration of electronic gadgets by you? ['2-3hours' '> 3hours' '1-2hours' '1 hour']
What is your sleeping pattern? ['Sound sleep' 'Broken sleep' 'Insomania' 'Sleep with scary or bad dreams']
Do you have trouble in falling asleep? ['Not at all' 'Every day' 'Few days' 'HALF OF A WEEK' 'Situation depends'
 'Very few days' 'Sometimes when i am too tired ']
Do you feel that you are useful and 

## Split dataset into features & target dataset

In [25]:
# Target: the suicide-ideation answer, kept as a one-column DataFrame.
dataset_target = dataset['Do you think about suicide ?'].to_frame()
# Features: every other column of the dataset.
dataset_features = dataset.drop(labels=['Do you think about suicide ?'], axis=1)

## Encode Features into incremental numeric values

In [40]:
# Encoder that maps each distinct value of a column to an integer code.
le = LabelEncoder()

In [42]:
# Integer-encode every feature column with its own LabelEncoder and keep the
# fitted encoders. Refitting one shared encoder discarded each column's
# value -> code mapping, so new raw data could not be encoded the same way
# and codes could not be inverted.
# NOTE: re-running this cell on the already-encoded frame refits the encoders
# on the integer codes; re-create dataset_features first.
feature_encoders = {}
for col in dataset_features:
    le = LabelEncoder()  # fresh encoder per column; `le` ends as the last column's encoder, as before
    dataset_features[col] = le.fit_transform(dataset_features[col])
    feature_encoders[col] = le

## Split dataset into train-set & test-set (75% train-set, 25% test-set)

In [44]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    dataset_features,
    dataset_target,
    test_size=0.25,
    random_state=42
)

In [61]:
# Show the (rows, columns) of each split as a sanity check.
# print(...) with one pre-formatted string works on Python 2 and 3 alike.
print('Train-set shape %s %s' % (x_train.shape, y_train.shape))
print('Test-set shape %s %s' % (x_test.shape, y_test.shape))

Train-set shape (594, 35) (594, 1)
Test-set shape (198, 35) (198, 1)


## Generate the prediction model with train-set

In [46]:
# LightGBM classifier with n_estimators set to 20; every other
# hyperparameter is left at the library default.
model = LGBMClassifier(n_estimators=20)

In [47]:
# y_train is a single-column DataFrame of shape (n, 1); flatten it to a 1-D
# array so the estimator gets the (n_samples,) target it expects instead of
# a column vector (which triggers a DataConversionWarning).
# fit() returns the estimator itself, so model_fit and model are the same object.
model_fit = model.fit(x_train, y_train.values.ravel())

## Save the model

In [49]:
# File the trained model is pickled to (relative to the working directory).
model_file = 'sd_model.pkl'

In [50]:
# Serialize the fitted model to disk; pickle needs the file in binary mode.
with open(model_file, 'wb') as model_out:
    pickle.dump(model_fit, model_out)

## Load the Model

In [51]:
# Path of the pickled model to read back (same file name used when saving).
model_file = 'sd_model.pkl'

In [52]:
# Restore the fitted model from disk.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that you produced yourself or otherwise trust.
with open(model_file, 'rb') as model_in:
    model_fit = pickle.load(model_in)

## Prediction on test-set

In [56]:
# Predict the target label for every held-out row.
y_predict = model_fit.predict(x_test)
# print(...) with a single argument behaves identically on Python 2 and 3.
print(y_predict)

['No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'Suicidal ideas and gestures but no attempt' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'No' 'No' 'No' 'Suicidal ideas and gestures but no attempt' 'No' 'No' 'No'
 'No' 'No' 'No' 'No' 'No' 'No' 'No'
 'Suicidal ideas and gestures but no attempt' 'No' 'No'
 'Suicidal ideas and gestures 

## Prediction Metrics

In [60]:
# Evaluate on the held-out set. Macro-averaged F1 weights every class
# equally, so it exposes weak performance on the rare classes that plain
# accuracy hides.
# print(...) with one pre-formatted string works on Python 2 and 3 alike.
print('Accuracy: %s' % accuracy_score(y_test, y_predict))
print('F1-score: %s' % f1_score(y_test, y_predict, average='macro'))

 Accuracy: 0.843434343434
F1-score: 0.528657177689
