In [52]:
import pandas as pd
import sklearn
import numpy as np
from sklearn import datasets, linear_model
import matplotlib.pyplot as plt

In [53]:
dataset= pd.read_csv('/home/srinidhi/Downloads/survey_908.csv')

In [54]:
dataset.shape

(908, 17)

In [55]:
dataset= dataset.rename(columns={'Timestamp':'timestamp',
                            'When something good happens to me, I have people who I like to share the good news with.	':'Q1',
                            'I finish whatever I begin.	':'Q2',
                           'When I do an	activity, I	enjoy it so much that I	lose track of time.	':'Q3',
                           'I get completely absorbed	in what I	am doing.	':'Q4',
                           'I am optimistic about my future	':'Q5',
                           'I keep at	my collegework until I	am	done with it.	':'Q6',
                           'When I have a problem, I have someone	who will be there for me.	':'Q7',
                           'In uncertain times, I expect the	best.	':'Q8',
                           'There are people in my life	who really care about me.	':'Q9',
                           'I think good things are going to	happen to me.	':'Q10',
                           'I have friends that I really care about.	':'Q11',
                           'Once I make a plan to	get something done, I stick to it.':'Q12',
                           'I believe	that	things will work out, no matter how difficult they seem.':'Q13',
                           'I work hard.	':'Q14',
                           'I have a lot of fun':'Q15',
                           'From the below categories which one do you identify with the most':'label'})

In [56]:
dataset[:1]

Unnamed: 0,timestamp,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15,label
0,2017/01/30 11:01:20 AM GMT+5:30,Very Often,Sometimes,Almost never,Almost never,Almost never,Very Often,Almost always,Almost never,Almost always,Almost never,Very Often,Sometimes,Almost never,Almost always,Often,Mentally distressed


In [57]:
dataset[:1]

Unnamed: 0,timestamp,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15,label
0,2017/01/30 11:01:20 AM GMT+5:30,Very Often,Sometimes,Almost never,Almost never,Almost never,Very Often,Almost always,Almost never,Almost always,Almost never,Very Often,Sometimes,Almost never,Almost always,Often,Mentally distressed


In [58]:
dataset=dataset.drop('timestamp', axis=1)

In [59]:
dataset.label[dataset.label=="Mentally distressed"] = 0
dataset.label[dataset.label=="unaffected with life (neutral)"] = 1
dataset.label[dataset.label=="Optimistic and happy with life"] = 2

In [60]:
dataset[dataset=='Almost always'] = 2
dataset[dataset=='Very Often'] = 1
dataset[dataset=='Often'] = 0
dataset[dataset=='Sometimes'] = -1
dataset[dataset=='Almost never'] = -2

In [61]:
dataset = dataset.astype(int)

In [75]:
class Config:
    nn_input_dim = 15
    nn_output_dim = 3
    epsilon = 0.01
    reg_lambda = 0.01


In [76]:
def generate_data():
    np.random.seed(0)
    y = np.array(dataset.label)
    X = np.array(dataset.drop('label',axis=1))
    return X ,y

In [77]:
def visualize(X, y, model):
    plot_decision_boundary(lambda x:predict(model,x), X, y)
def plot_decision_boundary(pred_func, X, y):
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
  
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.show()

In [78]:
def calculate_loss(model, X, y):
    num_examples = len(X)
    W1, b1, W2, b2 = model['W1'],model['W2'],model['b1'],model['b2']
    z1 = X.dot(W1)+b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2)+b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis =1, keepdims = True)
    
    correct_logprobs = -np.log(probs[range(num_examples),y])
    data_loss = np.sum(correct_logprobs)
    data_loss += Config.reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./num_examples*data_loss

In [79]:
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)

In [80]:
def build_model(X,y,nn_hdim,num_passes = 20000,print_loss =False):
    num_examples = len(X)
    np.random.seed(0)
    W1 = np.random.randn(Config.nn_input_dim, nn_hdim) / np.sqrt(Config.nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, Config.nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, Config.nn_output_dim))
    model = {}
    for i in range(0, num_passes):
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        
        dW2 += Config.reg_lambda * W2
        dW1 += Config.reg_lambda * W1
        
        W1 += -Config.epsilon * dW1
        b1 += -Config.epsilon * db1
        W2 += -Config.epsilon * dW2
        b2 += -Config.epsilon * db2
        
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        
        return model

In [81]:
def main():
    X, y = generate_data()
    model = build_model(X, y, 50, print_loss=True)
    visualize(X, y, model)

In [82]:
if __name__ == "__main__":
    main()

ValueError: shapes (250000,2) and (15,50) not aligned: 2 (dim 1) != 15 (dim 0)