In [29]:
import pandas as pd
from matplotlib import pyplot as plt
import csv
import seaborn as sns
import math
import numpy as np

In [30]:
data = pd.read_csv("/Users/Nika/Desktop/Stress-Lysis.csv")
df = pd.DataFrame(data, columns=["Humidity", "Temperature", "Step_count", "Stress_Level"])

I split the data into training and testing sets by choosing random samplses. (80 percent for train and 20 percent for test):

In [31]:
mask = np.random.rand(len(df)) <= 0.8
training_data = df[mask]
testing_data = df[~mask]

I define several functions in the code.
First, I implement the **sigmoid** function as follows:

In [32]:
def sigmoid(x):
    sigmoid = 1 / (1 + np.exp(-x))
    return sigmoid

I implement a function for calculating the **weights** in each iteration.


I define a certian **learning rate** and **number of iterations**, and by having the temperature and humidity of the training data as the features, along with using the **gradient descent** method, I compute the weights by using the corresponding mathematical expressions.

In [33]:
def w_calc(data, label):
    eta = 0.00000001 
    n_iterations = 200

    n = len(data)
    w = np.random.randn(2,1)
    w_transpose = w.transpose()

    temperature = data['Temperature'].values.tolist()
    humidity = data['Humidity'].values.tolist()

    features = np.ones((len(temperature),2))
    for i in range(len(temperature)):
        features[i][0] = temperature[i]
        features[i][1] = humidity[i]

    label_list = data['Stress_Level'].values.tolist()
    yi = []
    for i in range(len(label_list)):
        if(label_list[i] == label):
            yi.append(1)
        else:
            yi.append(0)

    x = np.ones((1,2))
    for j in range(n_iterations):
        gradients = 0
        for i in range(len(temperature)):
            x = features[i]
            x = np.reshape(x,(len(x),1))
            multiplication = np.dot(w_transpose,x)
            gradients+=x * (yi[i]-sigmoid(multiplication))
        w = w + (eta * gradients)
    
    return w

I define the final function as the classifier:


I compute w for each class and by having the temprature and humidity of the testing data, I estimate a label for each of the testing samples. I use **one vs all** logistic regression by comparing the probability of each class, and then choose the class that has **max probability** as the estimated class.

In [34]:
def classifier(training_data, test_data):
    w_high = w_calc(training_data, 'high')
    w_high = w_high.transpose()
    w_mid = w_calc(training_data, 'mid')
    w_mid = w_mid.transpose()
    w_low = w_calc(training_data, 'low')
    w_low = w_low.transpose()

    temperature = test_data['Temperature'].values.tolist()
    humidity = test_data['Humidity'].values.tolist()

    features = np.ones((len(temperature),2))
    label = []
    for i in range(len(temperature)):
        features[i][0] = temperature[i]
        features[i][1] = humidity[i]

        x = features[i]
        x = np.reshape(x,(len(x),1))

        probability_high = sigmoid(np.dot(w_high,x))
        probability_mid = sigmoid(np.dot(w_mid,x))
        probability_low = sigmoid(np.dot(w_low,x))

        if ((probability_high >= probability_low) and (probability_high >= probability_mid)):
            label.append('high')
        elif ((probability_low >= probability_high) and (probability_low >= probability_mid)):
            label.append('low')
        else:
            label.append('mid')

    return label

Computing the 3x3 **confusion matrix** as follows:

In [35]:
classification = classifier(training_data, testing_data)
T1=F12=F21=T2=T3=F13=F31=F32=F23=0
test_list = testing_data['Stress_Level'].values.tolist()

for i in range (len(test_list)):
    if (test_list[i] == "high" and classification[i] == "high"):
        T1+=1
    elif(test_list[i] == "high" and classification[i] == "mid"):
        F12+=1
    elif(test_list[i] == "high" and classification[i] == "low"):
        F13+=1
    if (test_list[i] == "mid" and classification[i] == "mid"):
        T2+=1
    elif(test_list[i] == "mid" and classification[i] == "low"):
        F23+=1
    elif(test_list[i] == "mid" and classification[i] == "high"):
        F21+=1
    if (test_list[i] == "low" and classification[i] == "low"):
        T3+=1
    elif(test_list[i] == "low" and classification[i] == "mid"):
        F32+=1
    elif(test_list[i] == "low" and classification[i] == "high"):
        F31+=1


Confusion_Matrix=[[T1,F12,F13],[F21,T2,F23],[F31,F32,T3]]
print("Confusion Matrix = ", Confusion_Matrix)
accuracy = (T1+T2+T3)/(T1+T2+T3+F21+F12+F23+F32+F13+F31)
print("accuracy = ", accuracy)

Confusion Matrix =  [[130, 0, 0], [156, 0, 0], [103, 0, 0]]
accuracy =  0.3341902313624679


By computing accuracy and recall, we can compute the **F1 Score**:

In [36]:
recall = T1 / (F12 + F13 + T1)
print("F1 score =", 2/((1/recall) + (1/accuracy)))

F1 score = 0.5009633911368016


The **Jaccard Score**:

In [37]:
jaccard = T1/(T1+F21+F12+F13+F31)
print("jaccard score = ",jaccard)

jaccard score =  0.3341902313624679
