In [1]:
import sklearn
import numpy as np
import pandas as pd
import os
import numpy as np
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.neural_network import MLPClassifier

In [2]:
# Load the raw dataset; the relative path is resolved against the kernel's
# current working directory.
data = pd.read_csv(filepath_or_buffer='location_and_weather_simple.csv')

In [3]:
# Shuffle all rows (frac=1 keeps every row). The seed is pinned so that a
# Restart & Run All reproduces the same row order and therefore the same
# downstream metrics; without it every run shuffles differently.
shuffled = data.sample(frac=1, random_state=42)

In [4]:
# Target vector: the 'labels' column.
y = shuffled['labels']
# Feature matrix: every remaining column except 'Unnamed: 0'.
# NOTE(review): 'Unnamed: 0' is presumably a row index written out when the
# CSV was saved - confirm against the data source.
X = shuffled.drop(columns=['Unnamed: 0', 'labels'])

In [5]:
# Impute missing values column by column.
#
# The previous form, X[col].fillna(..., inplace=True), is chained assignment:
# it mutates a temporary Series rather than X. Under pandas Copy-on-Write
# (the default from pandas 3.0) X would be left unchanged, and recent 2.x
# releases already emit a FutureWarning for it. A single DataFrame-level
# fillna with a per-column mapping applies the same fill values reliably.
mean_filled_columns = ['LATITUDE', 'LONGITUDE', 'temp', 'rhum', 'prcp', 'wdir']
zero_filled_columns = ['dwpt', 'wspd', 'pres', 'coco']

# Column means skip NaN by default, matching the per-Series .mean() calls
# this replaces.
fill_values = X[mean_filled_columns].mean().to_dict()
fill_values.update(dict.fromkeys(zero_filled_columns, 0))

# NOTE(review): the means are computed over the full dataset, before the
# train/test split performed in a later cell, so test rows influence the
# imputation values.
X = X.fillna(value=fill_values)

#MLP CLASSIFIER USING L-BFGS (A QUASI-NEWTON METHOD)

In [6]:
# MLP trained with scikit-learn's 'lbfgs' solver (a quasi-Newton optimizer).
# Two hidden layers with 12 and 2 units, logistic activations, and a fixed
# seed for the weight initialisation.
clf_newton = MLPClassifier(
    hidden_layer_sizes=(12, 2),
    activation='logistic',
    solver='lbfgs',
    alpha=1e-5,
    random_state=1,
)

In [7]:
# Hold out 33% of the rows for evaluation; the seed keeps the split stable.
# The pasted "..." continuation prompt was removed: IPython silently strips
# it, but it is a SyntaxError in plain Python (e.g. when the notebook is
# exported to a script).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [8]:
# Train the L-BFGS model on the training split; the estimator's repr is the
# cell output.
clf_newton.fit(X_train, y_train)

MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(12, 2), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [10]:
def accuracy(confusion_matrix):
    """Return the overall accuracy encoded in a confusion matrix.

    The result is the sum of the diagonal (correctly classified samples)
    divided by the sum of every cell (all samples).

    Parameters
    ----------
    confusion_matrix : array-like exposing ``.trace()`` and ``.sum()``
        Square matrix of prediction counts, e.g. a numpy array.
        Note that inside this function the parameter name shadows the
        ``sklearn.metrics.confusion_matrix`` function imported by the notebook.

    Returns
    -------
    Fraction of samples on the diagonal.
    """
    return confusion_matrix.trace() / confusion_matrix.sum()


# Score the L-BFGS model on the held-out split.
y_pred = clf_newton.predict(X_test)
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true classes and columns the predicted ones. Passing the arguments the
# other way round yields the transposed matrix; the accuracy (trace / total)
# is unaffected, but any per-row or per-column reading of cm would be wrong.
cm = confusion_matrix(y_test, y_pred)
print("Accuracy of MLPClassifier : ", accuracy(cm))

Accuracy of MLPClassifier :  0.5951791298592072


In [11]:
# Convert the labels to a plain numpy array so that y_test[i] is positional.
# After the shuffle and split, integer keys on the pandas Series would look
# up index labels instead. np.asarray states that intent directly (np.full
# was being used as a roundabout conversion) and is safe to re-run.
y_test = np.asarray(y_test)

In [13]:
# Per-class accuracy for the labels 0/1, computed with vectorized numpy
# operations instead of an element-by-element Python loop.
# Converting here makes the comparisons positional even if y_test is still a
# pandas Series carrying a shuffled index.
y_true = np.asarray(y_test)
y_hat = np.asarray(y_pred)
is_positive = y_true == 1

# Number of positive samples (sum of the labels, as before).
positives = y_true.sum()
# Correct predictions within each class.
count_1 = np.count_nonzero(is_positive & (y_hat == 1))
count_0 = np.count_nonzero(~is_positive & (y_hat == 0))

# A_1: fraction of positive samples predicted as 1;
# A_0: fraction of the remaining samples predicted as 0.
acc_1 = count_1 / positives
acc_0 = count_0 / (len(y_true) - positives)
# Unweighted mean of the two per-class accuracies.
balanced_acc = (acc_0 + acc_1) / 2
# Per-class accuracies weighted by class frequency; for 0/1 labels this
# equals the overall accuracy.
positive_rate = y_true.mean()
class_acc = positive_rate * acc_1 + (1 - positive_rate) * acc_0

print('A_0: ', acc_0)
print('A_1: ', acc_1)
print('Balanced Accuracy: ', balanced_acc)
print('Class Accuracy: ', class_acc)

A_0:  0.32426788324685546
A_1:  0.8661517524169745
Balanced Accuracy:  0.595209817831915
Class Accuracy:  0.5951791298592072


#MLP CLASSIFIER USING STOCHASTIC GRADIENT DESCENT

In [14]:
# Same architecture and seed as the L-BFGS model; only the solver differs.
# NOTE(review): the outputs recorded for this model show A_0 = 1.0 and
# A_1 = 0.0, i.e. it predicts class 0 for every test row. The features are
# fed in unscaled, which may be why SGD fails to learn here - confirm before
# relying on the solver comparison.
clf_sgd = MLPClassifier(
    hidden_layer_sizes=(12, 2),
    activation='logistic',
    solver='sgd',
    alpha=1e-5,
    random_state=1,
)

In [15]:
# Re-create the same 67/33 split (identical seed) for the SGD model; this
# also resets y_test to the original label Series.
# The pasted "..." continuation prompt was removed: IPython silently strips
# it, but it is a SyntaxError in plain Python (e.g. when the notebook is
# exported to a script).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

In [16]:
# Train the SGD model on the training split; the estimator's repr is the
# cell output.
clf_sgd.fit(X_train, y_train)

MLPClassifier(activation='logistic', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(12, 2), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [17]:
# Score the SGD model on the held-out split.
y_pred = clf_sgd.predict(X_test)
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true classes and columns the predicted ones. Passing the arguments the
# other way round yields the transposed matrix; the accuracy (trace / total)
# is unaffected, but any per-row or per-column reading of cm would be wrong.
cm = confusion_matrix(y_test, y_pred)
print("Accuracy of MLPClassifier : ", accuracy(cm))

Accuracy of MLPClassifier :  0.5000566320100186


In [18]:
# Convert the labels to a plain numpy array so that y_test[i] is positional.
# After the shuffle and split, integer keys on the pandas Series would look
# up index labels instead. np.asarray states that intent directly (np.full
# was being used as a roundabout conversion) and is safe to re-run.
y_test = np.asarray(y_test)

In [19]:
# Per-class accuracy for the labels 0/1, computed with vectorized numpy
# operations instead of an element-by-element Python loop.
# Converting here makes the comparisons positional even if y_test is still a
# pandas Series carrying a shuffled index.
y_true = np.asarray(y_test)
y_hat = np.asarray(y_pred)
is_positive = y_true == 1

# Number of positive samples (sum of the labels, as before).
positives = y_true.sum()
# Correct predictions within each class.
count_1 = np.count_nonzero(is_positive & (y_hat == 1))
count_0 = np.count_nonzero(~is_positive & (y_hat == 0))

# A_1: fraction of positive samples predicted as 1;
# A_0: fraction of the remaining samples predicted as 0.
acc_1 = count_1 / positives
acc_0 = count_0 / (len(y_true) - positives)
# Unweighted mean of the two per-class accuracies.
balanced_acc = (acc_0 + acc_1) / 2
# Per-class accuracies weighted by class frequency; for 0/1 labels this
# equals the overall accuracy.
positive_rate = y_true.mean()
class_acc = positive_rate * acc_1 + (1 - positive_rate) * acc_0

print('A_0: ', acc_0)
print('A_1: ', acc_1)
print('Balanced Accuracy: ', balanced_acc)
print('Class Accuracy: ', class_acc)

A_0:  1.0
A_1:  0.0
Balanced Accuracy:  0.5
Class Accuracy:  0.5000566320100186
