In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from util_functions import process_files_to_mfccs

In [2]:
# Build the feature frames for the two splits via the project helper.
# NOTE(review): process_files_to_mfccs is defined in util_functions; judging by
# how log_reg consumes its result, it presumably returns a DataFrame with
# feature columns plus a 'Label' column — confirm against the helper.
df_train = process_files_to_mfccs(dataset="training")
df_test = process_files_to_mfccs(dataset="test")

In [3]:
def _majority_label_per_group(row_labels):
    """Collapse row-level 0/1 labels to a single 0/1 label per index value.

    Rows sharing the same index value are averaged; a mean of at least 0.5
    (ties included) maps to 1, anything lower maps to 0.
    """
    mean_per_group = row_labels.groupby(row_labels.index).mean()
    return (mean_per_group >= 0.5) * 1


def log_reg(df_train,df_test):
    """Fit a logistic regression on row-level features and vote per index group.

    The model is trained and evaluated on individual rows. Row-level
    predictions are then aggregated over rows that share an index value
    (presumably one index value per audio file — confirm against
    ``process_files_to_mfccs``) by majority vote.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames holding the feature columns plus a ``'Label'`` column.
        Labels are assumed to be 0/1, since the vote thresholds their mean
        at 0.5. ``df_test`` must contain every feature column of ``df_train``.

    Returns
    -------
    tuple of pandas.Series
        ``(y_train_last, y_pred_train_last, y_test_last, y_pred_test_last)``,
        each indexed by the unique index values of the corresponding frame.
        The ``y_*_last`` series are the per-group *mean* of ``'Label'`` (equal
        to the label itself only when all rows of a group share one label);
        the ``y_pred_*_last`` series are the 0/1 majority votes.

    Raises
    ------
    KeyError
        If ``'Label'`` is missing from either frame, or if ``df_test`` lacks a
        feature column present in ``df_train``.
    """
    label_column = 'Label'

    # Pick features by name instead of by position: a positional slice feeds
    # the label to the model (and drops a real feature) whenever 'Label' is
    # not the last column.
    X_train = df_train.drop(columns=label_column)
    # Reuse the training feature columns so both matrices share the same
    # columns in the same order.
    X_test = df_test[X_train.columns]
    y_train = df_train[label_column]
    y_test = df_test[label_column]

    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)

    # Re-attach the original index to the row-level predictions so they can be
    # grouped exactly like the ground-truth labels.
    row_pred_train = pd.Series(logreg.predict(X_train), index=df_train.index)
    row_pred_test = pd.Series(logreg.predict(X_test), index=df_test.index)

    y_pred_train_last = _majority_label_per_group(row_pred_train)
    y_pred_test_last = _majority_label_per_group(row_pred_test)

    # Ground truth per group, aggregated with the same grouping as the votes.
    y_train_last = y_train.groupby(y_train.index).mean()
    y_test_last = y_test.groupby(y_test.index).mean()

    return y_train_last,y_pred_train_last,y_test_last,y_pred_test_last

In [4]:
def calculate_accuracies(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` equals ``y_pred``.

    Parameters
    ----------
    y_true, y_pred : sequence, numpy.ndarray or pandas.Series
        One-dimensional label collections of equal length.

    Returns
    -------
    numpy.float64
        Share of matching entries, or NaN when the inputs are empty.

    Raises
    ------
    ValueError
        If the inputs differ in shape, or if both are pandas Series whose
        indexes are not identically labelled (raised by pandas).
    """
    if isinstance(y_true, pd.Series) and isinstance(y_pred, pd.Series):
        # Keep pandas' label-aware comparison so mismatched indexes still fail
        # loudly instead of being compared by position.
        matches = (y_true == y_pred).to_numpy()
    else:
        # Convert first: `==` on plain Python lists is whole-list equality and
        # would yield a single bool instead of an element-wise comparison.
        true_values = np.asarray(y_true)
        pred_values = np.asarray(y_pred)
        if true_values.shape != pred_values.shape:
            raise ValueError(
                'y_true and y_pred must have the same shape, got '
                + str(true_values.shape) + ' and ' + str(pred_values.shape)
            )
        matches = true_values == pred_values

    if matches.size == 0:
        # Accuracy is undefined without samples; avoid dividing by zero.
        return np.float64(np.nan)

    return np.mean(matches)

In [5]:
def class_report(y_true, y_pred, accuracy, dataset):
    """Print the confusion matrix, classification report and accuracy.

    Parameters
    ----------
    y_true, y_pred
        True and predicted labels, passed unchanged to ``confusion_matrix``
        and ``classification_report``.
    accuracy
        Pre-computed accuracy; it is only converted with ``str`` and printed.
    dataset : str
        Name prefixed to every heading (e.g. ``'Training'``).

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # Section title (the literal ends in a newline, so a blank line follows).
    print(dataset + ' Set:\n')

    # Confusion matrix.
    print(dataset + ' Confusion Matrix:')
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix)

    # Per-class report.
    print(dataset + ' Classification report:')
    report_text = classification_report(y_true, y_pred)
    print(report_text)

    # Accuracy line, then two blank lines to separate consecutive reports.
    accuracy_text = str(accuracy)
    print(dataset + ' Accuracy: ' + accuracy_text)
    print('\n')

In [6]:
# Fit the logistic-regression model; log_reg returns, for each split, the
# per-group ground truth and the per-group majority-vote prediction.
y_train, y_pred_train, y_test, y_pred_test = log_reg(df_train, df_test)

# Accuracy of the aggregated predictions on each split.
train_acc = calculate_accuracies(y_train, y_pred_train)
test_acc = calculate_accuracies(y_test, y_pred_test)

# Print confusion matrix, classification report and accuracy per split.
class_report(y_true=y_train, y_pred=y_pred_train, accuracy=train_acc, dataset='Training')
class_report(y_true=y_test, y_pred=y_pred_test, accuracy=test_acc, dataset='Test')

Training Set:

Training Confusion Matrix:
[[111   4]
 [ 17  63]]
Training Classification report:
             precision    recall  f1-score   support

          0       0.87      0.97      0.91       115
          1       0.94      0.79      0.86        80

avg / total       0.90      0.89      0.89       195

Training Accuracy: 0.892307692308


Test Set:

Test Confusion Matrix:
[[45  4]
 [10 23]]
Test Classification report:
             precision    recall  f1-score   support

          0       0.82      0.92      0.87        49
          1       0.85      0.70      0.77        33

avg / total       0.83      0.83      0.83        82

Test Accuracy: 0.829268292683


