In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split, RandomizedSearchCV, ParameterGrid
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate, cross_val_score, StratifiedKFold

import pickle
import numpy as np

import sys
sys.path.append('..')

from process_dataset import speech_features

In [16]:
def print_scores(scores):
    print('Accuracy: ', np.mean(scores['test_accuracy']))
    print('F1 Macro: ', np.mean(scores['test_f1_macro']))
    print('Precision Macro: ', np.mean(scores['test_precision_macro']))
    print('Recall Macro: ', np.mean(scores['test_recall_macro']))

def get_data():
    with open('../data/speech_features.pkl', 'rb') as f:
        data = pickle.load(f)

    x = np.array(data[0])
    y = np.array(data[1])
    
    x = MinMaxScaler().fit_transform(x)

    return x, y

x, y = get_data()

def cross_validate_model(model):
    x, y = get_data()
    scoring = {'accuracy': 'accuracy',
           'f1_macro': 'f1_macro',
           'precision_macro': 'precision_macro',
           'recall_macro' : 'recall_macro'}

    scores = cross_validate(model, x, y, cv=5, scoring=scoring, n_jobs=-1)
    print_scores(scores)

def check_accuracy(model):
    x, y = get_data()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
    model.fit(x_train, y_train)
    results = model.predict(x_test)

    print(classification_report(y_test, results))

## Logistic Regression

In [14]:
# lr = LogisticRegression(penalty='l1', solver='liblinear')
# lr = LogisticRegression(C=1, class_weight='balanced', random_state=42, solver='sag')
lr = LogisticRegression(C=0.75, class_weight='balanced', random_state=42, solver='liblinear')
# cross_validate_model(lr)
check_accuracy(lr)

              precision    recall  f1-score   support

         ang       0.62      0.73      0.67       208
         hap       0.64      0.58      0.61       317
         neu       0.71      0.56      0.63       369
         sad       0.60      0.80      0.69       213

    accuracy                           0.64      1107
   macro avg       0.64      0.67      0.65      1107
weighted avg       0.65      0.64      0.64      1107





### Random Search

In [8]:
params = {
    'solver': ['liblinear', 'saga', 'sag', 'newton-cg'],
    'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'C' : [0.001, 0.01, 0.1, 1, 10, 30, 50],
    'fit_intercept': [True, False],
    'class_weight': ['balanced', None],
    'multi_class': ['auto', 'ovr', 'multinomial']
}

In [None]:
lr_g = RandomizedSearchCV(LogisticRegression(random_state=42), param_distributions=params, n_iter=50, n_jobs=-1, cv=5, random_state=42, verbose=5)

lr_g.fit(x, y)

In [11]:
print(lr_g.best_params_)
print(lr_g.best_score_)
print(lr_g.best_estimator_)

{'solver': 'sag', 'penalty': 'l2', 'multi_class': 'auto', 'fit_intercept': True, 'class_weight': 'balanced', 'C': 1}
0.627367516592586
LogisticRegression(C=1, class_weight='balanced', random_state=42, solver='sag')


### Grid Search

In [20]:
params = {
    'penalty': ['l2', 'none', 'l1'],
    'solver' : ['sag', 'liblinear'],
    'C': [0.75, 1, 3, 5]
}
lr_g = GridSearchCV(LogisticRegression(random_state=42, multi_class='auto', fit_intercept=True, class_weight='balanced'), param_grid=params, cv=5, return_train_score=False, verbose=5, n_jobs=-1)

pg = ParameterGrid(params)
print(len(pg), 'combinations per fold')


24 combinations per fold


In [21]:
lr_g.fit(x, y)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 1/5] END C=0.75, penalty=l2, solver=liblinear;, score=0.659 total time=   4.5s
[CV 2/5] END C=0.75, penalty=l2, solver=liblinear;, score=0.634 total time=   5.5s
[CV 3/5] END C=0.75, penalty=l2, solver=liblinear;, score=0.617 total time=   5.5s




[CV 4/5] END C=0.75, penalty=l2, solver=liblinear;, score=0.632 total time=   4.1s




[CV 5/5] END C=0.75, penalty=l2, solver=liblinear;, score=0.634 total time=   4.0s




[CV 1/5] END ....C=0.75, penalty=l2, solver=sag;, score=0.659 total time=  15.4s




[CV 2/5] END ....C=0.75, penalty=l2, solver=sag;, score=0.624 total time=  15.6s
[CV 4/5] END ....C=0.75, penalty=l2, solver=sag;, score=0.627 total time=  15.8s
[CV 1/5] END C=0.75, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END C=0.75, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END C=0.75, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END C=0.75, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 5/5] END C=0.75, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 1/5] END ......C=0.75, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/5] END ......C=0.75, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 3/5] END ......C=0.75, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 4/5] END ......C=0.75, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 5/5] END ......C=0.75, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 5/5] END ....C



[CV 3/5] END ....C=0.75, penalty=l2, solver=sag;, score=0.609 total time=  16.9s




[CV 1/5] END ..C=0.75, penalty=none, solver=sag;, score=0.632 total time=  15.6s




[CV 2/5] END ..C=0.75, penalty=none, solver=sag;, score=0.605 total time=  15.3s




[CV 3/5] END ..C=0.75, penalty=none, solver=sag;, score=0.589 total time=  15.8s
[CV 3/5] END C=0.75, penalty=l1, solver=liblinear;, score=0.608 total time=  11.6s
[CV 2/5] END C=0.75, penalty=l1, solver=liblinear;, score=0.635 total time=  14.2s




[CV 5/5] END ..C=0.75, penalty=none, solver=sag;, score=0.615 total time=  16.0s
[CV 4/5] END ..C=0.75, penalty=none, solver=sag;, score=0.602 total time=  16.5s
[CV 1/5] END C=0.75, penalty=l1, solver=liblinear;, score=0.662 total time=  16.3s
[CV 1/5] END .C=1, penalty=l2, solver=liblinear;, score=0.663 total time=   4.7s
[CV 5/5] END C=0.75, penalty=l1, solver=liblinear;, score=0.626 total time=  14.7s
[CV 4/5] END C=0.75, penalty=l1, solver=liblinear;, score=0.636 total time=  17.5s
[CV 2/5] END .C=1, penalty=l2, solver=liblinear;, score=0.637 total time=   4.8s




[CV 1/5] END .......C=1, penalty=l2, solver=sag;, score=0.658 total time=  16.7s




[CV 2/5] END .......C=1, penalty=l2, solver=sag;, score=0.622 total time=  15.4s
[CV 3/5] END .C=1, penalty=l2, solver=liblinear;, score=0.612 total time=   5.8s
[CV 4/5] END .C=1, penalty=l2, solver=liblinear;, score=0.627 total time=   6.0s
[CV 5/5] END .C=1, penalty=l2, solver=liblinear;, score=0.630 total time=   4.1s




[CV 4/5] END .......C=1, penalty=l2, solver=sag;, score=0.624 total time=  15.0s
[CV 1/5] END .C=1, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END .C=1, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END .C=1, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END .C=1, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 5/5] END .C=1, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 1/5] END .........C=1, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/5] END .........C=1, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 3/5] END .........C=1, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 4/5] END .........C=1, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 5/5] END .......C=1, penalty=l2, solver=sag;, score=0.619 total time=  15.2s
[CV 5/5] END .........C=1, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 3/5] END .......C=1, pen



[CV 1/5] END .....C=1, penalty=none, solver=sag;, score=0.632 total time=  15.7s




[CV 3/5] END .....C=1, penalty=none, solver=sag;, score=0.589 total time=  15.5s




[CV 2/5] END .....C=1, penalty=none, solver=sag;, score=0.605 total time=  17.0s




[CV 4/5] END .....C=1, penalty=none, solver=sag;, score=0.602 total time=  16.3s




[CV 5/5] END .....C=1, penalty=none, solver=sag;, score=0.615 total time=  15.7s
[CV 3/5] END .C=1, penalty=l1, solver=liblinear;, score=0.608 total time=  19.3s
[CV 1/5] END .C=1, penalty=l1, solver=liblinear;, score=0.659 total time=  19.9s
[CV 2/5] END .C=1, penalty=l1, solver=liblinear;, score=0.636 total time=  23.0s
[CV 1/5] END .C=3, penalty=l2, solver=liblinear;, score=0.658 total time=   5.5s




[CV 1/5] END .......C=3, penalty=l2, solver=sag;, score=0.648 total time=  15.8s




[CV 2/5] END .......C=3, penalty=l2, solver=sag;, score=0.614 total time=  16.4s
[CV 3/5] END .......C=3, penalty=l2, solver=sag;, score=0.595 total time=  16.1s




[CV 5/5] END .......C=3, penalty=l2, solver=sag;, score=0.617 total time=  15.5s




[CV 4/5] END .......C=3, penalty=l2, solver=sag;, score=0.614 total time=  16.7s
[CV 2/5] END .C=3, penalty=l2, solver=liblinear;, score=0.626 total time=   8.6s




[CV 4/5] END .C=1, penalty=l1, solver=liblinear;, score=0.632 total time=  29.0s
[CV 3/5] END .C=3, penalty=l2, solver=liblinear;, score=0.598 total time=  10.2s




[CV 4/5] END .C=3, penalty=l2, solver=liblinear;, score=0.616 total time=  10.0s
[CV 1/5] END .C=3, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END .C=3, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END .C=3, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END .C=3, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 5/5] END .C=3, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 1/5] END .........C=3, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/5] END .........C=3, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 3/5] END .........C=3, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 4/5] END .........C=3, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 5/5] END .........C=3, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 5/5] END .C=3, penalty=l2, solver=liblinear;, score=0.627 total time=  10.3s
[CV 5/5] END .C=1, penalty=l



[CV 2/5] END .....C=3, penalty=none, solver=sag;, score=0.605 total time=  15.4s
[CV 1/5] END .....C=3, penalty=none, solver=sag;, score=0.632 total time=  16.2s




[CV 3/5] END .....C=3, penalty=none, solver=sag;, score=0.589 total time=  14.9s




[CV 4/5] END .....C=3, penalty=none, solver=sag;, score=0.602 total time=  15.8s




[CV 5/5] END .....C=3, penalty=none, solver=sag;, score=0.615 total time=  16.2s




[CV 1/5] END .......C=5, penalty=l2, solver=sag;, score=0.643 total time=  17.0s




[CV 2/5] END .......C=5, penalty=l2, solver=sag;, score=0.614 total time=  16.5s




[CV 3/5] END .......C=5, penalty=l2, solver=sag;, score=0.590 total time=  16.6s
[CV 1/5] END .C=5, penalty=l2, solver=liblinear;, score=0.652 total time=  10.1s




[CV 4/5] END .......C=5, penalty=l2, solver=sag;, score=0.610 total time=  17.1s




[CV 5/5] END .......C=5, penalty=l2, solver=sag;, score=0.618 total time=  17.5s
[CV 2/5] END .C=5, penalty=l2, solver=liblinear;, score=0.618 total time=  12.5s
[CV 3/5] END .C=5, penalty=l2, solver=liblinear;, score=0.599 total time=  14.1s




[CV 4/5] END .C=5, penalty=l2, solver=liblinear;, score=0.608 total time=  14.6s




[CV 5/5] END .C=5, penalty=l2, solver=liblinear;, score=0.627 total time=  12.7s




[CV 3/5] END .C=3, penalty=l1, solver=liblinear;, score=0.608 total time= 1.2min




[CV 1/5] END .....C=5, penalty=none, solver=sag;, score=0.632 total time=  17.0s




[CV 2/5] END .....C=5, penalty=none, solver=sag;, score=0.605 total time=  16.6s
[CV 1/5] END .C=5, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 2/5] END .C=5, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 3/5] END .C=5, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 4/5] END .C=5, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 5/5] END .C=5, penalty=none, solver=liblinear;, score=nan total time=   0.0s
[CV 1/5] END .........C=5, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 2/5] END .........C=5, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 3/5] END .........C=5, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 4/5] END .........C=5, penalty=l1, solver=sag;, score=nan total time=   0.0s
[CV 5/5] END .........C=5, penalty=l1, solver=sag;, score=nan total time=   0.0s




[CV 3/5] END .....C=5, penalty=none, solver=sag;, score=0.589 total time=  15.4s
[CV 1/5] END .C=3, penalty=l1, solver=liblinear;, score=0.659 total time= 1.4min
[CV 2/5] END .C=3, penalty=l1, solver=liblinear;, score=0.637 total time= 1.4min




[CV 4/5] END .....C=5, penalty=none, solver=sag;, score=0.602 total time=  16.1s




[CV 5/5] END .....C=5, penalty=none, solver=sag;, score=0.615 total time=  16.4s
[CV 5/5] END .C=3, penalty=l1, solver=liblinear;, score=0.634 total time= 1.4min
[CV 4/5] END .C=3, penalty=l1, solver=liblinear;, score=0.614 total time= 1.4min
[CV 2/5] END .C=5, penalty=l1, solver=liblinear;, score=0.637 total time= 1.3min
[CV 4/5] END .C=5, penalty=l1, solver=liblinear;, score=0.602 total time= 1.4min
[CV 3/5] END .C=5, penalty=l1, solver=liblinear;, score=0.598 total time= 1.5min
[CV 1/5] END .C=5, penalty=l1, solver=liblinear;, score=0.653 total time= 1.6min
[CV 5/5] END .C=5, penalty=l1, solver=liblinear;, score=0.633 total time= 1.4min


40 fits failed out of a total of 120.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/home/rafid/Documents/github/CSE400-NLP/env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/rafid/Documents/github/CSE400-NLP/env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/rafid/Documents/github/CSE400-NLP/env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 464, in _check_solver
    raise ValueError("penalty='none' is not 

GridSearchCV(cv=5,
             estimator=LogisticRegression(class_weight='balanced',
                                          random_state=42),
             n_jobs=-1,
             param_grid={'C': [0.75, 1, 3, 5], 'penalty': ['l2', 'none', 'l1'],
                         'solver': ['sag', 'liblinear']},
             verbose=5)

In [22]:
print(lr_g.best_score_)
print(lr_g.best_params_)
print(lr_g.best_estimator_)

0.6351429584217482
{'C': 0.75, 'penalty': 'l2', 'solver': 'liblinear'}
LogisticRegression(C=0.75, class_weight='balanced', random_state=42,
                   solver='liblinear')


## SVM

In [None]:
svm = SVC(kernel='linear', probability=True, random_state=42)
test_accuracy(svm)

## Random Forest

In [None]:
rf = RandomForestClassifier(random_state=42)
test_accuracy(rf)