## Logistic Regression

In [16]:
from sklearn.linear_model import LogisticRegression

# Bare constructor call as the cell's last expression: the notebook echoes the
# estimator's repr, which lists its default hyperparameters (see output below).
LogisticRegression()

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [17]:
import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

import warnings
from sklearn.exceptions import DataConversionWarning
# Suppress these three warning categories for everything that runs after this
# point; the filters are registered in the same order as listed.
for _ignored_category in (FutureWarning, DeprecationWarning, DataConversionWarning):
    warnings.filterwarnings(action='ignore', category=_ignored_category)

# Feature tables: read with pandas' default header handling.
tr_features = pd.read_csv('../Resources/train_features.csv')
val_features = pd.read_csv('../Resources/val_features.csv')
test_features = pd.read_csv('../Resources/test_features.csv')

# Label tables: header=None, so every row of each file is loaded as data.
tr_labels = pd.read_csv('../Resources/train_labels.csv', header=None)
val_labels = pd.read_csv('../Resources/val_labels.csv', header=None)
test_labels = pd.read_csv('../Resources/test_labels.csv', header=None)

In [18]:
def print_results(results):
    """Print the best parameters followed by one summary line per candidate.

    Parameters
    ----------
    results : object
        Search result exposing ``best_params_`` and a ``cv_results_`` mapping
        with ``mean_test_score``, ``std_test_score`` and ``params`` entries
        (the notebook passes its fitted ``GridSearchCV`` instance).

    Each candidate line shows the mean score rounded to 3 places, twice the
    standard deviation rounded to 3 places, and the parameter dict.
    """
    header_template = 'BEST PARAMS: {}\n'
    row_template = '{} (+/-{}) for {}'

    print(header_template.format(results.best_params_))

    summary = results.cv_results_
    candidates = zip(
        summary['mean_test_score'],
        summary['std_test_score'],
        summary['params'],
    )
    for avg_score, score_std, candidate in candidates:
        # The spread is reported as two standard deviations.
        spread = round(score_std * 2, 3)
        print(row_template.format(round(avg_score, 3), spread, candidate))

In [19]:
# Scale data
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training features only; the same fitted scaler is
# then applied to every split.
X_scaler = MinMaxScaler()
X_scaler.fit(tr_features)

tr_features_scaled = X_scaler.transform(tr_features)
val_features_scaled = X_scaler.transform(val_features)
test_features_scaled = X_scaler.transform(test_features)

In [20]:
# Candidate values of C to try, spanning 0.001 to 100.
parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
lr = LogisticRegression()

# 5-fold cross-validated search over the grid, trained on the scaled training
# features; the label frame is flattened to a 1-D array before fitting.
cv = GridSearchCV(estimator=lr, param_grid=parameters, cv=5)
cv.fit(
    tr_features_scaled,
    tr_labels.values.ravel(),
)

print_results(cv)

BEST PARAMS: {'C': 100}

0.511 (+/-0.001) for {'C': 0.001}
0.814 (+/-0.02) for {'C': 0.01}
0.823 (+/-0.021) for {'C': 0.1}
0.84 (+/-0.028) for {'C': 1}
0.864 (+/-0.018) for {'C': 10}
0.874 (+/-0.031) for {'C': 100}


In [21]:
# Display the estimator selected by the grid search (C=100 in the recorded
# output below).
cv.best_estimator_

LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

### Save the Model

In [22]:
# Persist the selected estimator to disk with joblib.
# NOTE(review): the estimator was trained on features transformed by X_scaler,
# but the scaler itself is not saved in the cells shown here — confirm that
# consumers of this file can reproduce the same scaling.
joblib.dump(cv.best_estimator_, '../Saved_model/LR_model.pkl')

['../Saved_model/LR_model.pkl']

### Evaluate and predict using test data

In [23]:
# Reload the persisted estimator from disk; the following cells use this copy
# (model_test) rather than cv.best_estimator_.
model_test = joblib.load('../Saved_model/LR_model.pkl')

In [11]:
# Ground truth and model predictions for the first 20 test rows; the label
# frame is flattened to a 1-D array.
Actual = test_labels.iloc[:20].values.ravel()
Predicted = model_test.predict(test_features_scaled[:20])

In [12]:
# Two-column table pairing each prediction with its true label.
Compare_df = pd.DataFrame(data={"Predicted": Predicted, "Actual": Actual})
Compare_df

Unnamed: 0,Predicted,Actual
0,2,2
1,0,0
2,2,2
3,0,0
4,0,1
5,2,2
6,1,1
7,1,0
8,2,2
9,2,2


#### Model Validation

In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
from time import time

def evaluate_model(name, model, features, labels):
    """Print accuracy, weighted precision/recall and prediction latency.

    Parameters
    ----------
    name : str
        Label printed at the start of the report line.
    model : object
        Fitted estimator exposing ``predict(features)``.
    features : array-like
        Inputs passed unchanged to ``model.predict``.
    labels : array-like
        Ground-truth labels compared against the predictions.

    Returns
    -------
    None
        The one-line report is written to stdout. Metrics are rounded to
        3 decimal places and latency (time spent in ``predict`` only) is
        reported in milliseconds rounded to 1 decimal place.
    """
    # perf_counter is a monotonic, high-resolution clock intended for timing
    # short intervals; wall-clock time() can jump and may be too coarse for a
    # millisecond-scale predict call.
    from time import perf_counter

    started = perf_counter()
    pred = model.predict(features)
    elapsed_ms = (perf_counter() - started) * 1000

    accuracy = round(accuracy_score(labels, pred), 3)
    # 'weighted' averages the per-class scores weighted by class support.
    precision = round(precision_score(labels, pred, average='weighted'), 3)
    recall = round(recall_score(labels, pred, average='weighted'), 3)

    print('{} -- Accuracy: {} / Precision: {} / recall: {} /Latency: {}ms'.format(
        name, accuracy, precision, recall, round(elapsed_ms, 1)))

In [14]:
# Report metrics for the reloaded model on the scaled validation split.
evaluate_model('Logistic Model', model_test, val_features_scaled, val_labels)

Logistic Model -- Accuracy: 0.886 / Precision: 0.885 / recall: 0.886 /Latency: 1.0ms


In [15]:
# Cross-check with the estimator's built-in score(); in the recorded output it
# agrees with the accuracy printed by evaluate_model above (0.886).
model_test.score(val_features_scaled, val_labels)

0.8862660944206009