# Fitting Logistic Regression

In [1]:
import Data_Helper as DH
import lendingclub
import prediction

import numpy as np
import pandas as pd
import json
import time
import sklearn
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

pd.options.mode.chained_assignment = None  # default='warn'
%load_ext autoreload
%autoreload 2

## Instantiate Objects

In [2]:
# Build the configuration object from the "config_data_dummy.ini" file.
config = lendingclub.ConfigData("config_data_dummy.ini")

# Create the LendingClub API object from that configuration.
lc = lendingclub.LendingClub(config)

# Create the data transformer that is handed to the DataHelper below.
transformer = DH.Transformer_full()

# Create the DataHelper for the period Q1 2014 through Q2 2016
# (presumably the window of the training data -- confirm in DH.DataHelper).
# NOTE(review): the instance is bound to the name `DataHelper`, the same name
# as the class DH.DataHelper; every later cell uses this instance.
periodStart = ("Q1", "2014")
periodEnd = ("Q2", "2016")
DataHelper = DH.DataHelper(periodStart, periodEnd, transformer, lc)

In [3]:
# Populate the training dataset on the DataHelper; later cells read it as
# DataHelper.training.
# NOTE(review): the recorded output of this cell is a KeyboardInterrupt, so
# it must be re-run to completion before the cells below can work.
DataHelper.set_training_dataset()

KeyboardInterrupt: 

In [4]:
# Populate the test dataset from the 2016 Q3 and Q4 LoanStats files; later
# cells read it as DataHelper.test.
DataHelper.set_test_dataset(["LoanStats_2016Q3.csv", "LoanStats_2016Q4.csv"])

## Logistic Regression - choose penalization type / solver

Based on the test runs, l1/saga, l2/saga, and l2/sag seem to be appropriate choices. We will fine-tune the C parameter for these three penalty/solver pairs.

In [5]:
# Logistic regression model wrapper; solver, penalty and max_iter are set as
# attributes on this object in the cells below.
logistic_model = prediction.ModelLogistic()

In [6]:
# Derive the model-specific training and test sets from the DataHelper data.
# NOTE(review): the recorded warnings mention scaler.transform on the test
# frame, so this step presumably also scales features -- confirm in
# prediction.ModelLogistic.get_data_for_model.
training, test = logistic_model.get_data_for_model(DataHelper.training, DataHelper.test)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


In [7]:
# Check time - l1/saga: one fit + evaluation at C=1.0, printing the test
# metrics and the elapsed seconds.
# NOTE(review): max_iter is not set in this cell, so the model uses whatever
# value it currently holds -- confirm that this is intended.
# perf_counter() is a monotonic clock meant for measuring durations;
# time.time() follows the wall clock and can jump if the clock is adjusted.
start = time.perf_counter()

logistic_model.solver = "saga"
logistic_model.penalty = "l1"

kwargs = {"C": 1.0}
model = logistic_model.fit_model(training, **kwargs)
score = logistic_model.test_model(test, model)
print("Raw test score:", score)

end = time.perf_counter()
print(end - start)

Raw test score: {'score': 0.7656695802962022, 'AUC': 0.680155035085399}
188.24476742744446


In [17]:
# Check time - l2/sag: one fit + evaluation at C=1.0, printing the test
# metrics and the elapsed seconds.
# NOTE(review): max_iter is not set in this cell, so the model uses whatever
# value it currently holds -- confirm that this is intended.
# perf_counter() is a monotonic clock meant for measuring durations;
# time.time() follows the wall clock and can jump if the clock is adjusted.
start = time.perf_counter()

logistic_model.solver = "sag"
logistic_model.penalty = "l2"

kwargs = {"C": 1.0}
model = logistic_model.fit_model(training, **kwargs)
score = logistic_model.test_model(test, model)
print("Raw test score:", score)

end = time.perf_counter()
print(end - start)

Raw test score: {'score': 0.7656695802962022, 'AUC': 0.6801530740858839}
240.99648475646973


In [18]:
# Check time - l2/lbfgs: one fit + evaluation at C=1.0 with max_iter=500,
# printing the test metrics and the elapsed seconds.
# perf_counter() is a monotonic clock meant for measuring durations;
# time.time() follows the wall clock and can jump if the clock is adjusted.
start = time.perf_counter()

logistic_model.solver = "lbfgs"
logistic_model.penalty = "l2"
logistic_model.max_iter = 500

kwargs = {"C": 1.0}
model = logistic_model.fit_model(training, **kwargs)
score = logistic_model.test_model(test, model)
print("Raw test score:", score)

end = time.perf_counter()
print(end - start)

Raw test score: {'score': 0.7656594985331034, 'AUC': 0.6801518382947149}
40.51711344718933




In [20]:
# Check time - l2/saga: one fit + evaluation at C=1.0 with max_iter=500,
# printing the test metrics and the elapsed seconds.
# (The previous header comment said "l2/lbfgs", but this cell uses saga.)
# perf_counter() is a monotonic clock meant for measuring durations;
# time.time() follows the wall clock and can jump if the clock is adjusted.
start = time.perf_counter()

logistic_model.solver = "saga"
logistic_model.penalty = "l2"
logistic_model.max_iter = 500

kwargs = {"C": 1.0}
model = logistic_model.fit_model(training, **kwargs)
score = logistic_model.test_model(test, model)
print("Raw test score:", score)

end = time.perf_counter()
print(end - start)

Raw test score: {'score': 0.7656695802962022, 'AUC': 0.6801559992058404}
238.20778250694275


## Logistic Regression - CV

In [9]:
# Ask the DataHelper for 5-fold cross-validation data (fold indices, per the
# original note). The same CVs object is reused by every grid search below.
CVs = DataHelper.get_cross_validation_data(fold=5)

We first try l1 penalization with the saga solver. `max_iter` is set to 500.

In [10]:
# Fresh model object for the l1/saga grid search, capped at 500 iterations.
logistic_model = prediction.ModelLogistic()
logistic_model.solver = "saga"
logistic_model.penalty = "l1"
logistic_model.max_iter = 500

In [11]:
# Parameter grid for the search: one {"C": value} dict per candidate,
# stepping down by a factor of ten from 10000 to 0.001.
grids = [
    {"C": c_value}
    for c_value in (10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001)
]

In [12]:
# Evaluate every grid entry on the CV folds. With verbose=True the recorded
# output shows each grid followed by one {'score', 'AUC'} dict per fold.
scores = logistic_model.tune_parameters(DataHelper.training, CVs, grids, verbose=True)

fitting for {'C': 10000}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445823937157265, 'AUC': 0.7008842315175903}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.700992980385214}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6997270177405817}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965759762622616}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.699806643193744}
fitting for {'C': 1000}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445823937157265, 'AUC': 0.7008842130807473}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.700993042161293}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6997269813685911}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965760469130046}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.69980648129643}
fitting for {'C': 100}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445823937157265, 'AUC': 0.7008841277445025}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.7009936556616634}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6997268342273568}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965753239751703}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.6998051303289786}
fitting for {'C': 10}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8446073577259451, 'AUC': 0.7008831764034031}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.7010023676864523}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523779391194298, 'AUC': 0.6997240374417237}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514719595044625, 'AUC': 0.6965688608013958}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.6997955658075936}
fitting for {'C': 1}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8446406430729032, 'AUC': 0.7008650487725875}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.84578843413256, 'AUC': 0.7010631995499881}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.85234463464997, 'AUC': 0.6996905708017388}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514636339416545, 'AUC': 0.6964975019080302}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512590002913389, 'AUC': 0.6996950718785787}
fitting for {'C': 0.1}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445241443585497, 'AUC': 0.700586377997539}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8459551133001642, 'AUC': 0.701053132179329}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6993059915601992}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.69596876653556}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512173804469971, 'AUC': 0.699071224097934}
fitting for {'C': 0.01}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445907150524661, 'AUC': 0.6996416957590831}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8459301114250235, 'AUC': 0.7002528353252457}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8524195697062545, 'AUC': 0.6983842944598757}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512305181830292, 'AUC': 0.6951844102031292}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8513422399800225, 'AUC': 0.6983391643713894}
fitting for {'C': 0.001}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445075016850707, 'AUC': 0.6957051700800612}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.845513413506013, 'AUC': 0.6966967512585297}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8524278958236196, 'AUC': 0.6951281300357296}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512138670574131, 'AUC': 0.6916722237583557}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.851192408540392, 'AUC': 0.6957552307326609}


In [13]:
# Select the best grid entry by the "AUC" metric and print its score and
# parameters. The first returned value is discarded; avg_score is presumably
# the mean AUC over the folds -- confirm in ModelLogistic.best_grid.
_, avg_score, param = logistic_model.best_grid(scores, "AUC")
print(avg_score)
print(param)

0.6995973698198783
{'C': 10000}


Next, we try l2 penalization with the saga solver. `max_iter` is set to 500.

In [14]:
# Fresh model object for the l2/saga grid search, capped at 500 iterations.
logistic_model = prediction.ModelLogistic()
logistic_model.solver = "saga"
logistic_model.penalty = "l2"
logistic_model.max_iter = 500

In [15]:
# Same candidate grid as before: a list of single-key {"C": value} dicts,
# ordered from the largest C (10000) to the smallest (0.001).
grids = [
    dict(C=candidate)
    for candidate in [10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001]
]

In [16]:
# Run the l2/saga grid search on the same CV folds; this overwrites the
# `scores` produced by the previous search.
scores = logistic_model.tune_parameters(DataHelper.training, CVs, grids, verbose=True)

fitting for {'C': 10000}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445990363892055, 'AUC': 0.7008823557005057}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457801001741797, 'AUC': 0.7010220817113708}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523779391194298, 'AUC': 0.6997223995999698}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965651086444998}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512506763224705, 'AUC': 0.699790506516531}
fitting for {'C': 1000}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445990363892055, 'AUC': 0.7008824115378016}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457801001741797, 'AUC': 0.7010219981006434}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523779391194298, 'AUC': 0.6997223703921592}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965652630903099}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512506763224705, 'AUC': 0.699790344619217}
fitting for {'C': 100}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445990363892055, 'AUC': 0.7008822334905749}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457801001741797, 'AUC': 0.701022430533196}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523696130020649, 'AUC': 0.6997222210465619}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514719595044625, 'AUC': 0.6965646491408309}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512506763224705, 'AUC': 0.699789877523588}
fitting for {'C': 10}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8446156790626846, 'AUC': 0.7008815939955058}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.84578843413256, 'AUC': 0.70102612538178}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523612868846999, 'AUC': 0.6997206427226105}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514719595044625, 'AUC': 0.6965598136725438}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512506763224705, 'AUC': 0.6997831051839192}
fitting for {'C': 1}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8446406430729032, 'AUC': 0.7008676583760239}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457801001741797, 'AUC': 0.7010561150053478}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523612868846999, 'AUC': 0.6996974517210321}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514719595044625, 'AUC': 0.6965145183311446}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512590002913389, 'AUC': 0.6997224921422374}
fitting for {'C': 0.1}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445491083687684, 'AUC': 0.7007253939009122}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8458634397579818, 'AUC': 0.7011053761027859}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523779391194298, 'AUC': 0.6995098427896761}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8515052617556946, 'AUC': 0.6962241558268054}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512173804469971, 'AUC': 0.6993568569036679}
fitting for {'C': 0.01}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.844574072378987, 'AUC': 0.7003798563836302}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8459217774666433, 'AUC': 0.7009561858070185}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8524278958236196, 'AUC': 0.6991097090112908}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514386572532303, 'AUC': 0.6957116761496832}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512423523536022, 'AUC': 0.6987774510296177}
fitting for {'C': 0.001}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8444991803483312, 'AUC': 0.6997786757082209}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8459384453834038, 'AUC': 0.7004611304171003}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8524445480583495, 'AUC': 0.6986281763714413}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512471693086453, 'AUC': 0.6952887065823692}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8513755358554959, 'AUC': 0.6984971055932568}


In [17]:
# Select the best l2/saga grid entry by the "AUC" metric and print its score
# and parameters (the first returned value is discarded).
_, avg_score, param = logistic_model.best_grid(scores, "AUC")
print(avg_score)
print(param)

0.6995964904345755
{'C': 10000}


Finally, we try l2 penalization with the sag solver. `max_iter` is set to 500.

In [18]:
# Fresh model object for the l2/sag grid search, capped at 500 iterations.
logistic_model = prediction.ModelLogistic()
logistic_model.solver = "sag"
logistic_model.penalty = "l2"
logistic_model.max_iter = 500

In [19]:
# Candidate C values for this search, wrapped as one keyword dict each
# (10000 down to 0.001).
grids = list(
    map(lambda c_value: {"C": c_value}, [10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001])
)

In [20]:
# Run the l2/sag grid search on the same CV folds; this overwrites the
# `scores` produced by the previous search.
scores = logistic_model.tune_parameters(DataHelper.training, CVs, grids, verbose=True)

fitting for {'C': 10000}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445823937157265, 'AUC': 0.7008841777873622}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.7009929942315766}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6997271004041965}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965762396964271}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.6998067350814087}
fitting for {'C': 1000}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445823937157265, 'AUC': 0.7008842162413489}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.7009930123383583}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6997269984524049}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965761991680939}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.6998068012624052}
fitting for {'C': 100}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445823937157265, 'AUC': 0.7008841114147273}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.700993573648593}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523945913541597, 'AUC': 0.6997269863284079}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965756996837721}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.6998061224969773}
fitting for {'C': 10}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8446073577259451, 'AUC': 0.7008833523435621}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457634322574193, 'AUC': 0.700998867219496}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523779391194298, 'AUC': 0.699724773698986}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.696570990729606}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512673242602072, 'AUC': 0.6997994157913546}
fitting for {'C': 1}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8446323217361636, 'AUC': 0.700871514836815}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8457717662157995, 'AUC': 0.7010476548778463}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.852352960767335, 'AUC': 0.6997016179672166}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514802850672706, 'AUC': 0.6965256417155496}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512590002913389, 'AUC': 0.699737670015425}
fitting for {'C': 0.1}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8445491083687684, 'AUC': 0.7007291592310221}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8458634397579818, 'AUC': 0.7011063245786189}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8523696130020649, 'AUC': 0.6995148092196516}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8515052617556946, 'AUC': 0.6962303402313695}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512173804469971, 'AUC': 0.699363413744885}
fitting for {'C': 0.01}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.844574072378987, 'AUC': 0.7003797088888861}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8459217774666433, 'AUC': 0.7009558705359948}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8524278958236196, 'AUC': 0.6991093722948332}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8514386572532303, 'AUC': 0.6957111750223208}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512423523536022, 'AUC': 0.6987769894034925}
fitting for {'C': 0.001}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8444991803483312, 'AUC': 0.6997787910701814}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8459384453834038, 'AUC': 0.7004618994227727}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8524445480583495, 'AUC': 0.6986277795860903}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8512471693086453, 'AUC': 0.6952888555513774}


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


{'score': 0.8513755358554959, 'AUC': 0.6984965909129444}


In [21]:
# Select the best l2/sag grid entry by the "AUC" metric and print its score
# and parameters (the first returned value is discarded).
_, avg_score, param = logistic_model.best_grid(scores, "AUC")
print(avg_score)
print(param)

0.6995974494401943
{'C': 10000}


## Fit Best Model & Save

In [None]:
# fit best model

In [22]:
# Final model configuration: l1 penalty with the saga solver, 500 iterations.
# NOTE(review): in the recorded CV output the three configurations differ by
# less than 1e-6 in average AUC, and l2/sag (0.69959745) is marginally ahead
# of l1/saga (0.69959737); the reason for choosing l1/saga is not stated.
logistic_model = prediction.ModelLogistic()
logistic_model.solver = "saga"
logistic_model.penalty = "l1"
logistic_model.max_iter = 500

In [23]:
# Rebuild the model-specific training and test sets for the final model.
training, test = logistic_model.get_data_for_model(DataHelper.training, DataHelper.test)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)
  test.loc[:, cvars] = scaler.transform(test[cvars])


In [24]:
# Fit on the full training set with C=10000 (the value printed by the grid
# searches above), then evaluate on the test set.
kwargs = {"C":10000}
model = logistic_model.fit_model(training, **kwargs)
score = logistic_model.test_model(test, model)

In [25]:
# Display the test-set metrics of the final model.
score

{'score': 0.7656191714807086, 'AUC': 0.6801276036843475}

In [28]:
# Obtain the scaler from the raw training data; it is passed to save_model
# below together with the fitted model.
scaler = logistic_model.get_scaler(DataHelper.training)

  return self.partial_fit(X, y)


In [29]:
# save model

In [30]:
# Target file for the saved model (the name carries the date 2019-06-10).
filename = "logistic_regression_20190610.sav"

In [31]:
# Save the fitted model to `filename`, passing the scaler and the training
# data along; the recorded output confirms the file that was written.
logistic_model.save_model(model,scaler,training,filename)

Model is saved on logistic_regression_20190610.sav


## Predict listed loan by saved model

In [None]:
# predict by calling model

In [32]:
# File holding the previously saved model; set again here so that this
# section does not depend on the value assigned in the save section.
filename = "logistic_regression_20190610.sav"

In [33]:
# Create a new model object that is given the saved-model filename; the model
# itself is loaded in the next cell via set_model_from_file().
new_model = prediction.ModelLogistic(filename)

In [34]:
# Load the saved model into new_model; the recorded output confirms the file
# it was loaded from.
new_model.set_model_from_file()

Model is loaded from logistic_regression_20190610.sav


In [35]:
# Display the loaded model's description string; the recorded output lists
# the save date, C, penalty, solver, max_iter and tol.
new_model.model_description

'Logistic Regression Object, time: 06/10/2019, C: 10000, penalty: l1, solver: saga, max_iter: 500, tol: 0.0005'

In [36]:
# Fetch the listed-loan data through the DataHelper and display its shape.
loans = DataHelper.get_listed_loandata()
loans.shape

(65, 81)

In [37]:
# Predict on the listed loans with the reloaded model and display the shape
# of the result. The recorded warning shows that the saved scaler is applied
# to the input inside predict_model.
pred = new_model.predict_model(loans)
pred.shape

  df.loc[:, cvars] = self.scaler.transform(df[cvars])


(65, 2)