In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
pd.set_option('display.max_columns', None)  # this will output all columns when display a pandas dataframe

In [2]:
df=pd.read_csv('2012-2013_cleaned2.csv', index_col=0)

In [3]:
y=df['loan_status']
df.drop(columns=['emp_title','issue_d','desc','title','zip_code','loan_status'], inplace=True)
scaler=StandardScaler()
scaler.fit(df)
scaled=scaler.transform(df)
df=pd.DataFrame(scaled, columns=df.columns)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=0.30, random_state=101)

In [5]:
model = SVC(verbose=True, max_iter=100000)

In [7]:
model.fit(X_train,y_train)

[LibSVM]



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=100000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=True)

In [10]:
predictions = model.predict(X_test)

In [11]:
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

[[45573     0]
 [ 8519     1]]
             precision    recall  f1-score   support

        0.0       0.84      1.00      0.91     45573
        1.0       1.00      0.00      0.00      8520

avg / total       0.87      0.84      0.77     54093



In [6]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['linear'], 'class_weight':['balanced',None]}


In [None]:
grid = GridSearchCV(SVC(),param_grid,refit=True,error_score=0,verbose=5)
grid.fit(X_train,y_train)


Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=0.0001, class_weight=balanced, kernel=linear ..................
[CV]  C=0.0001, class_weight=balanced, kernel=linear, score=0.6593221144704317, total= 7.3min
[CV] C=0.0001, class_weight=balanced, kernel=linear ..................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  9.0min remaining:    0.0s


[CV]  C=0.0001, class_weight=balanced, kernel=linear, score=0.6628398935158776, total= 8.4min
[CV] C=0.0001, class_weight=balanced, kernel=linear ..................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 19.3min remaining:    0.0s


[CV]  C=0.0001, class_weight=balanced, kernel=linear, score=0.666238828674653, total= 6.8min
[CV] C=0.0001, class_weight=None, kernel=linear ......................


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 27.8min remaining:    0.0s


[CV]  C=0.0001, class_weight=None, kernel=linear, score=0.8447898840083666, total= 1.2min
[CV] C=0.0001, class_weight=None, kernel=linear ......................


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 29.8min remaining:    0.0s


[CV]  C=0.0001, class_weight=None, kernel=linear, score=0.8447898840083666, total= 1.3min
[CV] C=0.0001, class_weight=None, kernel=linear ......................
[CV]  C=0.0001, class_weight=None, kernel=linear, score=0.8447898840083666, total= 1.3min
[CV] C=0.01, class_weight=balanced, kernel=linear ....................
[CV]  C=0.01, class_weight=balanced, kernel=linear, score=0.6408300057045065, total= 6.6min
[CV] C=0.01, class_weight=balanced, kernel=linear ....................
[CV]  C=0.01, class_weight=balanced, kernel=linear, score=0.6403308613804906, total=10.6min
[CV] C=0.01, class_weight=balanced, kernel=linear ....................
[CV]  C=0.01, class_weight=balanced, kernel=linear, score=0.6469623502567028, total= 6.8min
[CV] C=0.01, class_weight=None, kernel=linear ........................
[CV]  C=0.01, class_weight=None, kernel=linear, score=0.8447898840083666, total= 1.2min
[CV] C=0.01, class_weight=None, kernel=linear ........................
[CV]  C=0.01, class_weight=Non

In [None]:
grid.best_params_

In [None]:
grid_predictions = grid.predict(X_test)

In [None]:
print(confusion_matrix(y_test,grid_predictions))
print(classification_report(y_test,grid_predictions))

In [7]:
param_grid = {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['rbf'], 'gamma': ['auto', 'scale', 1e-8, 1e-6, 1e-4, 1e-2, 1, 100, 1000], 'class_weight':['balanced',None]}


In [None]:
grid = GridSearchCV(SVC(),param_grid,refit=True,error_score=0,verbose=5)
grid.fit(X_train,y_train)


Fitting 3 folds for each of 90 candidates, totalling 270 fits
[CV] C=0.0001, class_weight=balanced, gamma=auto, kernel=rbf .........
[CV]  C=0.0001, class_weight=balanced, gamma=auto, kernel=rbf, score=0.8447898840083666, total=13.3min
[CV] C=0.0001, class_weight=balanced, gamma=auto, kernel=rbf .........


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed: 16.9min remaining:    0.0s


[CV]  C=0.0001, class_weight=balanced, gamma=auto, kernel=rbf, score=0.8447898840083666, total=12.5min
[CV] C=0.0001, class_weight=balanced, gamma=auto, kernel=rbf .........


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 33.1min remaining:    0.0s


[CV]  C=0.0001, class_weight=balanced, gamma=auto, kernel=rbf, score=0.8447898840083666, total=13.2min
[CV] C=0.0001, class_weight=balanced, gamma=scale, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=scale, kernel=rbf, score=0, total=   0.0s
[CV] C=0.0001, class_weight=balanced, gamma=scale, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=scale, kernel=rbf, score=0, total=   0.0s
[CV] C=0.0001, class_weight=balanced, gamma=scale, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=scale, kernel=rbf, score=0, total=   0.0s
[CV] C=0.0001, class_weight=balanced, gamma=1e-08, kernel=rbf ........


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 49.6min remaining:    0.0s
TypeError('must be real number, not str',)
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed: 49.6min remaining:    0.0s
TypeError('must be real number, not str',)
TypeError('must be real number, not str',)


[CV]  C=0.0001, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total=12.3min
[CV] C=0.0001, class_weight=balanced, gamma=1e-08, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total=13.5min
[CV] C=0.0001, class_weight=balanced, gamma=1e-08, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total=13.5min
[CV] C=0.0001, class_weight=balanced, gamma=1e-06, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total=13.1min
[CV] C=0.0001, class_weight=balanced, gamma=1e-06, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total=13.2min
[CV] C=0.0001, class_weight=balanced, gamma=1e-06, kernel=rbf ........
[CV]  C=0.0001, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total=54.5min
[CV] C=0.0001, class_

TypeError('must be real number, not str',)
TypeError('must be real number, not str',)
TypeError('must be real number, not str',)


[CV]  C=0.0001, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.2min
[CV] C=0.0001, class_weight=None, gamma=1e-08, kernel=rbf ............
[CV]  C=0.0001, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.3min
[CV] C=0.0001, class_weight=None, gamma=1e-08, kernel=rbf ............
[CV]  C=0.0001, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.3min
[CV] C=0.0001, class_weight=None, gamma=1e-06, kernel=rbf ............
[CV]  C=0.0001, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.2min
[CV] C=0.0001, class_weight=None, gamma=1e-06, kernel=rbf ............
[CV]  C=0.0001, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.2min
[CV] C=0.0001, class_weight=None, gamma=1e-06, kernel=rbf ............
[CV]  C=0.0001, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.2min
[CV] C=0.0001, class_weight=None, gamma=0.000

TypeError('must be real number, not str',)
TypeError('must be real number, not str',)
TypeError('must be real number, not str',)


[CV]  C=0.01, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total=15.9min
[CV] C=0.01, class_weight=balanced, gamma=1e-08, kernel=rbf ..........
[CV]  C=0.01, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total=16.0min
[CV] C=0.01, class_weight=balanced, gamma=1e-08, kernel=rbf ..........
[CV]  C=0.01, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total=78.1min
[CV] C=0.01, class_weight=balanced, gamma=1e-06, kernel=rbf ..........
[CV]  C=0.01, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total=13.6min
[CV] C=0.01, class_weight=balanced, gamma=1e-06, kernel=rbf ..........
[CV]  C=0.01, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total=13.6min
[CV] C=0.01, class_weight=balanced, gamma=1e-06, kernel=rbf ..........
[CV]  C=0.01, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total=13.6min
[CV] C=0.01, class_weight=balance

TypeError('must be real number, not str',)
TypeError('must be real number, not str',)
TypeError('must be real number, not str',)


[CV]  C=0.01, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.1min
[CV] C=0.01, class_weight=None, gamma=1e-08, kernel=rbf ..............
[CV]  C=0.01, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.1min
[CV] C=0.01, class_weight=None, gamma=1e-08, kernel=rbf ..............
[CV]  C=0.01, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.2min
[CV] C=0.01, class_weight=None, gamma=1e-06, kernel=rbf ..............
[CV]  C=0.01, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.0min
[CV] C=0.01, class_weight=None, gamma=1e-06, kernel=rbf ..............
[CV]  C=0.01, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.0min
[CV] C=0.01, class_weight=None, gamma=1e-06, kernel=rbf ..............
[CV]  C=0.01, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.0min
[CV] C=0.01, class_weight=None, gamma=0.0001, kernel=rbf 

TypeError('must be real number, not str',)
TypeError('must be real number, not str',)
TypeError('must be real number, not str',)


[CV]  C=1, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.1552101159916334, total=14.0min
[CV] C=1, class_weight=balanced, gamma=1e-08, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.1552101159916334, total=302.5min
[CV] C=1, class_weight=balanced, gamma=1e-08, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=1e-08, kernel=rbf, score=0.1552101159916334, total=167.2min
[CV] C=1, class_weight=balanced, gamma=1e-06, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.17683970336565888, total=14.3min
[CV] C=1, class_weight=balanced, gamma=1e-06, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.17325061798821068, total=15.6min
[CV] C=1, class_weight=balanced, gamma=1e-06, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=1e-06, kernel=rbf, score=0.17439151930024718, total=15.0min
[CV] C=1, class_weight=balanced, gamma=0.0001,

TypeError('must be real number, not str',)
TypeError('must be real number, not str',)
TypeError('must be real number, not str',)


[CV]  C=1, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.2min
[CV] C=1, class_weight=None, gamma=1e-08, kernel=rbf .................
[CV]  C=1, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.1min
[CV] C=1, class_weight=None, gamma=1e-08, kernel=rbf .................
[CV]  C=1, class_weight=None, gamma=1e-08, kernel=rbf, score=0.8447898840083666, total= 2.1min
[CV] C=1, class_weight=None, gamma=1e-06, kernel=rbf .................
[CV]  C=1, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.0min
[CV] C=1, class_weight=None, gamma=1e-06, kernel=rbf .................
[CV]  C=1, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.0min
[CV] C=1, class_weight=None, gamma=1e-06, kernel=rbf .................
[CV]  C=1, class_weight=None, gamma=1e-06, kernel=rbf, score=0.8447898840083666, total= 2.1min
[CV] C=1, class_weight=None, gamma=0.0001, kernel=rbf ................
[CV]

In [None]:
param_grid = {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['poly'], 'degree': [3,5,7,9], 'class_weight':['balanced',None]}


In [None]:
param_grid = {'C': [0.0001, 0.01, 1, 100, 10000], 'kernel': ['sigmoid'], 'coef0':[1.0e-8, 1.0e-6, 1.0e-4, 1.0e-2, 0.0, 1.0, 100.0], 'class_weight':['balanced',None]}
