# Hyperparameter Tuning and Grid Search CV

### Read data from pickle file

In [1]:
import pickle as pkl

# Read the pickled object at the relative path below into `df_data`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only run this against a file produced by a trusted source.
# NOTE(review): the file name is spelled 'tansformed' -- confirm it matches the
# file on disk before "correcting" it.
with open('../data/titanic_tansformed.pkl', 'rb') as f:
    df_data = pkl.load(f)

In [2]:
# Display the first rows of the loaded object (head() defaults to 5 rows) as a
# quick sanity check of its columns and values.
df_data.head()

Unnamed: 0,Survived,Age,SibSp,Parch,Fare,2,3,male,Q,S
0,0,22.0,1,0,7.25,0,1,1,0,1
1,1,38.0,1,0,71.2833,0,0,0,0,0
2,1,26.0,0,0,7.925,0,1,0,0,1
3,1,35.0,1,0,53.1,0,0,0,0,1
4,0,35.0,0,0,8.05,0,1,1,0,1


In [3]:
# Target vector: the "Survived" column.
label = df_data["Survived"]
# Feature matrix: every remaining column.
# NOTE(review): the preview shows column labels 2 and 3 next to string labels;
# if those are ints, recent scikit-learn versions may reject the mixed label
# types -- confirm.
data = df_data.drop(columns=["Survived"])

#### Split the data into train and test sets

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical across re-runs.
data_train, data_test, label_train, label_test = train_test_split(
    data,
    label,
    test_size=0.2,
    random_state=101,
)

In [5]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters: inverse regularization strength C and the norm
# used for the penalty term. Model selection is driven by accuracy.
C_param_range = [0.001,0.01,0.1,0.5,1,10,100]
penalties = ['l1','l2']
score_func = 'accuracy'

# The grid contains 'l1', so the solver has to be chosen explicitly:
# 'liblinear' supports both 'l1' and 'l2'. Relying on the library default is
# fragile -- in current scikit-learn the default solver ('lbfgs') does not
# support 'l1', so every 'l1' candidate would fail to fit and be scored NaN,
# silently halving the search space.
log_regr = LogisticRegression(solver='liblinear')

# 5-fold cross-validated exhaustive search over C x penalty. With the default
# refit=True the best configuration is retrained on all of data_train and
# exposed as log_grid.best_estimator_.
log_grid = GridSearchCV(estimator=log_regr,
                    param_grid=[{'C':C_param_range, 'penalty': penalties}],
                    cv=5,
                    scoring=score_func)
log_grid.fit(data_train, label_train)
print('Best Score', log_grid.best_score_)
print('Best Value of C', log_grid.best_estimator_.C)
print('Best penalty', log_grid.best_estimator_.penalty)

Best Score 0.8045007032348804
Best Value of C 10
Best penalty l2


### Optimal Model after the GridSearchCV Search

In [6]:
# GridSearchCV (refit=True, the default) has already retrained the winning
# configuration on the whole of data_train, so reuse that fitted estimator
# instead of rebuilding it by hand. Copying only C and penalty into a fresh
# LogisticRegression would drop every other estimator setting (e.g. the solver)
# and repeat a fit that has already been done.
optimal_model = log_grid.best_estimator_
predictions = optimal_model.predict(data_test)

# Held-out accuracy plus the fitted linear parameters for inspection.
print('Accuracy', optimal_model.score(data_test, label_test))
print('Coefficients', optimal_model.coef_)
print('Intercept', optimal_model.intercept_)

Accuracy 0.8146067415730337
Coefficients [[-3.25405513e-02 -2.91706401e-01 -1.29806921e-01  1.72297040e-03
  -8.59465200e-01 -2.08427655e+00 -2.64380651e+00  1.00248953e-01
  -3.88383310e-01]]
Intercept [3.71643509]
