# ***Hyper_tunning in Logistic Regression:***


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#importing data
df=pd.read_csv('/content/Churn_Modelling.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [5]:
#Droping the columns which are not usefull
df.drop(['RowNumber','CustomerId','Surname'],axis=1,inplace=True)

In [6]:
#Scaling the data for better outcome ->Importing Standard Scaler 
#LabelEncoding on columns which have catogrical data ->Importing LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [7]:
label_encoding =LabelEncoder()
df['Gender'] =label_encoding.fit_transform(df['Gender'])
df['Geography']=label_encoding.fit_transform(df['Geography'])

In [8]:
df['Gender'].unique()

array([0, 1])

In [9]:
df['Geography'].unique()

array([0, 2, 1])

In [10]:
#dividing data into Independent and Dependent data
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

In [11]:
print(X.shape)
print(y.shape)

(10000, 10)
(10000,)


In [12]:
#Applying Scaling to all data we can also apply Scaling on indivisual row 
Scaler=StandardScaler()
X_scale=Scaler.fit_transform(X.values)

In [13]:
#Train test Splits
from sklearn.model_selection import train_test_split

In [14]:
X_train,X_test,y_train,y_test =train_test_split(X_scale,y,test_size=0.3,random_state=2)

# ***LOGISTIC_REGRESSION***

In [15]:
#import Logistic Regression 
from sklearn.linear_model import LogisticRegression
model_Lr=LogisticRegression()

In [16]:
#Traning & Testing
model_Lr.fit(X_train,y_train)
y_pred=model_Lr.predict(X_test)

In [17]:
#Accuracy
from sklearn import metrics

In [18]:
print("Accuracy_Score :",metrics.accuracy_score(y_test,y_pred))
print("Classification_Report :\n",metrics.classification_report(y_test,y_pred))
print('Confusion_Matrix :\n',metrics.confusion_matrix(y_test,y_pred))

Accuracy_Score : 0.8086666666666666
Classification_Report :
               precision    recall  f1-score   support

           0       0.83      0.97      0.89      2415
           1       0.53      0.16      0.25       585

    accuracy                           0.81      3000
   macro avg       0.68      0.56      0.57      3000
weighted avg       0.77      0.81      0.76      3000

Confusion_Matrix :
 [[2332   83]
 [ 491   94]]


In [19]:
#HYPER-TUNING in Logistic_Regression

Logistic regression does have not really have any critical hyperparameters to tune.

Sometime few of them are imp. which can inhace the performance

->solver in [‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’]

->penalty in [‘none’, ‘l1’, ‘l2’, ‘elasticnet’]

->C in [100, 10, 1.0, 0.1, 0.01]
C parameter controls the penality strength, which can also be effective

for all parameter of Logistic Regession :https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html


In [20]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [21]:
#defining the parametres:
model =LogisticRegression()
solvers=['newton-cg','lbfgs','liblinear','sag','saga']
penalty=['l1','l2','elasticnet']
c_values=[100,10,1.0,0.1,0.001]

GRID_CV

In [22]:
grid=dict(solver=solvers,penalty=penalty,C=c_values)

In [23]:
grid_search =GridSearchCV(estimator=model,param_grid=grid,n_jobs=-1,cv=5,scoring='accuracy',error_score=0)

In [24]:
grid_result=grid_search.fit(X_train,y_train)

In [25]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']

Best: 0.806714 using {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}


Randomised Search Cv

In [26]:
random=dict(solver=solvers,penalty=penalty,C=c_values)

In [27]:
random_cv=RandomizedSearchCV(estimator=model,param_distributions=random,n_iter=5,scoring='accuracy',n_jobs=-1,random_state=1,error_score=0,cv=10)

In [28]:
random_result=random_cv.fit(X_train,y_train)

In [29]:
print("Best: %f using %s" % (random_result.best_score_, random_result.best_params_))
means = random_result.cv_results_['mean_test_score']
stds = random_result.cv_results_['std_test_score']

Best: 0.806857 using {'solver': 'saga', 'penalty': 'l1', 'C': 10}


In [30]:
##Now if you get the more accuracy in hyper-tunning just copy those parameter and pass while making instance of algorithms
