In [40]:
## Lets see more complex data

# make a prediction logistic regression model
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
# define dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, n_classes=2, random_state=1)

In [41]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [42]:
from sklearn.metrics import r2_score

In [43]:
classification=LogisticRegression()
classification.fit(X_train,y_train) #Creates best fit line considering only train data

In [44]:
#Calculating adjusted r2 accuracy
y_pred_test=classification.predict(X_test)
r2_accuracy=r2_score(y_test,y_pred_test)
r2_accuracy*100

15.314043754410722

## HyperParameter tunning using penalty and C combination

### GridsearchCV

In [31]:
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [32]:
parameters={'penalty':('l1', 'l2','elasticnet'),'C':[1,10,20,30]}

In [33]:
classifier=LogisticRegression()

In [34]:
clf=GridSearchCV(classifier,param_grid=parameters,cv=5)

In [35]:
#Splitting training data to train anf validation
clf.fit(X_train,y_train)

In [23]:
clf.best_params_

{'C': 1, 'penalty': 'l2'}

In [24]:
#Therefore best parameter got selected in hyperparametertunning is C=1 and Penalty=l2

In [45]:
clf.best_score_*100 #Got 80% of accuracy

80.87500000000001

In [46]:
###Therefore we got to know for which paramter i am getting best value

In [47]:
classfier=LogisticRegression(C=1,penalty='l2')

In [49]:
classfier.fit(X_train,y_train)

In [50]:
y_pred=classfier.predict(X_test)

In [51]:
y_pred

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 1])

In [53]:
classfier.predict_proba(X_test) ### Therefore we can see like example for first
#datapoint the 0 probablity is 71percent and 1probablity is 28percent so 0 got selected 
#due to more probablity

array([[0.71649202, 0.28350798],
       [0.19508969, 0.80491031],
       [0.12418141, 0.87581859],
       [0.05045906, 0.94954094],
       [0.88775659, 0.11224341],
       [0.75067497, 0.24932503],
       [0.97980488, 0.02019512],
       [0.3921745 , 0.6078255 ],
       [0.59920135, 0.40079865],
       [0.39295203, 0.60704797],
       [0.20428696, 0.79571304],
       [0.80257879, 0.19742121],
       [0.86422932, 0.13577068],
       [0.92665682, 0.07334318],
       [0.00131743, 0.99868257],
       [0.04171096, 0.95828904],
       [0.56288536, 0.43711464],
       [0.89322764, 0.10677236],
       [0.29278211, 0.70721789],
       [0.00870994, 0.99129006],
       [0.71879454, 0.28120546],
       [0.5108267 , 0.4891733 ],
       [0.76230298, 0.23769702],
       [0.73170811, 0.26829189],
       [0.10155737, 0.89844263],
       [0.04046512, 0.95953488],
       [0.57926768, 0.42073232],
       [0.00526468, 0.99473532],
       [0.03101648, 0.96898352],
       [0.96093035, 0.03906965],
       [0.

In [54]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [57]:
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[78 13]
 [29 80]]
0.79
              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



## Randomized Search cv

In [58]:
from sklearn.model_selection import RandomizedSearchCV

In [59]:
random_clf=RandomizedSearchCV(LogisticRegression(),param_distributions=parameters,cv=5,n_iter=20)

In [60]:
random_clf

In [61]:
random_clf.fit(X_train,y_train)

In [62]:
random_clf.best_params_

{'penalty': 'l2', 'C': 1}

In [63]:
###Here also got same parameters

## HyperParameter tunning using penalyt and solver combination

In [68]:
parameters={"penalty":('l1', 'l2','elasticnet'),'solver':('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga')}

In [69]:
classifier=LogisticRegression()

In [70]:
clf=GridSearchCV(classifier,param_grid=parameters,cv=5)

In [71]:
#Splitting training data to train anf validation
clf.fit(X_train,y_train)

In [72]:
clf.best_params_

{'penalty': 'l1', 'solver': 'liblinear'}

In [73]:
#Therefore best parameter got selected in hyperparametertunning is penalty=l1 and solver=libnear

In [74]:
clf.best_score_*100 #Got 80% of accuracy

80.87500000000001

In [75]:
classfier=LogisticRegression(C=1,penalty='l2')

In [76]:
classfier.fit(X_train,y_train)

In [77]:
y_pred=classfier.predict(X_test)

In [78]:
y_pred

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 1])

In [79]:
classfier.predict_proba(X_test) ### Therefore we can see like example for first
#datapoint the 0 probablity is 71percent and 1probablity is 28percent so 0 got selected 
#due to more probablity

array([[0.71649202, 0.28350798],
       [0.19508969, 0.80491031],
       [0.12418141, 0.87581859],
       [0.05045906, 0.94954094],
       [0.88775659, 0.11224341],
       [0.75067497, 0.24932503],
       [0.97980488, 0.02019512],
       [0.3921745 , 0.6078255 ],
       [0.59920135, 0.40079865],
       [0.39295203, 0.60704797],
       [0.20428696, 0.79571304],
       [0.80257879, 0.19742121],
       [0.86422932, 0.13577068],
       [0.92665682, 0.07334318],
       [0.00131743, 0.99868257],
       [0.04171096, 0.95828904],
       [0.56288536, 0.43711464],
       [0.89322764, 0.10677236],
       [0.29278211, 0.70721789],
       [0.00870994, 0.99129006],
       [0.71879454, 0.28120546],
       [0.5108267 , 0.4891733 ],
       [0.76230298, 0.23769702],
       [0.73170811, 0.26829189],
       [0.10155737, 0.89844263],
       [0.04046512, 0.95953488],
       [0.57926768, 0.42073232],
       [0.00526468, 0.99473532],
       [0.03101648, 0.96898352],
       [0.96093035, 0.03906965],
       [0.

In [80]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [81]:
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[78 13]
 [29 80]]
0.79
              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



## Randomized Search cv

In [82]:
from sklearn.model_selection import RandomizedSearchCV

In [83]:
random_clf=RandomizedSearchCV(LogisticRegression(),param_distributions=parameters,cv=5,n_iter=20)

In [84]:
random_clf

In [85]:
random_clf.fit(X_train,y_train)

In [86]:
random_clf.best_params_

{'solver': 'liblinear', 'penalty': 'l1'}

In [87]:
###Here also got same parameters