<a href="https://colab.research.google.com/github/nirmal428/MachineLearning/blob/main/LogisticRegresssion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Logistic Regression

Logistic Regression is a statistical and machine learning technique used for **classification problems,** not regression, despite its name. It predicts the probability that a given input belongs to a certain category.


Type: Supervised learning algorithm.

Purpose: Used for classification (binary or multi-class).

Output: Probability (between 0 and 1), often converted to classes using a threshold (e.g., 0.5).

Activation Function: Uses the sigmoid (logistic) function for binary classification and softmax for multi-class.

In [57]:
from sklearn.datasets import load_iris

In [58]:
data=load_iris()

In [59]:
data.DESCR



In [60]:
data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [61]:
import pandas as pd
import numpy as np

In [62]:
df=pd.DataFrame(data.data,columns=data.feature_names)

In [63]:
df['target']=data.target

In [64]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [65]:
x=df
y=data.target

In [66]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [67]:
#Binary classificaton
df_copy=df[df['target']!=2]

In [68]:
df_copy.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [69]:
x=df_copy.iloc[:,:-1]
y=df_copy.iloc[:,-1]

In [70]:
from sklearn.linear_model import LogisticRegression

In [71]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [72]:
classifier=LogisticRegression()

In [73]:
classifier.fit(x_train,y_train)

In [74]:
classifier.predict_proba(x_test)

array([[0.00118455, 0.99881545],
       [0.01579803, 0.98420197],
       [0.00304709, 0.99695291],
       [0.96959941, 0.03040059],
       [0.94269372, 0.05730628],
       [0.97161144, 0.02838856],
       [0.99356322, 0.00643678],
       [0.0316902 , 0.9683098 ],
       [0.97460702, 0.02539298],
       [0.9789308 , 0.0210692 ],
       [0.95512884, 0.04487116],
       [0.96072669, 0.03927331],
       [0.00430743, 0.99569257],
       [0.98586374, 0.01413626],
       [0.00927792, 0.99072208],
       [0.98146423, 0.01853577],
       [0.00207948, 0.99792052],
       [0.00125531, 0.99874469],
       [0.97461564, 0.02538436],
       [0.96116169, 0.03883831]])

In [75]:
y_pred=classifier.predict(x_test)

In [76]:
y_pred

array([1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0])

In [77]:
# Confusion matrics , accuracy score , classifiction report
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [78]:
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[12  0]
 [ 0  8]]
1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



Hyperparameter Tuning



1.   GridSearchCV



In [79]:
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [86]:
parameter={'penalty':('l1','l2','elasticnet',None),'C':[1,10,20]}

In [87]:
clf=GridSearchCV(classifier,param_grid=parameter,cv=5)

In [88]:
clf.fit(x_train,y_train)

In [89]:
clf.best_params_

{'C': 1, 'penalty': 'l2'}

In [90]:
classifier=LogisticRegression(C=1,penalty='l2')

In [91]:
classifier.fit(x_train,y_train)

In [92]:
y_pred=classifier.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[12  0]
 [ 0  8]]
1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20






2.   Randomized Search CV



In [93]:
from sklearn.model_selection import RandomizedSearchCV

In [94]:
random_clf=RandomizedSearchCV(LogisticRegression(),param_distributions=parameter,cv=5)

In [96]:
random_clf.fit(x_train,y_train)

In [97]:
random_clf.best_params_

{'penalty': None, 'C': 10}

In [98]:
y_pred=classifier.predict(x_test)
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[12  0]
 [ 0  8]]
1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



# Logistic Regressin for MultiClass Clasification

In [99]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

x,y=make_classification(n_samples=1000,n_features=5,n_classes=3,n_informative=3,random_state=42)

In [101]:
x

array([[ 1.55572975,  0.6281466 , -0.56727761, -2.5487865 , -1.5759087 ],
       [-3.19405179,  1.39563613,  3.03394286, -0.2802551 , -2.17230858],
       [-0.32906375, -0.46175963, -1.18311957,  1.4364098 , -1.22878966],
       ...,
       [ 1.20725533,  0.39664756,  0.09314482, -1.91477271,  0.19535975],
       [ 1.01830287, -0.73030323, -1.84204947,  0.81498396, -0.13300606],
       [-0.37602858, -0.62776716, -0.77757327,  1.7138004 ,  0.13127295]])

In [102]:
y

array([2, 1, 0, 1, 0, 1, 0, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 0, 2, 1, 0,
       1, 2, 0, 1, 2, 1, 2, 0, 1, 2, 1, 1, 0, 1, 1, 1, 1, 0, 0, 2, 1, 0,
       2, 2, 2, 1, 0, 0, 1, 2, 1, 2, 0, 0, 0, 1, 1, 1, 0, 1, 1, 2, 1, 0,
       1, 0, 1, 0, 2, 0, 0, 0, 2, 2, 0, 1, 1, 1, 0, 1, 2, 2, 1, 2, 0, 1,
       2, 2, 0, 0, 1, 1, 0, 2, 0, 1, 1, 1, 0, 0, 0, 0, 2, 1, 2, 2, 2, 2,
       2, 0, 2, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 0, 1, 2, 1, 1, 1, 2, 2, 2,
       2, 0, 0, 2, 0, 0, 0, 1, 1, 0, 2, 0, 2, 2, 2, 1, 2, 2, 0, 1, 0, 2,
       1, 0, 2, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0,
       2, 1, 1, 0, 0, 0, 1, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 1, 2, 1, 2, 1,
       1, 0, 2, 2, 2, 1, 2, 1, 2, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 2,
       2, 1, 0, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 0,
       0, 1, 0, 2, 0, 1, 2, 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 1, 2,
       0, 0, 0, 0, 2, 0, 1, 2, 2, 2, 1, 2, 0, 1, 2, 1, 1, 1, 0, 0, 1, 2,
       0, 0, 1, 1, 1, 0, 1, 2, 0, 1, 2, 2, 0, 0, 0,

In [100]:
model=LogisticRegression(multi_class='multinomial',solver='lbfgs')

In [103]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [104]:
model.fit(x_train,y_train)

In [105]:
y_pred=model.predict(x_test)

In [106]:
y_pred

array([0, 0, 1, 0, 1, 2, 2, 1, 2, 0, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 0, 2,
       2, 1, 2, 2, 1, 0, 0, 0, 2, 2, 0, 0, 0, 2, 1, 1, 2, 1, 0, 0, 1, 0,
       0, 1, 1, 0, 0, 0, 2, 0, 2, 2, 2, 2, 1, 2, 1, 2, 0, 1, 2, 0, 0, 2,
       0, 0, 2, 1, 1, 0, 0, 2, 2, 2, 0, 0, 0, 1, 2, 1, 2, 0, 0, 2, 2, 0,
       2, 0, 2, 0, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 1, 0, 1, 2, 1, 2, 0, 0,
       0, 2, 0, 2, 0, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 2, 1, 1, 0, 1, 1, 2,
       0, 2, 2, 1, 1, 0, 0, 0, 2, 1, 0, 0, 2, 1, 2, 1, 1, 0, 0, 2, 0, 0,
       2, 2, 2, 1, 0, 0, 2, 1, 0, 2, 2, 2, 0, 2, 2, 2, 0, 1, 1, 1, 2, 0,
       0, 2, 2, 2, 0, 0, 1, 2, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 2, 0, 0, 2,
       0, 1])

In [108]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[44 22  1]
 [25 31 17]
 [ 1  7 52]]
0.635
              precision    recall  f1-score   support

           0       0.63      0.66      0.64        67
           1       0.52      0.42      0.47        73
           2       0.74      0.87      0.80        60

    accuracy                           0.64       200
   macro avg       0.63      0.65      0.64       200
weighted avg       0.62      0.64      0.63       200

