In [1]:
from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

%matplotlib inline

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/anshul1004/DecisionTree/master/data_set/Social_Network_Ads.csv')

In [4]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [6]:
df.shape

(400, 5)

In [7]:
df.columns

Index(['User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'], dtype='object')

In [8]:
df = df.drop('User ID',axis=1)

# *Converting the  Gender Column*

In [10]:
df['Gender'] = df['Gender'].map({'Male':0, 'Female':1})

In [11]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,0,19,19000,0
1,0,35,20000,0
2,1,26,43000,0
3,1,27,57000,0
4,0,19,76000,0


# *Seperating the Independent and Dependent Variable* 

In [14]:
X = df.iloc[:,:-1]
y = df['Purchased']

In [15]:
X.head()

Unnamed: 0,Gender,Age,EstimatedSalary
0,0,19,19000
1,0,35,20000
2,1,26,43000
3,1,27,57000
4,0,19,76000


In [16]:
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Purchased, dtype: int64

# *Perfoming the Train Test split*

In [17]:
from sklearn.model_selection import train_test_split

Xtrain,Xtest, ytrain,ytest = train_test_split(X,y, test_size=0.20, random_state=42)

In [18]:
Xtrain.shape, Xtest.shape

((320, 3), (80, 3))

In [19]:
ytrain.shape, ytest.shape

((320,), (80,))

## Scaling the  Xtest

In [20]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

In [21]:
Xtrain_scaled = scaler.fit_transform(Xtrain)

Xtest_scaled = scaler.fit_transform(Xtest)

# *Traning the logistic regression Model*

In [23]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

In [24]:
model.fit(Xtrain_scaled,ytrain)

In [26]:
ypred = model.predict(Xtest_scaled)

In [88]:
from sklearn.metrics import accuracy_score, classification_report

print(f"Accuracy Score :\n{accuracy_score(ypred,ytest)*100}")
print("\n==============================================================\n")
print(f"Classification Report :\n{classification_report(ypred,ytest)}")

Accuracy Score :
87.5


Classification Report :
              precision    recall  f1-score   support

           0       0.94      0.88      0.91        56
           1       0.75      0.88      0.81        24

    accuracy                           0.88        80
   macro avg       0.85      0.88      0.86        80
weighted avg       0.88      0.88      0.88        80



# *Traning the logistic regression Model with Hyperparameter Tunung*

In [77]:
rm = list(range(20,50))

In [80]:
from sklearn.model_selection import GridSearchCV

parameter = { 'penalty': ('l1', 'l2', 'elasticnet', None),
            "C": [1,10,20],
            'solver': ('lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'),
            'fit_intercept': (True,False),
            'random_state': [0,45]
}

clf = GridSearchCV(model, param_grid=parameter, cv=2)

In [82]:
## Spliting of train data train and validation

clf.fit(Xtrain_scaled,ytrain)

In [83]:
clf.best_params_

{'C': 1,
 'fit_intercept': True,
 'penalty': None,
 'random_state': 0,
 'solver': 'lbfgs'}

In [84]:
classifier = LogisticRegression(C=1, penalty=None, solver='lbfgs',fit_intercept=True,random_state=0)

In [85]:
classifier.fit(Xtrain_scaled,ytrain)

In [86]:
y_pred = classifier.predict(Xtest_scaled)

In [87]:
print(f"Accuracy Score :\n{accuracy_score(y_pred,ytest)}")
print("\n==============================================================\n")
print(f"Classification Report :\n{classification_report(y_pred,ytest)}")

Accuracy Score :
0.85


Classification Report :
              precision    recall  f1-score   support

           0       0.90      0.87      0.89        54
           1       0.75      0.81      0.78        26

    accuracy                           0.85        80
   macro avg       0.83      0.84      0.83        80
weighted avg       0.85      0.85      0.85        80

