In [126]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [127]:
###Implementing logistic regression using SK learn

In [128]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [129]:
df = pd.read_csv('https://drive.google.com/uc?export=view&id=1hltgTUHnnisOgPOTu9YWcvsnJHHA5D-Y')
column_to_remove = 'User ID'
df = df.drop(columns=[column_to_remove])



In [130]:
df.loc[df['Gender']=='Male','Gender'] = 1
df.loc[df['Gender']=='Female','Gender'] = 0
df

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
...,...,...,...,...
395,0,46,41000,1
396,1,51,23000,1
397,0,50,20000,1
398,1,36,33000,0


In [131]:
### Splitting databases into dependent and independent features
X = df.iloc[:,:-1]
Y = df.iloc[:,-1]

#**Creating DataFrames raw, Normalised and standard**

**Raw DataSet**

In [132]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
  X, Y, test_size=0.30, random_state=42)
X_train

Unnamed: 0,Gender,Age,EstimatedSalary
157,1,29,75000
109,0,38,80000
17,1,45,26000
347,0,54,108000
24,1,46,23000
...,...,...,...
71,0,24,27000
106,0,26,35000
270,0,43,133000
348,1,39,77000


**Normalised Dataset**

In [133]:
### Part - 2 normalised data
from sklearn.preprocessing import MinMaxScaler
# Initialize the MinMaxScaler
scaler = MinMaxScaler()

# Fit and transform the data
normalized_data = scaler.fit_transform(df)

# Create a new DataFrame with the normalized data
normalized_df = pd.DataFrame(normalized_data, columns=df.columns)
print("Original Data:")
print(df)
print("\nNormalized Data:")
print(normalized_df)
X_norm = normalized_df.iloc[:,:-1]
Y_norm = normalized_df.iloc[:,-1]

X_train_norm, X_test_norm, Y_train_norm, Y_test_norm = train_test_split(
  X_norm, Y_norm, test_size=0.30, random_state=42)

Original Data:
    Gender  Age  EstimatedSalary  Purchased
0        1   19            19000          0
1        1   35            20000          0
2        0   26            43000          0
3        0   27            57000          0
4        1   19            76000          0
..     ...  ...              ...        ...
395      0   46            41000          1
396      1   51            23000          1
397      0   50            20000          1
398      1   36            33000          0
399      0   49            36000          1

[400 rows x 4 columns]

Normalized Data:
     Gender       Age  EstimatedSalary  Purchased
0       1.0  0.023810         0.029630        0.0
1       1.0  0.404762         0.037037        0.0
2       0.0  0.190476         0.207407        0.0
3       0.0  0.214286         0.311111        0.0
4       1.0  0.023810         0.451852        0.0
..      ...       ...              ...        ...
395     0.0  0.666667         0.192593        1.0
396     1.0  0.

**Standardized Dataset**

In [134]:
###Part - 3
import pandas as pd
from sklearn.preprocessing import StandardScaler

columns_to_standardize = ['Age', 'EstimatedSalary']
data_to_standardize = df[columns_to_standardize]

scaler = StandardScaler()
standardized_data = scaler.fit_transform(data_to_standardize)

X_standard = pd.DataFrame(standardized_data, columns=columns_to_standardize)
X_standard['Gender'] = df['Gender']

X_train_stand, X_test_stand, Y_train_stand, Y_test_stand = train_test_split(
  X_standard, df['Purchased'], test_size=0.30, random_state=42)


# **Using SK Learn**

In [135]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression();

In [136]:
classifier.fit(X_train,Y_train)

In [137]:
###prediction
y_prediction =classifier.predict(X_test)

In [138]:
## accuracy score
from sklearn.metrics import accuracy_score,classification_report

In [139]:
score = accuracy_score(y_prediction,Y_test)
print(score)

0.6083333333333333


In [140]:
print(classification_report(y_prediction,Y_test))

              precision    recall  f1-score   support

           0       1.00      0.61      0.76       120
           1       0.00      0.00      0.00         0

    accuracy                           0.61       120
   macro avg       0.50      0.30      0.38       120
weighted avg       1.00      0.61      0.76       120



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [141]:
classifier.fit(X_train_norm,Y_train_norm)
y_prediction_norm = classifier.predict(X_test_norm)
score= accuracy_score(Y_test_norm,y_prediction_norm)
print(score)

0.8416666666666667


In [142]:
print(classification_report(Y_test_norm,y_prediction_norm))

              precision    recall  f1-score   support

         0.0       0.80      0.99      0.88        73
         1.0       0.97      0.62      0.75        47

    accuracy                           0.84       120
   macro avg       0.88      0.80      0.82       120
weighted avg       0.87      0.84      0.83       120



In [143]:
classifier.fit(X_train_stand,Y_train_stand)
y_prediction_stand = classifier.predict(X_test_stand)
score= accuracy_score(Y_test_stand,y_prediction_stand)
print(score)

0.8583333333333333


In [144]:
print(classification_report(Y_test_stand,y_prediction_stand))

              precision    recall  f1-score   support

           0       0.83      0.97      0.89        73
           1       0.94      0.68      0.79        47

    accuracy                           0.86       120
   macro avg       0.88      0.83      0.84       120
weighted avg       0.87      0.86      0.85       120

