In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the raw CSV into a DataFrame; later cells read and modify `df`.
df = pd.read_csv(
    filepath_or_buffer='../input/logistic-regression/Social_Network_Ads.csv'
)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
# Histogram of Age (counts only, no density curve).
# `sns.distplot` is deprecated in seaborn >= 0.11 and scheduled for removal;
# `histplot` is its documented replacement for the histogram-only case.
sns.histplot(x=df.Age)

In [None]:
# Distribution of EstimatedSalary: density-normalised histogram with a KDE curve
# overlaid, which is what the deprecated `sns.distplot` default used to draw.
# `histplot(..., kde=True, stat='density')` is the documented replacement.
sns.histplot(x=df.EstimatedSalary, kde=True, stat='density')

In [None]:
# Bar chart of how many rows fall in each Gender category.
# The column is passed by keyword: in seaborn >= 0.12 the first positional
# argument is `data`, so a bare positional Series is no longer read as `x`
# (seaborn 0.11 already warned about this).
sns.countplot(x='Gender', data=df)

In [None]:
# Bar chart of the class balance of the target column.
# The column is passed by keyword: in seaborn >= 0.12 the first positional
# argument is `data`, so a bare positional Series is no longer read as `x`
# (seaborn 0.11 already warned about this).
sns.countplot(x='Purchased', data=df)

In [None]:
# Horizontal box plot of Age to eyeball spread and outliers.
# `x=` is given explicitly: in seaborn >= 0.12 the first positional argument is
# `data`, so a bare positional Series is no longer treated as the x variable.
sns.boxplot(x=df.Age)

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encoder used below to turn the Gender column into integer codes.
le = LabelEncoder()

In [None]:
# Replace the Gender column with the integer codes learned by the encoder.
df['Gender'] = le.fit_transform(df['Gender'])

In [None]:
# Preview the first five rows again, now with Gender encoded.
df.head(n=5)

In [None]:
# Feature matrix: every column except the 'User ID' and 'Purchased' columns.
X = df.drop(columns=['User ID', 'Purchased'])
# Target vector: the 'Purchased' column.
y = df.Purchased

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Baseline classifier: standardise the features, then fit a logistic regression.
#
# LogisticRegression applies an L2 penalty by default, and both that penalty and
# solver convergence are sensitive to feature scale. X presumably mixes columns
# on very different scales (Age, EstimatedSalary, encoded Gender) -- confirm
# with X.describe(). Keeping the scaler inside the pipeline means its mean and
# variance are learned only from whatever data `fit` is called on, so the test
# split does not leak into preprocessing.
#
# The resulting object still exposes fit / predict / predict_proba, so the cells
# that use `log` work unchanged.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

log = make_pipeline(StandardScaler(), LogisticRegression())

In [None]:
# Train the baseline model on the training split.
log.fit(X=X_train, y=y_train)

In [None]:
# Hard class predictions of the baseline model on the held-out split.
pred = log.predict(X=X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Fraction of held-out rows the baseline model classified correctly.
accuracy_score(y_true=y_test, y_pred=pred)

In [None]:
# Confusion matrix of the baseline predictions (rows: true class, columns: predicted).
confusion_matrix(y_true=y_test, y_pred=pred)

In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn import metrics

In [None]:
# Class-membership probabilities for the held-out rows; keep column 1, i.e. the
# probability of the second class in `log.classes_`.
probs = log.predict_proba(X_test)
preds = probs[:, 1]

# ROC operating points and the area under that curve.
fpr, tpr, threshold = metrics.roc_curve(y_test, preds)
logit_roc_auc = metrics.auc(fpr, tpr)

# Draw the curve on an explicit Axes, with the chance diagonal for reference.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label='Logistic Regression Base (area = %0.2f)' % logit_roc_auc)
ax.plot([0, 1], [0, 1], 'r--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])  # small headroom so the curve is not clipped at TPR = 1
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver operating characteristic')
ax.legend(loc='lower right')
plt.show()

# Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Grid search over penalty type and inverse regularisation strength C, selecting
# the combination with the best mean recall over 10 cross-validation folds.
#
# The estimator is a pipeline that standardises the features before the
# liblinear logistic regression. L1/L2 penalties depend on feature scale, so
# searching over C on raw, differently-scaled columns (presumably Age vs
# EstimatedSalary -- confirm with X.describe()) makes the comparison between
# grid points unreliable. Because the scaler is a pipeline step, it is re-fit on
# the training part of every fold, so validation folds never influence it.
#
# make_pipeline names each step after its lower-cased class name, hence the
# 'logisticregression__' prefix on the parameter grid keys.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

model = make_pipeline(StandardScaler(), LogisticRegression(solver='liblinear'))
params = {'logisticregression__penalty': ['l1', 'l2'],
          'logisticregression__C': [0.001, 0.009, 0.01, 0.9, 1, 5, 10, 25]}
gridmodel = GridSearchCV(model, param_grid=params, scoring='recall', cv=10)
gridmodel.fit(X_train, y_train)

In [None]:
# Show the estimator configured with the winning hyperparameters. GridSearchCV
# was built without a `refit` argument, so by its default this is the model
# refit on all of X_train.
gridmodel.best_estimator_

In [None]:
# Show the hyperparameter combination that scored best (by recall) in the search.
gridmodel.best_params_

In [None]:
# Show the scorer object the search used; it was requested via scoring='recall'.
gridmodel.scorer_

In [None]:
# Hard class predictions from the tuned (grid-searched) model on the held-out split.
y_predi = gridmodel.predict(X=X_test)

In [None]:
# The grid search selected its hyperparameters by recall, so report recall on
# the held-out split as well; accuracy alone cannot show whether tuning helped
# on the metric it optimised. Accuracy stays the cell's displayed value so it
# can be compared with the baseline accuracy computed earlier.
print('recall:', metrics.recall_score(y_test, y_predi))
accuracy_score(y_test, y_predi)