In [None]:
import numpy as np
np.set_printoptions(suppress=True)
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

In [None]:
# Read the Social Network Ads CSV and display the resulting frame.
DATA_PATH = "/kaggle/input/logistic-regression/Social_Network_Ads.csv"
df = pd.read_csv(DATA_PATH)
df

In [None]:
# Encode Gender numerically: 1 for Male, 0 for Female.
# The result is assigned back to the column rather than calling
# `df['Gender'].replace(..., inplace=True)`: that form mutates a temporary
# Series, triggers a pandas chained-assignment warning, and leaves `df`
# unchanged once copy-on-write is active.
# The identity entries (1 -> 1, 0 -> 0) keep this cell safe to re-run on an
# already-encoded column.
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0, 1: 1, 0: 0})
df.head(5)

In [None]:
# Shuffle the rows. A fixed seed keeps the order (and everything derived from
# it) reproducible across "Restart Kernel -> Run All".
df = df.sample(frac=1, random_state=42)
df.head(5)

In [None]:
# Age vs. estimated salary, coloured by the Purchased label.
plt.scatter(x=df['Age'], y=df['EstimatedSalary'], c=df['Purchased'])
plt.xlabel("Age")
plt.ylabel("EstimatedSalary")
plt.title("Purchased by Age and EstimatedSalary")
plt.show()

In [None]:
# Split first, then normalize.
# The scaler is fit on the training rows only and merely applied to the test
# rows, so no information about the held-out data leaks into preprocessing.
# As a consequence, scaled test values may fall slightly outside [0, 1].
# A fixed random_state makes the split reproducible.
feature_columns = ['Gender', 'Age', 'EstimatedSalary']
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    df[feature_columns], df['Purchased'], random_state=42
)
sc = MinMaxScaler()
X_train = sc.fit_transform(X_train_raw)
X_test = sc.transform(X_test_raw)

In [None]:
#Cost Function
def cost_function(X, Y, theta):
    """Mean binary cross-entropy of a logistic model.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix. It is multiplied by ``theta`` as-is, so any bias
        column must already be part of ``X``.
    Y : array-like, shape (n_samples,)
        Binary labels (0 or 1).
    theta : array-like, shape (n_features,)
        Model weights.

    Returns
    -------
    float
        ``mean(-Y*log(h) - (1-Y)*log(1-h))`` where ``h = sigmoid(X @ theta)``.
    """
    z = np.asarray(X @ theta, dtype=float)
    y = np.asarray(Y, dtype=float)
    # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) == logaddexp(0, z).
    # Evaluating the loss in log-space avoids log(0) = -inf (and the resulting
    # 0 * -inf = nan) when predictions saturate at exactly 0 or 1.
    loss_positive = y * np.logaddexp(0.0, -z)
    loss_negative = (1.0 - y) * np.logaddexp(0.0, z)
    return np.mean(loss_positive + loss_negative)

# Logistic Function
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Parameters
    ----------
    x : float or numpy array
        Input value(s); the function is applied element-wise.

    Returns
    -------
    Same shape as ``x``, with values in [0, 1].
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))) == exp(-logaddexp(0, -x)).
    # logaddexp never forms exp(-x) directly, so large negative inputs do not
    # overflow (the naive form emits a RuntimeWarning there).
    return np.exp(-np.logaddexp(0.0, -x))

# Time to Learn
def learn(X,Y,alpha=1,epoch=1000):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : array with a ``shape`` attribute, shape (n_samples, n_features)
        Feature matrix WITHOUT a bias column; a column of ones is prepended
        here.
    Y : array-like, n_samples binary labels (0 or 1)
        May be 1-D or a single column; it is flattened before use.
    alpha : float, default 1
        Learning rate.
    epoch : int, default 1000
        Number of full-batch gradient steps.

    Returns
    -------
    ll : list of float
        Cost recorded after each gradient step.
    theta : numpy array, shape (n_features + 1,)
        Learned weights; ``theta[0]`` is the bias.
    """
    X = np.column_stack((np.ones(X.shape[0]), X))
    # Flatten the labels to a plain 1-D float array so that an (n, 1) column
    # of labels cannot broadcast against the (n,) prediction vector.
    Y = np.asarray(Y, dtype=float).ravel()
    theta = np.zeros(X.shape[1])
    ll = []
    for _ in range(epoch):
        hx = sigmoid(X @ theta)
        # Gradient of the mean cross-entropy: X^T (h - y) / n.
        theta -= alpha * (X.T @ (hx - Y)) / len(X)
        ll.append(cost_function(X, Y, theta))
    return ll, theta

# Time to predict
def predict(X,theta,threshold=0.5):
    """Predict binary class labels from learned weights.

    Parameters
    ----------
    X : array with a ``shape`` attribute, shape (n_samples, n_features)
        Feature matrix WITHOUT a bias column; a column of ones is prepended
        here so it lines up with ``theta``.
    theta : numpy array, shape (n_features + 1,)
        Weights with the bias in position 0.
    threshold : float, default 0.5
        A sample is labelled 1 when its predicted probability is strictly
        greater than this value.

    Returns
    -------
    list of int
        One 0/1 label per row of ``X``.
    """
    X = np.column_stack((np.ones(X.shape[0]), X))
    probabilities = np.asarray(sigmoid(X @ theta))
    # Vectorized comparison instead of a Python-level loop; tolist() yields
    # plain Python ints.
    return (probabilities > threshold).astype(int).tolist()

In [None]:
# Fit on the training split, then score predictions on the held-out split.
ll, theta = learn(X=X_train, Y=y_train)
y_predict = predict(X=X_test, theta=theta)
test_accuracy = accuracy_score(y_test, y_predict)
print("accuracy_score::", test_accuracy)

In [None]:
# Cost recorded after each gradient-descent iteration.
iterations = range(len(ll))
fig, ax = plt.subplots()
ax.plot(iterations, ll)
ax.set_xlabel("iteration")
ax.set_ylabel("cost function")
plt.show()

In [None]:
# theta is ordered [bias, Gender, Age, EstimatedSalary]: learn() prepends the
# bias column to the scaled feature matrix.
b = theta[0]
w1, w2 = theta[2],theta[3]

# Boundary b + w1*age + w2*salary = 0, rewritten as salary = m*age + c.
# The Gender weight theta[1] is not included, so the line drawn is the
# decision boundary for Gender == 0 (Female) only.
c = -b/w2
m = -w1/w2

xmin, xmax = -0.1, 1.1
ymin, ymax = -0.1, 1.1
xd = np.array([xmin, xmax])
yd = m*xd + c

y_test_color = ['orange' if i==1 else 'teal' for i in y_test]

# Class 1 is predicted where b + w1*x + w2*y > 0: above the line when w2 > 0,
# below it when w2 < 0. Choose the shading accordingly so the orange region
# always marks the predicted-positive side.
if w2 > 0:
    below_color, above_color = 'tab:cyan', 'tab:orange'
else:
    below_color, above_color = 'tab:orange', 'tab:cyan'

plt.plot(xd, yd, 'k', lw=1, ls='--')
plt.fill_between(xd, yd, ymin, color=below_color, alpha=0.2)
plt.fill_between(xd, yd, ymax, color=above_color, alpha=0.2)
# X_test columns are [Gender, Age, EstimatedSalary] (no bias column).
plt.scatter(x=X_test.T[1],y=X_test.T[2],c=y_test_color)
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.xlabel("Age (scaled)")
plt.ylabel("EstimatedSalary (scaled)")
plt.show()