In [None]:
import pandas as pd
import numpy as np

In [None]:
# The CSV has no header row, so the column labels are supplied here:
# two input features followed by the outcome column.
col_names = ['x1', 'x2', 'result']

df = pd.read_csv(
    "./Logistic_Regression_Dataset_Students.csv",
    header=None,
    names=col_names,
)
df

In [None]:
# Pull the two feature columns out of the data frame as a 2-D NumPy array
# (one row per sample, columns in the order x1, x2).
x = df.loc[:, ['x1', 'x2']].to_numpy()
x

In [None]:
# Pull the label column out of the data frame as a 1-D NumPy array.
y = df.loc[:, 'result'].to_numpy()
y

In [None]:
# Standardize the input features column by column so that each one has
# zero mean and unit variance.
from sklearn import preprocessing
xp = preprocessing.scale(x, axis=0)

In [None]:
# train_test_split is no longer used in this cell; the import is kept in case
# other cells rely on it.
from sklearn.model_selection import KFold, train_test_split

# Logistic regression trained with per-sample (stochastic) gradient descent,
# evaluated with 5-fold cross-validation.
#
# Model:   prediction = sigmoid(b0 + b1*x1 + b2*x2)
# Update:  b <- b + alpha * (y - prediction) * prediction * (1 - prediction) * x
#          (the gradient step for a squared-error loss through the sigmoid;
#          the bias b0 uses x = 1.0)
#
# Assumes the labels in `y` are encoded as 0/1 -- TODO confirm against the dataset.

epoch = 1000    # number of full passes over the training fold
alpha = 0.001   # learning rate: scales the size of every weight update

# Shuffle with a fixed seed so the folds do not depend on the row order of the
# CSV and the run is reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

fold_accuracies = []

for train_index, test_index in kf.split(xp):
    # Use the indices produced by KFold so that every fold trains and
    # validates on a different partition of the data.
    xtrain, xtest = xp[train_index], xp[test_index]
    ytrain, ytest = y[train_index], y[test_index]
    x1 = xtrain[:, 0]
    x2 = xtrain[:, 1]

    # Weights start from zero for every fold.
    b0 = 0.0
    b1 = 0.0
    b2 = 0.0

    for _ in range(epoch):
        for i in range(len(xtrain)):
            # The whole linear combination must be negated inside exp();
            # negating only b0 yields a different (wrong) model.
            prediction = 1 / (1 + np.exp(-(b0 + b1*x1[i] + b2*x2[i])))
            # Factor shared by the three weight updates; computed once so
            # all of them use the same pre-update prediction.
            step = alpha * (ytrain[i] - prediction) * prediction * (1 - prediction)
            b0 = b0 + step * 1.0
            b1 = b1 + step * x1[i]
            b2 = b2 + step * x2[i]

    # Score this fold on its held-out samples.
    fold_prob = 1 / (1 + np.exp(-(b0 + b1*xtest[:, 0] + b2*xtest[:, 1])))
    fold_accuracies.append(np.mean(np.round(fold_prob) == ytest))

# After the loop, b0/b1/b2 and xtrain/xtest/ytrain/ytest hold the values from
# the last fold.
print(b0)
print(b1)
print(b2)
print("mean cross-validation accuracy : ", np.mean(fold_accuracies))

In [None]:
# Prepare the held-out samples for prediction with the trained weights.
final_predictions = list()

# Split the test matrix into its two feature columns.
x3, x4 = xtest[:, 0], xtest[:, 1]

# Show the true labels of the test samples for reference.
print(ytest)

In [None]:
# Predict a class label for every test sample with the trained weights.
# The sigmoid is 1 / (1 + exp(-z)); the denominator must be parenthesized,
# otherwise the expression evaluates to 1 + exp(-z), which is never below 1
# and so can never round to class 0.
probabilities = 1 / (1 + np.exp(-(b0 + b1*x3 + b2*x4)))

# Round each probability to the nearest class label (0 or 1).
y_pred = np.round(probabilities).astype(int).tolist()

# Rebuilt from scratch on every run, so re-executing this cell does not
# accumulate duplicate entries.
final_predictions = list(y_pred)

print(final_predictions)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label equals the true label.
print("accuracy : ", accuracy_score(y_true=ytest, y_pred=y_pred))