In [1]:
#importing libraries
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("BankNote.csv")
df.shape

(1096, 5)

In [3]:
df.info()
df.isnull().sum()
#Since no missing value, no need of preprocessing

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1096 entries, 0 to 1095
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   VWTI    1096 non-null   float64
 1   SWTI    1096 non-null   float64
 2   CWTI    1096 non-null   float64
 3   EI      1096 non-null   float64
 4   Class   1096 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 42.9 KB


VWTI     0
SWTI     0
CWTI     0
EI       0
Class    0
dtype: int64

In [4]:
split_point = int(0.8 * len(df))
train_data = df[:split_point]
test_data = df[split_point:]

In [5]:
#Dividing the dataset into 80-20
X_train = train_data[["VWTI", "SWTI", "CWTI", 'EI']]
Y_train = train_data["Class"]
X_test = test_data[["VWTI", "SWTI", "CWTI", 'EI']]
Y_test = test_data["Class"]

print(X_train.shape)
print(Y_train.shape)

(876, 4)
(876,)


In [6]:
#Scaling both the training and testing input
X_train = (X_train - X_train.mean())/X_train.std()
X_test = (X_test - X_test.mean())/X_test.std()
print(X_train)
print(X_test)

         VWTI      SWTI      CWTI        EI
0    0.657954 -1.012239  0.452235  0.298930
1    1.005168  0.012569  0.112062  0.875660
2   -1.478392 -2.384066  2.529880 -0.971344
3    0.057491 -0.812358  0.327120  0.123731
4    0.763454 -0.999392  0.966968  0.623798
..        ...       ...       ...       ...
871  1.311167  1.554857 -1.153826 -1.333326
872  1.193823  1.191265 -1.323813 -1.310179
873 -0.689998 -1.668046  1.657742  1.169499
874  0.923048 -0.702765  0.156735  0.729654
875  0.370170  1.091495 -0.819309 -0.338651

[876 rows x 4 columns]
          VWTI      SWTI      CWTI        EI
876  -0.102867 -0.211592 -0.896259  0.421636
877   0.544818 -1.422919  0.882849 -0.240501
878  -1.185864 -0.428639  0.482020  0.684151
879  -1.068011 -1.784403  1.220745  0.356181
880   1.433277 -1.287466  0.580730  0.427148
...        ...       ...       ...       ...
1091  0.310531  0.290648  0.098157  0.230776
1092 -0.336858 -0.725248  2.016847  0.827464
1093  0.809508  1.051964 -0.545135 -0.04620

In [7]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [8]:
def logistic_regression(X,Y,learning_rate,iterations):
    m,n = X.shape
    weight = np.zeros(n)
    bias = 0
    
    for i in range(iterations):
        y_pred = sigmoid(np.dot(X, weight) + bias)
        
        dw = 1/m * np.dot(X.T, (y_pred - Y))
        db = 1/m * np.sum(y_pred - Y)
        
        weight = weight - dw*learning_rate
        bias = bias - db*learning_rate
        
    return weight, bias
        

In [9]:
def accuracy(y_pred, y_actual):
    acc = np.mean(y_pred == y_actual)*100
    return acc

In [10]:
weight, bias = logistic_regression(X_train,Y_train,0.01,50000)
print(f"Calculated weights: {weight}")
print(f"Calculated bias: {bias}")

y_pred = sigmoid(np.dot(X_test, weight) + bias)
y_pred_binary = (y_pred >= 0.5).astype(int)
acc = accuracy(y_pred_binary, Y_test)
print(f"Accuracy for testing data: {acc}")

Calculated weights: [-5.11300389 -5.3044502  -4.95325597  0.33779717]
Calculated bias: -1.37091532996004
Accuracy for testing data: 98.63636363636363
