Libraries

In [4]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import copy 
import math


Importing dataset

In [5]:
d_train = pd.read_csv("Classification_train.csv")

Training the data

In [6]:
def classes(cat):
    df = d_train.copy()
    df.label = df.label.apply(lambda x: int(x != cat)) 
    return df 

In [7]:
d_train.label.value_counts()

label
4    3118
6    3094
5    3018
1    3002
3    2993
9    2990
0    2979
7    2942
8    2936
2    2928
Name: count, dtype: int64

In [8]:
theta_initial = np.zeros(shape = 784)
lambda_tmp= 0.00

In [9]:
def sigmoid(x1, theta):
        z = np.matmul(x1,theta)
        h_theta = 1/(1+np.exp(-z))
        return h_theta

In [10]:
def predict(x1, theta):
   m =  x1.shape[0]
   h_theta = sigmoid(x1, theta)
   for i in range(m):
      if h_theta[i] >= 0.5:
            h_theta[i] = 1
      if h_theta[i] <= 0.5:
            h_theta[i] = 0
   return h_theta

In [11]:
def compute_cost(x, theta, y, lambda_):
    m = x.shape[0]
    z= np.dot(x, theta)
    h = 1/(1 + np.exp(-z))
    cost0 = np.matmul(np.transpose(y), np.log(h))
    cost1 = np.matmul(np.transpose(1-y), np.log(1-h))
    cost = -((cost1 + cost0))/m
    reguralization = np.matmul(np.transpose(theta),theta)*lambda_/(2*m)
    cost_reg = cost + reguralization
    return cost_reg

In [12]:
def gradient(X, y, theta):
    m = X.shape[0]
    y_pred = sigmoid(X, theta)
    error = y_pred - y
    dj_dtheta = np.matmul(np.transpose(X),error)
    dj_dtheta = dj_dtheta/m
    return dj_dtheta

In [13]:
def gradient_descent(x, y, theta,alpha, max_iters, lambda_):
    m = x.shape[0]
    J_history = []
    theta_curr = copy.deepcopy(theta)
    cost_curr = compute_cost(x, theta_curr, y, lambda_)
    iter_curr = 0
    while iter_curr< max_iters:
        dj_dtheta = gradient(x, y, theta_curr)
        theta_curr = (theta_curr - alpha * dj_dtheta) + theta_curr*lambda_/m
        cost_curr = compute_cost(x, theta_curr, y, lambda_)
        J_history.append(cost_curr)
        if iter_curr%100 == 0:
            print(f" iter:{iter_curr} cost_curr: {cost_curr}")
        iter_curr += 1
    return theta_curr, J_history, iter_curr

In [14]:
def function():
    cl = {}
    x_t = {}
    y_t = {}
    x_test = {}
    y_test = {}
    for i in range(0,10):
        cl[f'class{i}'] = classes(i)
        x_t[f'x_train{i}'] = cl[f'class{i}'][list(cl[f'class{i}'].columns.values)].drop(['label'], axis=1).to_numpy()[:21000,]
        y_t[f'y_train{i}'] = cl[f'class{i}'][['label']].to_numpy().reshape(30000,)[:21000,]
        x_test[f'x_test{i}'] = cl[f'class{i}'][list(cl[f'class{i}'].columns.values)].drop(['label'], axis=1).to_numpy()[21000:]
        y_test[f'y_test{i}'] = cl[f'class{i}'][['label']].to_numpy().reshape(30000,)[21000:]
    return cl, x_t, y_t, x_test, y_test

In [15]:
cl, x_train, y_train, x_test, y_test = function()

In [16]:
x_train_1 = x_train[f'x_train{1}']
y_train_1 = y_train[f'y_train{1}']

In [17]:
theta_initial = np.zeros(shape = 784)


Category 0 classification

In [18]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{0}'], theta_initial, y_train[f'y_train{0}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_0, Cost_history_0, num_iters = gradient_descent(x_train[f'x_train{0}'], y_train[f'y_train{0}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_0} ")
h0_theta = compute_cost(x_train[f'x_train{0}'], theta_final_0, y_train[f'y_train{0}'], lambda_tmp)
h0_theta

 iter:0 cost_curr: 0.39702212022738914
 iter:100 cost_curr: 0.0739427242603574
 iter:200 cost_curr: 0.05990984657770346
 iter:300 cost_curr: 0.05347007493547142
 iter:400 cost_curr: 0.049593241529554535
 iter:500 cost_curr: 0.04694002849617372
 iter:600 cost_curr: 0.044979413641551426
 iter:700 cost_curr: 0.04345374564759846
 iter:800 cost_curr: 0.04222133607033355
 iter:900 cost_curr: 0.04119723767138291
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  2.68769769e-07  1.28036976e-06
  2.73229469e-06  3.92173045e-06  4.02538717e-06  7.96608170e-06
  1.56477671e-05  4.52624865e-06  1.26343844e-06  7.05822023e-07
  2.38679707e-07  3.03620442e-07  3.35058945e-07  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  2.92

0.040335239164373975

Category 1 classification

In [19]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{1}'], theta_initial, y_train[f'y_train{1}'], lambda_tmp)
alpha = 0.00003
max_iters = 1000
theta_final_1, Cost_history_1, num_iters = gradient_descent(x_train[f'x_train{1}'], y_train[f'y_train{1}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_1} ")
h1_theta = compute_cost(x_train[f'x_train{1}'], theta_final_1, y_train[f'y_train{1}'], lambda_tmp)
h1_theta

 iter:0 cost_curr: 0.8880209116459631
 iter:100 cost_curr: 0.029528361666568984
 iter:200 cost_curr: 0.023721243932802106
 iter:300 cost_curr: 0.02096748631280235
 iter:400 cost_curr: 0.019247957582777895
 iter:500 cost_curr: 0.018030423942075435
 iter:600 cost_curr: 0.017102369131385835
 iter:700 cost_curr: 0.016359417187649987
 iter:800 cost_curr: 0.015743448027003382
 iter:900 cost_curr: 0.015219206617615747
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.59873594e-07  1.31764650e-06
  1.74356640e-05  4.39745828e-05  2.94226518e-05  4.39498248e-05
  1.34822848e-04  5.44085521e-05  1.16326618e-05  1.16595744e-05
  4.25750046e-06  5.06226344e-06  6.11108258e-07  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00

0.014768179058163758

Category 2 classification

In [20]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{2}'], theta_initial, y_train[f'y_train{2}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_2, Cost_history_2, num_iters = gradient_descent(x_train[f'x_train{2}'], y_train[f'y_train{2}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_2} ")
h2_theta = compute_cost(x_train[f'x_train{2}'], theta_final_2, y_train[f'y_train{2}'], lambda_tmp)
h2_theta

 iter:0 cost_curr: 0.31406319932579047
 iter:100 cost_curr: 0.026858900998741775
 iter:200 cost_curr: 0.019281713164177365
 iter:300 cost_curr: 0.01605094391536054
 iter:400 cost_curr: 0.014157081840355697
 iter:500 cost_curr: 0.012876416532140146
 iter:600 cost_curr: 0.011936021863261518
 iter:700 cost_curr: 0.011206883897835317
 iter:800 cost_curr: 0.010619077396827274
 iter:900 cost_curr: 0.010131090681123188
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  7.63586423e-07  3.43390593e-06
  8.86704934e-06  9.97766822e-06  8.24938852e-06  1.13668785e-05
  1.84560676e-05  1.33513531e-05  4.96976844e-06  1.32281219e-06
  4.21129904e-07  4.80738504e-07  2.11876421e-05  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+0

0.00972044989783824

Category 3 classification

In [21]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{3}'], theta_initial, y_train[f'y_train{3}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_3, Cost_history_3, num_iters = gradient_descent(x_train[f'x_train{3}'], y_train[f'y_train{3}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_3} ")
h3_theta = compute_cost(x_train[f'x_train{3}'], theta_final_3, y_train[f'y_train{3}'], lambda_tmp)
h3_theta

 iter:0 cost_curr: 0.3424409874742763
 iter:100 cost_curr: 0.08658125814480835
 iter:200 cost_curr: 0.0761737437205155
 iter:300 cost_curr: 0.07163054269677992
 iter:400 cost_curr: 0.06891599829522724
 iter:500 cost_curr: 0.06704513667597685
 iter:600 cost_curr: 0.065644886210616
 iter:700 cost_curr: 0.06453902452929926
 iter:800 cost_curr: 0.06363210579482646
 iter:900 cost_curr: 0.0628673422312418
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.93491070e-06  1.00710998e-05
  1.99672661e-05  6.61097664e-05  5.92948679e-05  5.51398042e-05
  4.33992616e-05  2.32653789e-05  1.09042115e-05  4.92776341e-06
  5.98938716e-06  1.04375873e-05 -1.36825860e-05  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  8.91608684

0.06221469154961124

Category 4 classification

In [22]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{4}'], theta_initial, y_train[f'y_train{4}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_4, Cost_history_4, num_iters = gradient_descent(x_train[f'x_train{4}'], y_train[f'y_train{4}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_4} ")
h4_theta = compute_cost(x_train[f'x_train{4}'], theta_final_4, y_train[f'y_train{4}'], lambda_tmp)
h4_theta

 iter:0 cost_curr: 0.3444410616786424
 iter:100 cost_curr: 0.05543015790622708
 iter:200 cost_curr: 0.045520144308751655
 iter:300 cost_curr: 0.041125102307670254
 iter:400 cost_curr: 0.03848677155377825
 iter:500 cost_curr: 0.036670554834437326
 iter:600 cost_curr: 0.03531663713521138
 iter:700 cost_curr: 0.034253116754479354
 iter:800 cost_curr: 0.03338617121647054
 iter:900 cost_curr: 0.03265968777383444
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  6.74351401e-07  1.04680983e-06
  5.48550614e-06  1.36617117e-05  1.35189335e-05  2.18277996e-05
  4.14293687e-05  6.79846114e-05  6.87641643e-06  3.90559092e-07
  1.21235262e-07  1.84849246e-07  7.38158516e-05  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  1.

0.03204356610578825

Category 5 classification

In [23]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{5}'], theta_initial, y_train[f'y_train{5}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_5, Cost_history_5, num_iters = gradient_descent(x_train[f'x_train{5}'], y_train[f'y_train{5}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_5} ")
h5_theta = compute_cost(x_train[f'x_train{5}'], theta_final_5, y_train[f'y_train{5}'], lambda_tmp)
h5_theta

 iter:0 cost_curr: 0.40659305032423065
 iter:100 cost_curr: 0.07962084362331401
 iter:200 cost_curr: 0.06513055703466307
 iter:300 cost_curr: 0.0585211849427007
 iter:400 cost_curr: 0.05441802697816127
 iter:500 cost_curr: 0.051502845259381175
 iter:600 cost_curr: 0.049269114632760506
 iter:700 cost_curr: 0.04747307486474212
 iter:800 cost_curr: 0.04598009499232316
 iter:900 cost_curr: 0.0447085062313984
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  1.91068711e-05  1.75989565e-05
  3.00099236e-05  5.21702146e-05  5.48861583e-05  7.35878124e-05
  7.50064879e-05  7.02898015e-05  3.95326950e-05  1.10168433e-06
  1.26938212e-07  1.79159527e-07  5.18399482e-05  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  2.102

0.043615587590593884

Category 6 classification

In [24]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{6}'], theta_initial, y_train[f'y_train{6}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_6, Cost_history_6, num_iters = gradient_descent(x_train[f'x_train{6}'], y_train[f'y_train{6}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_6} ")
h6_theta = compute_cost(x_train[f'x_train{6}'], theta_final_6, y_train[f'y_train{6}'], lambda_tmp)
h6_theta

 iter:0 cost_curr: 0.3099602210777581
 iter:100 cost_curr: 0.058985589327849704
 iter:200 cost_curr: 0.04901415164279025
 iter:300 cost_curr: 0.04461958022844503
 iter:400 cost_curr: 0.04198965761998524
 iter:500 cost_curr: 0.04017794355748017
 iter:600 cost_curr: 0.03882403191729343
 iter:700 cost_curr: 0.03775728970485077
 iter:800 cost_curr: 0.03688519239428806
 iter:900 cost_curr: 0.03615261823236843
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -6.23248166e-05 -9.71248986e-05
 -1.55186709e-04 -9.56305125e-05 -2.14332570e-04 -3.93465716e-04
 -4.02500115e-04 -1.83523443e-04  1.97676991e-05  1.42050665e-04
  4.83779180e-05  5.09242430e-05  6.55985549e-06  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  6.725

0.035530208354424414

Category 7 classification

In [25]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{7}'], theta_initial, y_train[f'y_train{7}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_7, Cost_history_7, num_iters = gradient_descent(x_train[f'x_train{7}'], y_train[f'y_train{7}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_7} ")
h7_theta = compute_cost(x_train[f'x_train{7}'], theta_final_7, y_train[f'y_train{7}'], lambda_tmp)
h7_theta

 iter:0 cost_curr: 0.3236480674611497
 iter:100 cost_curr: 0.09607894076816237
 iter:200 cost_curr: 0.08400198785713525
 iter:300 cost_curr: 0.07835910127309954
 iter:400 cost_curr: 0.07488265665632829
 iter:500 cost_curr: 0.07244321900920243
 iter:600 cost_curr: 0.07059470700995109
 iter:700 cost_curr: 0.0691215610887868
 iter:800 cost_curr: 0.06790542302553843
 iter:900 cost_curr: 0.06687515917756254
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  1.66257301e-05  1.21936181e-04
  1.97038324e-04 -1.14001142e-04 -5.78000254e-05  7.70823859e-05
  8.58844401e-05  1.69642014e-05 -1.12707931e-04 -2.06517787e-04
 -7.82920234e-05 -9.27400445e-05  7.58823826e-06  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.06935

0.06599333293198265

Category 8 classification

In [26]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{8}'], theta_initial, y_train[f'y_train{8}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_8, Cost_history_8, num_iters = gradient_descent(x_train[f'x_train{8}'], y_train[f'y_train{8}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_8} ")
h8_theta = compute_cost(x_train[f'x_train{8}'], theta_final_8, y_train[f'y_train{8}'], lambda_tmp)
h8_theta

 iter:0 cost_curr: 0.34439102191179544
 iter:100 cost_curr: 0.04433038949651539


KeyboardInterrupt: 

Category 9 classification

In [None]:
lambda_tmp= 0.00
compute_cost(x_train[f'x_train{9}'], theta_initial, y_train[f'y_train{9}'], lambda_tmp)
alpha = 0.00001
max_iters = 1000
theta_final_9, Cost_history_9, num_iters = gradient_descent(x_train[f'x_train{9}'], y_train[f'y_train{9}'], theta_initial, alpha, max_iters, lambda_tmp)
print(f"theta found by gradient descent: {theta_final_9} ")
h9_theta = compute_cost(x_train[f'x_train{9}'], theta_final_9, y_train[f'y_train{9}'], lambda_tmp)
h9_theta

 iter:0 cost_curr: 0.3223815572198538
 iter:100 cost_curr: 0.04333107996377652
 iter:200 cost_curr: 0.034382211488824185
 iter:300 cost_curr: 0.03051437646306629
 iter:400 cost_curr: 0.028194020711480788
 iter:500 cost_curr: 0.026581398677271464
 iter:600 cost_curr: 0.025364010265974624
 iter:700 cost_curr: 0.02439543912476865
 iter:800 cost_curr: 0.023596552608853037
 iter:900 cost_curr: 0.0229201692446669
theta found by gradient descent: [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  4.96904913e-07  5.50873323e-07
  1.43820584e-06  5.90317081e-06  6.10847393e-06  1.20272978e-05
  2.27171810e-05  6.39942377e-06  1.62766236e-06  3.71567972e-07
  1.79116388e-07  2.88586230e-07  1.68200690e-05  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  1.

0.022341512857704433

In [None]:
def max_cost():
    thetas = [h0_theta, h1_theta, h2_theta, h3_theta, h4_theta, h5_theta, h6_theta, h7_theta, h8_theta, h9_theta]
    max_index = thetas.index(max(thetas))
    print(f"h{max_index}_theta")

# Example usage
max_cost()

h7_theta


In [27]:
y_pred = predict(x_train[f'x_train{7}'], theta_final_7)
y_pred

array([1., 1., 1., ..., 0., 1., 1.])

In [28]:
y_pred_test = predict(x_test[f'x_test{7}'], theta_final_7)
y_pred_test

array([1., 1., 1., ..., 1., 1., 1.])

In [29]:
y_test[f'y_test{7}']

array([1, 1, 1, ..., 1, 1, 1], dtype=int64)

In [36]:
def R2_score(x, y, theta):
    y_pred = predict(x, theta)
    error = y_pred - y 
    y_mean = np.mean(y)
    y1 = y - y_mean
    rss = np.matmul(np.transpose(error),error)
    tss = np.matmul(np.transpose(y1),y1)
    rss_tss = rss/tss
    r2 = 1 - rss_tss
    return r2


In [37]:
def accuracy(y_true, y_pred):
    y_pred_binary = np.round(y_pred)
    correct_predictions = np.sum(y_true == y_pred_binary)
    accuracy = correct_predictions / len(y_true)
    return accuracy

In [38]:
accuracy(y_train[f'y_train{7}'], y_pred)

0.9805238095238096

In [39]:
accuracy(y_test[f'y_test{7}'], y_pred_test)

0.9802222222222222

Test Data

In [41]:
d_test = pd.read_csv("Classification_test.csv")

In [42]:
X_test_g = d_test[d_test.columns.values]
X_test_g = X_test_g.drop(['ID'], axis=1)
X_test_g = X_test_g.to_numpy()

In [43]:
predictions_test = predict(X_test_g, theta_final_5)
predictions_test

array([1., 1., 1., ..., 1., 1., 1.])

In [44]:
insert_index = 0
modified_array = np.insert(X_test_g, insert_index, predictions_test, axis=1)

In [45]:
df = pd.DataFrame(modified_array)
file_path = 'Logistic_regression.csv'
df.to_csv(file_path, index=False, header=False)