In [1]:
# Importing required modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm
%matplotlib notebook


In [2]:
# Load the raw dataset. The statements below rely on an 'id' column and a
# 'diagnosis' column holding 'M' / 'B'; every other column is treated as a feature.
df = pd.read_csv('dataset1.csv')

# Replace missing values in the numeric columns with the column means.
# numeric_only=True keeps the string-valued 'diagnosis' column out of the
# reduction; without it pandas >= 2.0 raises TypeError on that column.
mean_values = df.mean(numeric_only=True)
df = df.fillna(mean_values)

# M -> +ve (1), B -> -ve (0)
df['diagnosis'] = df['diagnosis'].replace({'M': 1, 'B': 0})

# Remove the 'id' column
df = df.drop('id', axis=1)

# Move column 'diagnosis' to the end so the label is always the last column
col = df.pop('diagnosis')
df.insert(len(df.columns), 'diagnosis', col)

# Rename the columns to 'feature 1' .. 'feature N' followed by 'Class'
num_data, num_columns = df.shape
num_features = num_columns - 1
head = [f'feature {i}' for i in range(1, num_columns)] + ['Class']
df.columns = head
df

Unnamed: 0,feature 1,feature 2,feature 3,feature 4,feature 5,feature 6,feature 7,feature 8,feature 9,feature 10,...,feature 22,feature 23,feature 24,feature 25,feature 26,feature 27,feature 28,feature 29,feature 30,Class
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,1
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,1
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,1
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,1
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,1
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,1
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,1
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,1


In [3]:
# Names of the feature columns to standardize (everything except 'Class')
cols_to_normalize = [f'feature {i}' for i in range(1, num_columns)]

# Z-score each selected column with pandas: subtract the column mean and
# divide by the column standard deviation (pandas' default, ddof=1).
# NOTE(review): these statistics are computed on the full frame, before the
# train/test split done further down, so test rows influence the scaling.
feature_block = df[cols_to_normalize]
feature_means = feature_block.mean()
feature_stds = feature_block.std()
df[cols_to_normalize] = (feature_block - feature_means) / feature_stds

df

Unnamed: 0,feature 1,feature 2,feature 3,feature 4,feature 5,feature 6,feature 7,feature 8,feature 9,feature 10,...,feature 22,feature 23,feature 24,feature 25,feature 26,feature 27,feature 28,feature 29,feature 30,Class
0,1.102422,-2.071512,1.268389,0.983510,1.567087,3.280628,2.650837,2.530249,2.215566,2.253764,...,-1.358098,2.301575,1.998658,1.306537,2.614365,2.107374,2.294058,2.748204,1.935312,1
1,1.836635,-0.353322,1.684639,1.907030,-0.826235,-0.486643,-0.025417,0.547662,0.001391,-0.867889,...,-0.368879,1.533776,1.887973,-0.375282,-0.430066,-0.147968,1.086129,-0.243675,0.280943,1
2,1.586206,0.455786,1.565122,1.557513,0.941382,1.052000,1.361666,2.035440,0.938859,-0.397658,...,-0.023953,1.346291,1.454016,0.526944,1.081980,0.853340,1.953282,1.151242,0.201214,1
3,-0.767260,0.253509,-0.595257,-0.763792,3.280667,3.399917,1.913989,1.450431,2.864862,4.906602,...,0.133866,-0.249720,-0.551147,3.391291,3.889975,1.987485,2.173873,6.040726,4.930672,1
4,1.756953,-1.150804,1.775308,1.824624,0.280125,0.538866,1.369198,1.427237,-0.009552,-0.561956,...,-1.465481,1.337363,1.218589,0.220362,-0.313119,0.611645,0.728618,-0.867590,-0.396751,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,2.118368,0.720838,2.059677,2.341795,1.040926,0.218868,1.945370,2.318924,-0.312314,-0.930209,...,0.117596,1.751022,2.012713,0.378033,-0.273077,0.662958,1.627719,-1.358963,-0.708467,1
565,1.711420,2.083301,1.614577,1.722326,0.102368,-0.017817,0.691347,1.262558,-0.217473,-1.057681,...,2.045599,1.420690,1.492668,-0.690623,-0.394473,0.235195,0.733182,-0.531387,-0.973122,1
566,0.706858,2.043775,0.670803,0.577445,-0.839745,-0.038646,0.045004,0.105684,-0.808406,-0.894800,...,1.373645,0.578492,0.426222,-0.808876,0.350427,0.325352,0.413705,-1.103578,-0.318129,1
567,1.845172,2.334403,1.981372,1.733693,1.524426,3.269267,3.294795,2.656528,2.135315,1.042778,...,2.235958,2.301575,1.650790,1.429169,3.901415,3.195002,2.287972,1.917396,2.217684,1


In [4]:
# Shuffle the feature columns while retaining the original feature names:
# the column *values* are permuted, then written back under the unchanged
# 'feature 1' .. 'feature N' headers.
columns_to_shuffle = [f'feature {i}' for i in range(1, num_columns)]

# A fixed random_state makes the permutation repeatable, so Restart & Run All
# reproduces the same weights and metrics (42 matches the seed used for the
# row shuffle in the train/test split).
shuffled_columns = df[columns_to_shuffle].sample(frac=1, axis=1, random_state=42)

# Assign the raw values so pandas does not re-align them by column label,
# which would undo the shuffle.
df[columns_to_shuffle] = shuffled_columns.values
df

Unnamed: 0,feature 1,feature 2,feature 3,feature 4,feature 5,feature 6,feature 7,feature 8,feature 9,feature 10,...,feature 22,feature 23,feature 24,feature 25,feature 26,feature 27,feature 28,feature 29,feature 30,Class
0,-0.213814,2.748204,2.530249,1.567087,1.316568,1.998658,0.723390,2.830540,1.935312,0.906286,...,2.485391,2.301575,3.280628,-2.071512,1.102422,2.614365,1.268389,2.487545,0.660239,1
1,-0.604819,-0.243675,0.547662,-0.826235,-0.691723,1.887973,-0.440393,0.263095,0.280943,-0.099356,...,0.741749,1.533776,-0.486643,-0.353322,1.836635,-0.430066,1.684639,0.498816,0.259933,1
2,-0.296744,1.151242,2.035440,0.941382,0.815054,1.454016,0.212889,0.850180,0.201214,0.293301,...,1.180298,1.346291,1.052000,0.455786,1.586206,1.081980,1.565122,1.227596,1.423575,1
3,0.689095,6.040726,1.450431,3.280667,2.742924,-0.551147,0.818798,0.286341,4.930672,2.045711,...,-0.288125,-0.249720,3.399917,0.253509,-0.767260,3.889975,-0.595257,0.326087,1.114027,1
4,1.481763,-0.867590,1.427237,0.280125,-0.047796,1.218589,0.827742,1.272070,-0.396751,0.498889,...,1.189310,1.337363,0.538866,-1.150804,1.756953,-0.313119,1.775308,1.269426,1.143199,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,1.085429,-1.358963,2.318924,1.040926,0.192350,2.012713,0.665416,2.377491,-0.708467,0.167832,...,2.601897,1.751022,0.218868,0.720838,2.118368,-0.273077,2.059677,2.779634,2.065360,1
565,-0.423637,-0.531387,1.262558,0.102368,-0.069018,1.492668,0.251980,1.155840,-0.973122,-0.490124,...,1.290429,1.420690,-0.017817,2.083301,1.711420,-0.394473,1.614577,1.299356,0.807720,1
566,-0.379008,-1.103578,0.105684,-0.839745,0.661472,0.426222,0.510377,0.276450,-0.318129,0.036694,...,0.180539,0.578492,-0.038646,2.043775,0.706858,0.350427,0.670803,0.184730,0.611619,1
567,-0.172848,1.917396,2.656528,1.524426,2.016901,1.650790,1.301140,1.437265,2.217684,0.903262,...,1.008615,2.301575,3.269267,2.334403,1.845172,3.901415,1.981372,1.156917,0.785031,1


# Training and testing data split

In [5]:
# Fixed seed so the row shuffle, and therefore the split, is repeatable
random_seed = 42

# Shuffle every row of the DataFrame once
df = df.sample(frac=1, random_state=random_seed)

# The first 67% of the shuffled rows form the training set,
# the remaining rows form the testing set
train_size = int(0.67 * len(df))
train_df, test_df = df.iloc[:train_size], df.iloc[train_size:]
train_df

Unnamed: 0,feature 1,feature 2,feature 3,feature 4,feature 5,feature 6,feature 7,feature 8,feature 9,feature 10,...,feature 22,feature 23,feature 24,feature 25,feature 26,feature 27,feature 28,feature 29,feature 30,Class
204,-0.029302,0.183043,-0.275989,0.233908,-0.354960,-0.357536,-0.161787,-0.182535,0.196785,-0.078949,...,-0.220859,-0.333642,0.027626,-0.160345,-0.468452,-0.104649,-0.450600,-0.032714,-0.231130,0
70,-0.864934,-0.565331,0.788366,-0.445835,-0.499606,1.729851,-0.304900,1.295811,-0.999698,-0.792459,...,1.224702,1.745070,-0.027285,0.469736,1.372772,-0.131343,1.301360,1.383377,0.308553,1
131,-0.266769,-0.103052,0.823415,0.912941,-0.593430,0.482444,-0.124684,0.112738,-0.207949,-0.504107,...,0.175264,0.524924,0.340051,0.044257,0.382438,-0.094479,0.398798,0.249278,-0.140372,1
431,0.985513,-0.557249,-0.539371,0.642750,0.417975,-0.642331,0.553775,-0.327451,0.533970,0.835237,...,-0.547517,-0.525294,0.516145,-0.374247,-0.488372,0.054882,-0.434939,-0.823879,-0.020443,0
540,1.701579,-0.924162,-0.592203,0.247418,0.874253,-0.744232,0.783020,-0.612338,-0.144277,0.648923,...,-0.428152,-0.847591,0.145022,-1.127554,-0.733110,-0.269900,-0.716011,-0.457145,0.509517,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,0.311412,-0.284084,-0.502003,-0.837612,-0.698983,-0.230863,-0.523875,-0.109829,-0.642580,-0.364655,...,-0.167002,-0.180380,-0.859470,-0.769500,-0.021664,-0.820312,-0.092460,-0.057596,-0.445707,0
258,1.771520,0.608146,2.287999,1.033816,1.925869,0.605427,1.670516,3.587766,0.994071,0.849220,...,2.157853,1.084406,3.920625,0.909165,0.439354,1.671163,0.749107,3.197934,2.989143,1
232,-0.893243,0.016558,-1.094898,-1.319689,-1.041722,-0.721216,-0.949800,-0.681087,-0.602162,-0.476519,...,-0.546857,-0.857709,-1.298947,3.376013,-0.824175,-1.178462,-0.875092,-0.653674,-1.357658,0
115,0.253794,-0.747979,-0.743222,0.093836,-0.022106,-0.524267,-0.448675,-0.428349,0.081068,0.023089,...,-0.341762,-0.586896,-0.489484,0.520887,-0.622124,-0.091301,-0.638530,-0.336703,-0.662066,0


# Generating Features and Class array

In [6]:
# Boolean mask selecting the training rows labelled 1 (positive class)
positive_mask = train_df['Class'].eq(1)

# Rows matching the mask are positive; all remaining rows are negative
pos_points = train_df.loc[positive_mask]
neg_points = train_df.loc[~positive_mask]

# Per-class row counts, used later to average the scatter matrices
num_pos, num_neg = len(pos_points), len(neg_points)
print(neg_points)

     feature 1  feature 2  feature 3  feature 4  feature 5  feature 6  \
204  -0.029302   0.183043  -0.275989   0.233908  -0.354960  -0.357536   
431   0.985513  -0.557249  -0.539371   0.642750   0.417975  -0.642331   
540   1.701579  -0.924162  -0.592203   0.247418   0.874253  -0.744232   
81   -0.082257   1.012235   0.539931   0.813397   0.574349  -0.468221   
477  -1.243283  -0.358437  -0.915633  -1.996589  -0.687813  -0.285502   
..         ...        ...        ...        ...        ...        ...   
136  -0.154197  -1.921455  -0.590656   0.621419  -0.651512  -0.588043   
437   0.311412  -0.284084  -0.502003  -0.837612  -0.698983  -0.230863   
232  -0.893243   0.016558  -1.094898  -1.319689  -1.041722  -0.721216   
115   0.253794  -0.747979  -0.743222   0.093836  -0.022106  -0.524267   
120  -0.333380   0.186276  -0.584729  -0.187021  -0.568299  -0.651643   

     feature 7  feature 8  feature 9  feature 10  ...  feature 22  feature 23  \
204  -0.161787  -0.182535   0.196785   -0.

# Starting LDA

In [7]:
# Per-class mean of every column; the last entry (the 'Class' column) is
# dropped and the remainder is shaped into a single row of shape (1, n).
pos_mean = pos_points.mean().to_numpy()[:-1].reshape(1, -1)
neg_mean = neg_points.mean().to_numpy()[:-1].reshape(1, -1)
pos_mean

array([[-0.16286608,  0.56950751,  1.04990334,  0.47311216,  0.35837656,
         1.03842204,  0.27747873,  0.72713871,  0.41046923,  0.06792556,
         0.53212536,  1.04732374,  0.98076194, -0.05393097, -0.01579337,
         0.45207401,  0.90994652,  0.83967247,  0.62085562, -0.00922591,
         1.08940737,  0.71909452,  1.09495065,  0.79499175,  0.57531936,
         1.00189064,  0.78131833,  1.02154542,  0.76627494,  0.4630725 ]])

In [8]:
# Positive class: strip the label column, then centre on the class mean
pos_points = pos_points.drop(columns=['Class'])
pos_diff = pos_points - pos_mean

# Negative class: same treatment
neg_points = neg_points.drop(columns=['Class'])
neg_diff = neg_points - neg_mean

In [9]:
# Convert the centred points to plain ndarrays for the matrix algebra below.
# np.asarray accepts both a DataFrame and an ndarray, so re-running this cell
# is harmless (calling .to_numpy() a second time would raise AttributeError).
pos_diff = np.asarray(pos_diff)
neg_diff = np.asarray(neg_diff)
pos_diff

array([[-0.70206761, -1.13483809, -0.26153707, ...,  0.2798142 ,
         0.61710166, -0.15451903],
       [-0.10390299, -0.67255959, -0.22648809, ..., -0.62274774,
        -0.51699701, -0.60344479],
       [-0.00998182,  1.34788839,  1.60662492, ...,  0.95982662,
         0.39064253,  0.32195814],
       ...,
       [-0.57650633, -0.71296855, -0.26179478, ...,  0.07787112,
        -0.77014081, -0.52241126],
       [-0.34037118, -1.42901532, -0.82154756, ...,  0.02429438,
         0.0668204 , -0.0345894 ],
       [ 1.93438625,  0.0386381 ,  1.23809524, ..., -0.27243831,
         2.43165952,  2.5260702 ]])

# Getting the Within-Class Covariance Matrix (sum of the two per-class covariances)

In [10]:
# Scatter matrix of each class: the sum over rows x of the outer product
# x^T x, which for a matrix of centred rows D is exactly D^T D. One matrix
# product replaces the per-row Python loop (results agree up to
# floating-point rounding). Each result is (num_features, num_features).
pos_sum = pos_diff.T @ pos_diff
neg_sum = neg_diff.T @ neg_diff

# Divide each scatter matrix by its class size and add the two together
cov_matrix = pos_sum / num_pos + neg_sum / num_neg
cov_matrix

array([[ 1.66214949, -0.06471251,  0.23388342,  0.74033932,  0.61596692,
        -0.22433713,  0.52436863,  0.39308817,  0.22015287,  0.68024487,
         0.69772744, -0.02692216, -0.18343228,  0.81689563,  0.74019937,
         0.49194565,  0.37505073,  0.03435007, -0.07667836,  0.80127879,
        -0.28763165,  0.23164148, -0.25383959,  0.40149209,  0.06039422,
        -0.26203676,  0.00949848, -0.2225969 ,  0.41242809,  0.58419355],
       [-0.06471251,  2.1526792 ,  0.2383267 ,  0.64812541,  0.48283981,
        -0.22488705,  0.31981093, -0.13493937,  1.0992011 ,  0.30114107,
         0.75326045,  0.48307816, -0.33644352,  1.18156107,  0.98387954,
         1.4767063 ,  0.44575599,  0.71008922,  0.04194256, -0.33553341,
        -0.18628523, -0.23024463, -0.11315939,  0.79820897, -0.18957557,
        -0.32274949,  1.00764105, -0.26009533, -0.21859078,  0.05039144],
       [ 0.23388342,  0.2383267 ,  1.07329421,  0.69212757,  0.69931052,
         0.62390043,  0.59801485,  0.64315955,  0

In [11]:
# Sanity check: the matrix should be square, num_features x num_features
cov_matrix.shape

(30, 30)

# Getting Weight Array

In [12]:
# Discriminant direction: solve  cov_matrix @ w = (pos_mean - neg_mean)^T.
# np.linalg.solve gives the same w as multiplying by the explicit inverse,
# but avoids forming the inverse (cheaper, and more accurate when
# cov_matrix is ill-conditioned). w is a column vector, one weight per feature.
w = np.linalg.solve(cov_matrix, (pos_mean - neg_mean).T)

# Scale to unit length so projections are comparable across runs
w = w / np.linalg.norm(w)
w

array([[ 0.03235886],
       [ 0.03004852],
       [ 0.03906184],
       [ 0.00730442],
       [-0.0007842 ],
       [-0.56259821],
       [-0.08348073],
       [-0.18304311],
       [ 0.1464273 ],
       [-0.06173566],
       [ 0.0121644 ],
       [ 0.07135753],
       [-0.0510589 ],
       [ 0.02535336],
       [-0.02044448],
       [-0.00373752],
       [ 0.03472199],
       [ 0.11286042],
       [ 0.02551657],
       [ 0.01426785],
       [ 0.65414367],
       [-0.06289458],
       [ 0.17826701],
       [-0.13540325],
       [ 0.03113563],
       [-0.04031705],
       [-0.09954372],
       [ 0.05057987],
       [ 0.29747706],
       [ 0.07057209]])

In [13]:
# Sanity check: w should be a column vector with one weight per feature
w.shape

(30, 1)

In [14]:
# Project every training point of each class onto the weight vector,
# giving one scalar per point
project_pos = pos_points.to_numpy().dot(w.flatten())
project_neg = neg_points.to_numpy().dot(w.flatten())
print(project_pos.shape)

(136,)


# Function to find intersection of two normal curves

In [15]:
def solve(m1,m2,std1,std2):
    """Return the x-values at which two normal density curves intersect.

    Setting the log-densities of N(m1, std1) and N(m2, std2) equal yields a
    quadratic  q*x**2 + l*x + k = 0  whose roots are the intersection points.

    Parameters
    ----------
    m1, m2 : float
        Means of the first and second normal distribution.
    std1, std2 : float
        Standard deviations of the first and second normal distribution.

    Returns
    -------
    numpy.ndarray
        The roots as returned by ``np.roots``. When ``std1 == std2`` the
        quadratic coefficient is zero and only one root is returned.
    """
    var1, var2 = std1**2, std2**2
    quadratic = 1/(2*var1) - 1/(2*var2)
    linear = m2/var2 - m1/var1
    constant = m1**2/(2*var1) - m2**2/(2*var2) - np.log(std2/std1)
    return np.roots([quadratic, linear, constant])

# Finding the Intersection

In [16]:
# Fit a normal distribution to the projections of each class
positive_mean = project_pos.mean()
negative_mean = project_neg.mean()
positive_std = project_pos.std()
negative_std = project_neg.std()

# The two density curves can intersect at one or two points, and np.roots
# does not promise any particular ordering, so indexing a fixed position is
# unreliable (and fails when only one root exists). Keep the real roots and
# pick the one closest to the midpoint of the class means: whenever a root
# lies between the two means, that root is the one selected.
roots = np.atleast_1d(solve(positive_mean, negative_mean, positive_std, negative_std))
roots = roots[np.isreal(roots)].real
midpoint = (positive_mean + negative_mean) / 2

# Fall back to the midpoint itself if the curves have no real intersection
result = roots[np.argmin(np.abs(roots - midpoint))] if roots.size else midpoint

# Visualizing the results

In [17]:
# All projected points are drawn on the horizontal axis (y = 0)
arrp = np.zeros_like(project_pos)
arrn = np.zeros_like(project_neg)

fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title(f'Threshold Value: {result}')
ax.scatter(project_pos, arrp, color='red', label='positive point')
ax.scatter(project_neg, arrn, color='blue', label='negative point')

# Vertical line at the threshold: constant x, with y scaled far beyond the
# visible range so the line spans the whole plot
yp = np.linspace(-0.1, 0.1, 1000)
xp = np.full(1000, result)
ax.plot(xp, yp * 1000, color='green', label='Threshold value')

ax.set_ylim(-0.1, 0.1)
ax.set_xlim(-2, 2.5)
ax.legend();

<IPython.core.display.Javascript object>

In [18]:
# Grid on which the fitted normal curves are evaluated
x = np.linspace(-5, 5, 500)

# y = 0 for every projected point, one column per class
ptemp = np.zeros((project_pos.shape[0], 1))
ntemp = np.zeros((project_neg.shape[0], 1))

fig, ax = plt.subplots(figsize=(10, 7))
ax.set_title(f'Threshold Value: {result}')

# Normal curves fitted to each class' projections
ax.plot(x, norm.pdf(x, positive_mean, positive_std), color='red', label='Positive point')
ax.plot(x, norm.pdf(x, negative_mean, negative_std), color='blue', label='Negative point')

# The projected points themselves, drawn along the horizontal axis
ax.scatter(project_pos, ptemp, color='red')
ax.scatter(project_neg, ntemp, color='blue')

# Vertical line at the threshold: constant x, with y scaled far beyond the
# visible range so the line spans the whole plot
yp = np.linspace(-0.1, 0.1, 1000)
xp = np.full(1000, result)
ax.plot(xp, yp * 1000, color='green', label='Threshold value')

ax.set_ylim(-0.1, 2.5)
ax.set_xlim(-2, 2.5)
ax.legend();

<IPython.core.display.Javascript object>

# Evaluating the model on the training and testing data

In [19]:
# Evaluate the threshold classifier on the TRAINING projections
# (project_pos / project_neg still hold the training data at this point).
# A point is predicted negative (0) when its projection is strictly below
# `result`, and positive (1) otherwise.
pos_predicted = (~(project_pos < result)).astype(float)
neg_predicted = (~(project_neg < result)).astype(float)

# Column 0: projection, column 1: predicted label
pred_pos = np.column_stack((project_pos, pos_predicted))
pred_neg = np.column_stack((project_neg, neg_predicted))

# Confusion-matrix counts, computed without a Python loop
tp = int(pos_predicted.sum())      # positives predicted positive
fn = len(pos_predicted) - tp       # positives predicted negative
fp = int(neg_predicted.sum())      # negatives predicted positive
tn = len(neg_predicted) - fp       # negatives predicted negative

# Metrics; a zero denominator (e.g. nothing predicted positive) yields 0.0
# instead of raising ZeroDivisionError
total = tp + tn + fp + fn
accuracy = (tp + tn) / total if total else 0.0
Recall = tp / (tp + fn) if (tp + fn) else 0.0
Precision = tp / (tp + fp) if (tp + fp) else 0.0
print(f'Accuracy of the model with training is {accuracy * 100}')
print(f'Recall of the model with training is {Recall * 100}')
print(f'Precision of the model with training is {Precision * 100}')

Accuracy of the model with testing is 97.3753280839895
Recall of the model with testing is 94.11764705882352
Precision of the model with testing is 98.46153846153847


In [20]:
# Boolean mask selecting the testing rows labelled 1 (positive class)
positive_mask = test_df['Class'].eq(1)

# Rows matching the mask are positive; all remaining rows are negative.
# These names now refer to the TEST split, replacing the training subsets.
pos_points = test_df.loc[positive_mask]
neg_points = test_df.loc[~positive_mask]

num_pos, num_neg = len(pos_points), len(neg_points)
print(neg_points.shape)

(112, 31)


In [21]:
# Strip the label column so only feature columns remain for projection
pos_points = pos_points.drop(columns=['Class'])
neg_points = neg_points.drop(columns=['Class'])


In [22]:
# Project every testing point of each class onto the weight vector learned
# from the training data, giving one scalar per point
project_pos = pos_points.to_numpy().dot(w.flatten())
project_neg = neg_points.to_numpy().dot(w.flatten())
print(project_pos)

[ 0.46546247  0.3366026   0.55426028  0.24865036  0.26491838  0.21166944
  0.73969493  0.25804577  0.11690911  0.60152608  0.56542835  0.6869738
  0.42744701  0.08458258  0.03082597  0.36373557  0.74342213  0.67811469
  0.75507272  0.44704865  0.51664899  0.17689579  0.24723433  0.19441064
  0.33251408  0.10324432  0.46596591  0.62970644  0.83410841  0.54156468
  0.6064463   0.464161    0.61549803  0.79742047  0.47726832  0.92787494
  0.3509841   0.03715751  0.207409    0.46735168  0.88516249  0.13730356
  0.29809268 -0.03108672  0.13931482  0.68208066  0.82843665  0.29056147
  0.45930109  0.34396815  0.22498788  0.39192316  0.55268821  0.29604226
  0.07688535  0.6645185   0.51237789  0.59845431  0.57879278  0.16999398
  0.50881799  0.90201809  0.15244195  0.7820947   0.11799133  0.70928938
  1.02602317  0.22992519  0.80211488  0.15729784  0.41532519  1.05681693
  0.25260035  0.61103446  0.57430637  0.39102666]


In [23]:
# Evaluate the threshold classifier on the TESTING projections.
# A point is predicted negative (0) when its projection is strictly below
# `result`, and positive (1) otherwise.
pos_predicted = (~(project_pos < result)).astype(float)
neg_predicted = (~(project_neg < result)).astype(float)

# Column 0: projection, column 1: predicted label
pred_pos = np.column_stack((project_pos, pos_predicted))
pred_neg = np.column_stack((project_neg, neg_predicted))

# Confusion-matrix counts, computed without a Python loop
tp = int(pos_predicted.sum())      # positives predicted positive
fn = len(pos_predicted) - tp       # positives predicted negative
fp = int(neg_predicted.sum())      # negatives predicted positive
tn = len(neg_predicted) - fp       # negatives predicted negative

# Metrics; a zero denominator (e.g. nothing predicted positive) yields 0.0
# instead of raising ZeroDivisionError
total = tp + tn + fp + fn
accuracy = (tp + tn) / total if total else 0.0
Recall = tp / (tp + fn) if (tp + fn) else 0.0
Precision = tp / (tp + fp) if (tp + fp) else 0.0
print(f'Accuracy of the model with testing is {accuracy * 100}')
print(f'Recall of the model with testing is {Recall * 100}')
print(f'Precision of the model with testing is {Precision * 100}')

Accuracy of the model with testing is 96.80851063829788
Recall of the model with testing is 94.73684210526315
Precision of the model with testing is 97.2972972972973
