In [96]:
import pandas as pd
import numpy as np

diabetes = pd.read_csv('diabetes.csv')
diabetes = diabetes.dropna()
diabetes

Unnamed: 0,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Age,Outcome
0,148,72,35,0,33.6,50,1
1,85,66,29,0,26.6,31,0
2,183,64,0,0,23.3,32,1
3,89,66,23,94,28.1,21,0
4,137,40,35,168,43.1,33,1
...,...,...,...,...,...,...,...
763,101,76,48,180,32.9,63,0
764,122,70,27,0,36.8,27,0
765,121,72,23,112,26.2,30,0
766,126,60,0,0,30.1,47,1


In [97]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(diabetes,diabetes['Outcome'],test_size=0.2)


In [98]:
df = x_train.copy()
diabetes_yes = df.where(df['Outcome']==1).dropna()
diabetes_yes_prob = len(diabetes_yes)/len(df)
diabetes_no = df.where(df['Outcome']==0).dropna()
diabetes_no_prob = len(diabetes_no)/len(df)

print(diabetes_yes_prob,diabetes_no_prob)

0.36319218241042345 0.6368078175895765


In [99]:
columns = df.iloc[:,:6].columns.to_list()
classes = 15
column_details = []
for column_name in columns:
    min_val = min(df[column_name].to_list())
    max_val = max(df[column_name].to_list())
    class_size = round((max_val-min_val)/classes , 2)

    low_bounds = list(np.arange(min_val,max_val,class_size))
    for i in range(len(low_bounds)):
        low_bounds[i] = round(low_bounds[i],2)
    x = low_bounds[-1]
    low_bounds.append(round(x + class_size,2))
    # print(low_bounds)
    # mid_class = round((class_size/2) , 2)
    dict = {}
    # start_mid = mid_class
    # df_temp = df[column_name]
    # print(df_temp)
    # print(len(diabetes_yes) , len(diabetes_no))

    for i in range(classes):
#         # print(start_mid)
        low_bound = low_bounds[i]
        upp_bound = low_bounds[i+1]

        yes_count = diabetes_yes.where(diabetes_yes[column_name]>=low_bound).dropna()
        if i<classes-1:
            yes_count = yes_count.where(yes_count[column_name]<upp_bound).dropna()
        else:
            yes_count = yes_count.where(yes_count[column_name]<=upp_bound).dropna()

        no_count = diabetes_no.where(diabetes_no[column_name]>=low_bound).dropna()
        if i<classes-1:
            no_count = no_count.where(no_count[column_name]<upp_bound).dropna()
        else:
            no_count = no_count.where(no_count[column_name]<=upp_bound).dropna()


#         # print(len(yes_count)/len(diabetes_yes))
#         # print(len(yes_count)/len(diabetes_yes) , len(no_count)/len(diabetes_no))

        dict[low_bounds[i]] = [len(yes_count)/len(diabetes_yes) , len(no_count)/len(diabetes_no)]
#         # print([low_bound,upp_bound])
#         start = round((start_mid + class_size),2)

    column_details.append([column_name, dict , class_size])
column_details

[['Glucose',
  {0.0: [0.008968609865470852, 0.0076726342710997444],
   13.27: [0.0, 0.0],
   26.54: [0.0, 0.0],
   39.81: [0.0, 0.0025575447570332483],
   53.08: [0.0, 0.01278772378516624],
   66.35: [0.004484304932735426, 0.058823529411764705],
   79.62: [0.03139013452914798, 0.16112531969309463],
   92.89: [0.08520179372197309, 0.2506393861892583],
   106.16: [0.14798206278026907, 0.1867007672634271],
   119.43: [0.17040358744394618, 0.1483375959079284],
   132.7: [0.12556053811659193, 0.0792838874680307],
   145.97: [0.12556053811659193, 0.05115089514066496],
   159.24: [0.10762331838565023, 0.017902813299232736],
   172.51: [0.10762331838565023, 0.01278772378516624],
   185.78: [0.08520179372197309, 0.010230179028132993]},
  13.27],
 ['BloodPressure',
  {0.0: [0.05829596412556054, 0.043478260869565216],
   8.13: [0.0, 0.0],
   16.26: [0.0, 0.0],
   24.39: [0.004484304932735426, 0.0025575447570332483],
   32.52: [0.004484304932735426, 0.0025575447570332483],
   40.65: [0.00448430493

In [100]:
def get_outcome(input_data):
    probabilities = []

    for i in range(6):
        value = input_data[i]
        prob_dict = column_details[i][1]
        class_range = column_details[i][2]

        key_list = list(prob_dict.keys())
        
        for key in key_list:
            low = key
            up = key+class_range

            if key_list[-1] == key:
                if value>=low and value <= up:
                    probabilities.append(prob_dict[key])
                    break
            else:
                if value>=low and value < up:
                    probabilities.append(prob_dict[key])
                    break

    yes=1
    no=1
    for i in probabilities:
        if i[0] == 0 and i[1]==0:
            continue
        yes=yes*i[0]
        no=no*i[1]
    
    if yes==1 or no==1:
        return 0

    product = [yes*diabetes_yes_prob , no*diabetes_no_prob]
    total = yes*diabetes_yes_prob + no*diabetes_no_prob

    
    outcome = [product[0]/total, product[1]/total]

    if outcome[0] > outcome[1]:
        return 1
    else:
        return 0

In [101]:
test = x_test.iloc[:,:6].copy()
test_set = []
for i in range(154):
    test_set.append(test.iloc[i].to_list())

by_nb = []
for i in test_set:
    by_nb.append(get_outcome(i))


In [102]:
by_dataset = y_test.to_list()
correct = 0
for i in range(len(by_nb)):
    if by_nb[i]==by_dataset[i]:
        correct = correct + 1

print(correct/len(by_nb)*100)

79.87012987012987


In [103]:
x_test

Unnamed: 0,Glucose,BloodPressure,SkinThickness,Insulin,BMI,Age,Outcome
499,154,74,32,193,29.3,39,0
409,172,68,49,579,42.4,28,1
319,194,78,0,0,23.5,59,1
232,79,80,25,37,25.4,22,0
250,106,52,0,0,31.2,42,0
...,...,...,...,...,...,...,...
345,126,88,36,108,38.5,49,0
463,88,78,30,0,27.6,37,0
597,89,24,19,25,27.8,21,0
484,145,0,0,0,44.2,31,1


In [104]:
print("Enter values for the following columns:\n" + ", ".join(columns) + "\n")
user_input  = input()
user_input = user_input.split(' ')
input_data = [float(i) for i in user_input]
print(input_data)
get_outcome(input_data)

Enter values for the following columns:
Glucose, BloodPressure, SkinThickness, Insulin, BMI, Age

[79.0, 80.0, 25.0, 37.0, 25.4, 22.0]


0