In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
# load the data into a pandas DataFrame
df = pd.read_csv("dataset/breast-cancer.csv")

column_y = 'diagnosis'

In [3]:
df

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,842517,M,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,84300903,M,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,84358402,M,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,M,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,926682,M,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,926954,M,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,927241,M,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [4]:
def preprocess_X_and_y(data, column_y, other_columns_to_remove=[]):
    '''
    This function is to separate the feature columns and the target columns.
    data is a dataframe of single table.
    column_y contain the name of the target column. (Assuming only one target column)
    other_columns_to_remove contain the list of the columns that are need to be deleted.
    '''
    
    if len(other_columns_to_remove) != 0:
        X = data.drop(columns=other_columns_to_remove)
    
    X = X.drop(columns=column_y)
    y = data[column_y]
    
    return X,y

In [5]:
# separate the features and target
X,y = preprocess_X_and_y(df, column_y, other_columns_to_remove=['id'])

In [6]:
def preprocess_encode_y(data, classes):
    '''
    This function process the target column and convert it into numerical format.
    y is the target column.
    classes is the dictionary of the conversion.
    '''
    
    for i in classes.keys():
        data = data.replace(i,classes[i])
    
    return data

In [7]:
classes = {'M':1, 'B':0}

y = preprocess_encode_y(y, classes)

In [8]:
X

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [9]:
y

0      1
1      1
2      1
3      1
4      1
      ..
564    1
565    1
566    1
567    1
568    0
Name: diagnosis, Length: 569, dtype: int64

In [10]:
number_of_trees = 10

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# train the random forest classifier
clf = RandomForestClassifier(n_estimators=number_of_trees)
clf.fit(X_train, y_train)

# evaluate the model on the test set
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9649122807017544


In [11]:
X_train

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
194,14.86,23.21,100.40,671.4,0.10440,0.19800,0.169700,0.088780,0.1737,0.06672,...,16.08,27.78,118.60,784.7,0.13160,0.46480,0.458900,0.172700,0.3000,0.08701
382,12.05,22.72,78.75,447.8,0.06935,0.10730,0.079430,0.029780,0.1203,0.06659,...,12.57,28.71,87.36,488.4,0.08799,0.32140,0.291200,0.109200,0.2191,0.09349
402,12.96,18.29,84.18,525.2,0.07351,0.07899,0.040570,0.018830,0.1874,0.05899,...,14.13,24.61,96.31,621.9,0.09329,0.23180,0.160400,0.066080,0.3207,0.07247
197,18.08,21.84,117.40,1024.0,0.07371,0.08642,0.110300,0.057780,0.1770,0.05340,...,19.76,24.70,129.10,1228.0,0.08822,0.19630,0.253500,0.091810,0.2369,0.06558
415,11.89,21.17,76.39,433.8,0.09773,0.08120,0.025550,0.021790,0.2019,0.06290,...,13.05,27.21,85.09,522.9,0.14260,0.21870,0.116400,0.082630,0.3075,0.07351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,13.01,22.22,82.01,526.4,0.06251,0.01938,0.001595,0.001852,0.1395,0.05234,...,14.00,29.02,88.18,608.8,0.08125,0.03432,0.007977,0.009259,0.2295,0.05843
316,12.18,14.08,77.25,461.4,0.07734,0.03212,0.011230,0.005051,0.1673,0.05649,...,12.85,16.47,81.60,513.1,0.10010,0.05332,0.041160,0.018520,0.2293,0.06037
562,15.22,30.62,103.40,716.9,0.10480,0.20870,0.255000,0.094290,0.2128,0.07152,...,17.52,42.79,128.70,915.0,0.14170,0.79170,1.170000,0.235600,0.4089,0.14090
138,14.95,17.57,96.85,678.1,0.11670,0.13050,0.153900,0.086240,0.1957,0.06216,...,18.55,21.43,121.40,971.4,0.14110,0.21640,0.335500,0.166700,0.3414,0.07147


In [12]:
y_train

194    1
382    0
402    0
197    1
415    0
      ..
178    0
316    0
562    1
138    1
503    1
Name: diagnosis, Length: 455, dtype: int64

In [13]:
def preprocessing_new_features_row(predictions):
    
    prediction = []
    length_y = len(predictions[0])
    features = []
    number_of_trees = len(predictions)
    
    for i in range(length_y):
        features.append([])
        
    for i in range(number_of_trees):
        prediction.append(predictions[i])
        
    for i in range(number_of_trees):
        for j in range(length_y):
            features[j].append(prediction[i][j])
    
    return pd.DataFrame(features)

def preprocessing_new_features(model, data):
    '''
    This function will return the final predictions by each decision tree for the given data.
    '''
    
    predictions = []
    
    # This for loop will predict the predictions for each tree
    # data - predictions from each tree
    for i, tree in enumerate(model.estimators_):
        y_pred = tree.predict(data)
        predictions.append(y_pred)
    
    # converting to row - predictions from each tree.
    return preprocessing_new_features_row(predictions)

In [14]:
new_features_X = preprocessing_new_features(clf, X_train)
new_features_X



Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
450,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
453,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0


In [15]:
def preprocess_one_hot_encoding(data, num_classes):
    data_length = len(data)
    
    y_data = []
    
    print(data_length)
    print(data)
    print(data.iloc[0])
    
    for i in range(data_length):
        t = []
        for k in range(num_classes):
            t.append(0)
        t[data.iloc[i]] = 1
        y_data.append(t)
    return y_data

In [16]:
y_train_ohe = preprocess_one_hot_encoding(y_train, 2)
y_train_ohe

455
194    1
382    0
402    0
197    1
415    0
      ..
178    0
316    0
562    1
138    1
503    1
Name: diagnosis, Length: 455, dtype: int64
1


[[0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 

In [17]:
# y_train = pd.DataFrame(y_train.values.tolist())
y_train = pd.DataFrame(y_train_ohe)

In [18]:
y_train

Unnamed: 0,0,1
0,0,1
1,1,0
2,1,0
3,0,1
4,1,0
...,...,...
450,1,0
451,1,0
452,0,1
453,0,1


In [19]:
print(new_features_X.shape)

(455, 10)


In [20]:
new_features_X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
450,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
451,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
453,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0


In [21]:
# # number of zeros and ones
# one = 0
# zero = 0
# for i in range(len(y_train)):
#     if y_train[0][i]==1:
#         one+=1
#     else:
#         zero+=1

# print('0', zero)
# print('1', one)

### Training a simple MLP

In [22]:
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras import models

2023-03-01 09:43:28.718361: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-01 09:43:29.435643: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rajat1/gpu_pipeline/installations/lib64
2023-03-01 09:43:29.435714: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rajat1/gpu_pipeline/installations/lib64


In [23]:
model = models.Sequential()
model.add(Input(shape=(number_of_trees,)))
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))

2023-03-01 09:43:30.030516: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rajat1/gpu_pipeline/installations/lib64
2023-03-01 09:43:30.030554: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-03-01 09:43:30.031190: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild Tenso

In [24]:
model.compile(optimizer='Adam',
              loss='BinaryCrossentropy',
              metrics=['accuracy']
             )

In [25]:
model.fit(new_features_X,
          y_train,
          epochs=3,
          batch_size=32
         )

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f2c54291e20>

In [26]:
X_test

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
251,11.50,18.45,73.28,407.4,0.09345,0.05991,0.02638,0.02069,0.1834,0.05934,...,12.97,22.46,83.12,508.9,0.1183,0.10490,0.08105,0.06544,0.2740,0.06487
431,12.40,17.68,81.47,467.8,0.10540,0.13160,0.07741,0.02799,0.1811,0.07102,...,12.88,22.91,89.61,515.8,0.1450,0.26290,0.24030,0.07370,0.2556,0.09359
529,12.07,13.44,77.83,445.2,0.11000,0.09009,0.03781,0.02798,0.1657,0.06608,...,13.45,15.77,86.92,549.9,0.1521,0.16320,0.16220,0.07393,0.2781,0.08052
222,10.18,17.53,65.12,313.1,0.10610,0.08502,0.01768,0.01915,0.1910,0.06908,...,11.17,22.84,71.94,375.6,0.1406,0.14400,0.06572,0.05575,0.3055,0.08797
373,20.64,17.35,134.80,1335.0,0.09446,0.10760,0.15270,0.08941,0.1571,0.05478,...,25.37,23.17,166.80,1946.0,0.1562,0.30550,0.41590,0.21120,0.2689,0.07055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,15.30,25.27,102.40,732.4,0.10820,0.16970,0.16830,0.08751,0.1926,0.06540,...,20.27,36.71,149.30,1269.0,0.1641,0.61100,0.63350,0.20240,0.4027,0.09876
253,17.30,17.08,113.00,928.2,0.10080,0.10410,0.12660,0.08353,0.1813,0.05613,...,19.85,25.09,130.90,1222.0,0.1416,0.24050,0.33780,0.18570,0.3138,0.08113
67,11.31,19.04,71.80,394.1,0.08139,0.04701,0.03709,0.02230,0.1516,0.05667,...,12.33,23.84,78.00,466.7,0.1290,0.09148,0.14440,0.06961,0.2400,0.06641
460,17.08,27.15,111.20,930.9,0.09898,0.11100,0.10070,0.06431,0.1793,0.06281,...,22.96,34.49,152.10,1648.0,0.1600,0.24440,0.26390,0.15550,0.3010,0.09060


In [27]:
new_test_features_X = preprocessing_new_features(clf, X_test)
new_test_features_X



Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
109,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
110,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [28]:
y_test_ohe = preprocess_one_hot_encoding(y_test, 2)
y_test_ohe

114
251    0
431    0
529    0
222    0
373    1
      ..
28     1
253    1
67     0
460    1
36     1
Name: diagnosis, Length: 114, dtype: int64
0


[[1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 

In [29]:
# y_test = pd.DataFrame(y_test.values.tolist())
y_test = pd.DataFrame(y_test_ohe)
y_test

Unnamed: 0,0,1
0,1,0
1,1,0
2,1,0
3,1,0
4,0,1
...,...,...
109,0,1
110,0,1
111,1,0
112,0,1


In [30]:
result = model.evaluate(new_test_features_X, y_test)



In [None]:
res = model.predict(new_test_features_X)

In [None]:
print(y_test[0][0])

In [None]:
t = 0
f = 0

for i in range(len(res)):
    if res[i]>0.5:
        if y_test[0][i] == 1:
            t+=1
        else:
            print('predicted',res[i])
            print('actual',y_test[0][i])
            f+=1
    else:
        if y_test[0][i] == 0:
            t+=1
        else:
            print('predicted',res[i])
            print('actual',y_test[0][i])
            f+=1


In [None]:
print('true pred =',t)
print("false pred =",f)