In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
# load the data into a pandas DataFrame
df = pd.read_csv("dataset/breast-cancer.csv")

column_y = 'diagnosis'

In [3]:
df

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,842517,M,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,84300903,M,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,84358402,M,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,M,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,926682,M,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,926954,M,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,927241,M,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [4]:
def preprocess_X_and_y(data, column_y, other_columns_to_remove=[]):
    '''
    This function is to separate the feature columns and the target columns.
    data is a dataframe of single table.
    column_y contain the name of the target column. (Assuming only one target column)
    other_columns_to_remove contain the list of the columns that are need to be deleted.
    '''
    
    if len(other_columns_to_remove) != 0:
        X = data.drop(columns=other_columns_to_remove)
    
    X = X.drop(columns=column_y)
    y = data[column_y]
    
    return X,y

In [5]:
# separate the features and target
X,y = preprocess_X_and_y(df, column_y, other_columns_to_remove=['id'])

In [6]:
def preprocess_encode_y(data, classes):
    '''
    This function process the target column and convert it into numerical format.
    y is the target column.
    classes is the dictionary of the conversion.
    '''
    
    for i in classes.keys():
        data = data.replace(i,classes[i])
    
    return data

In [7]:
classes = {'M':1, 'B':0}

y = preprocess_encode_y(y, classes)

In [8]:
X

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [9]:
y

0      1
1      1
2      1
3      1
4      1
      ..
564    1
565    1
566    1
567    1
568    0
Name: diagnosis, Length: 569, dtype: int64

In [10]:
number_of_trees = 10

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# train the random forest classifier
clf = RandomForestClassifier(n_estimators=number_of_trees)
clf.fit(X_train, y_train)

# evaluate the model on the test set
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.956140350877193


In [11]:
X_train

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
339,23.510,24.27,155.10,1747.0,0.10690,0.12830,0.23080,0.14100,0.1797,0.05506,...,30.67,30.73,202.40,2906.0,0.1515,0.26780,0.4819,0.20890,0.2593,0.07738
545,13.620,23.23,87.19,573.2,0.09246,0.06747,0.02974,0.02443,0.1664,0.05801,...,15.35,29.09,97.58,729.8,0.1216,0.15170,0.1049,0.07174,0.2642,0.06953
533,20.470,20.67,134.70,1299.0,0.09156,0.13130,0.15230,0.10150,0.2166,0.05419,...,23.23,27.15,152.00,1645.0,0.1097,0.25340,0.3092,0.16130,0.3220,0.06386
391,8.734,16.84,55.27,234.3,0.10390,0.07428,0.00000,0.00000,0.1985,0.07098,...,10.17,22.80,64.01,317.0,0.1460,0.13100,0.0000,0.00000,0.2445,0.08865
4,20.290,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.54,16.67,152.20,1575.0,0.1374,0.20500,0.4000,0.16250,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
318,9.042,18.90,60.07,244.5,0.09968,0.19720,0.19750,0.04908,0.2330,0.08743,...,10.06,23.40,68.62,297.1,0.1221,0.37480,0.4609,0.11450,0.3135,0.10550
565,20.130,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.69,38.25,155.00,1731.0,0.1166,0.19220,0.3215,0.16280,0.2572,0.06637
304,11.460,18.16,73.59,403.1,0.08853,0.07694,0.03344,0.01502,0.1411,0.06243,...,12.68,21.61,82.69,489.8,0.1144,0.17890,0.1226,0.05509,0.2208,0.07638
414,15.130,29.81,96.71,719.5,0.08320,0.04605,0.04686,0.02739,0.1852,0.05294,...,17.26,36.91,110.10,931.4,0.1148,0.09866,0.1547,0.06575,0.3233,0.06165


In [12]:
y_train

339    1
545    0
533    1
391    0
4      1
      ..
318    0
565    1
304    0
414    1
505    0
Name: diagnosis, Length: 455, dtype: int64

In [13]:
def preprocessing_new_features_row(predictions):
    
    prediction = []
    length_y = len(predictions[0])
    features = []
    number_of_trees = len(predictions)
    
    for i in range(length_y):
        features.append([])
        
    for i in range(number_of_trees):
        prediction.append(predictions[i])
        
    for i in range(number_of_trees):
        for j in range(length_y):
            features[j].append(prediction[i][j])
    
    return pd.DataFrame(features)

def preprocessing_new_features(model, data):
    '''
    This function will return the final predictions by each decision tree for the given data.
    '''
    
    predictions = []
    
    # This for loop will predict the predictions for each tree
    # data - predictions from each tree
    for i, tree in enumerate(model.estimators_):
        y_pred = tree.predict(data)
        predictions.append(y_pred)
    
    # converting to row - predictions from each tree.
    return preprocessing_new_features_row(predictions)

In [14]:
new_features_X = preprocessing_new_features(clf, X_train)
new_features_X



Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
450,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
451,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
452,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
453,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
def preprocess_one_hot_encoding(data, num_classes):
    data_length = len(data)
    
    y_data = []
    
    print(data_length)
    print(data)
    print(data.iloc[0])
    
    for i in range(data_length):
        t = []
        for k in range(num_classes):
            t.append(0)
        t[data.iloc[i]] = 1
        y_data.append(t)
    return y_data

In [16]:
y_train_ohe = preprocess_one_hot_encoding(y_train, 2)
y_train_ohe

455
339    1
545    0
533    1
391    0
4      1
      ..
318    0
565    1
304    0
414    1
505    0
Name: diagnosis, Length: 455, dtype: int64
1


[[0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 

In [17]:
# y_train = pd.DataFrame(y_train.values.tolist())
y_train = pd.DataFrame(y_train_ohe)

In [18]:
y_train

Unnamed: 0,0,1
0,0,1
1,1,0
2,0,1
3,1,0
4,0,1
...,...,...
450,1,0
451,0,1
452,1,0
453,0,1


In [19]:
print(new_features_X.shape)

(455, 10)


In [20]:
new_features_X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
450,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
451,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
452,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
453,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [21]:
# # number of zeros and ones
# one = 0
# zero = 0
# for i in range(len(y_train)):
#     if y_train[0][i]==1:
#         one+=1
#     else:
#         zero+=1

# print('0', zero)
# print('1', one)

### Training a simple MLP

In [22]:
from tensorflow.keras import layers
from tensorflow.keras import Input
from tensorflow.keras import models

2023-03-15 18:55:24.330675: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-15 18:55:30.283029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rajat1/gpu_pipeline/installations/lib64
2023-03-15 18:55:30.288654: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rajat1/gpu_pipeline/installations/lib64


In [23]:
model = models.Sequential()
model.add(Input(shape=(number_of_trees,)))
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dense(50, activation='relu'))
model.add(layers.Dense(2, activation='softmax'))

2023-03-15 18:55:33.516525: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rajat1/gpu_pipeline/installations/lib64
2023-03-15 18:55:33.516555: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1934] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-03-15 18:55:33.528756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild Tenso

In [24]:
model.compile(optimizer='Adam',
              loss='BinaryCrossentropy',
              metrics=['accuracy']
             )

In [25]:
model.fit(new_features_X,
          y_train,
          epochs=3,
          batch_size=32
         )

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f60004b9df0>

In [26]:
X_test

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
327,12.030,17.93,76.09,446.0,0.07683,0.03892,0.001546,0.005592,0.1382,0.06070,...,13.07,22.25,82.74,523.4,0.1013,0.07390,0.007732,0.02796,0.2171,0.07037
258,15.660,23.20,110.20,773.5,0.11090,0.31140,0.317600,0.137700,0.2495,0.08104,...,19.85,31.64,143.70,1226.0,0.1504,0.51720,0.618100,0.24620,0.3277,0.10190
144,10.750,14.97,68.26,355.3,0.07793,0.05139,0.022510,0.007875,0.1399,0.05688,...,11.95,20.72,77.79,441.2,0.1076,0.12230,0.097550,0.03413,0.2300,0.06769
518,12.880,18.22,84.45,493.1,0.12180,0.16610,0.048250,0.053030,0.1709,0.07253,...,15.05,24.37,99.31,674.7,0.1456,0.29610,0.124600,0.10960,0.2582,0.08893
21,9.504,12.44,60.34,273.9,0.10240,0.06492,0.029560,0.020760,0.1815,0.06905,...,10.23,15.66,65.13,314.9,0.1324,0.11480,0.088670,0.06227,0.2450,0.07773
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,15.750,20.25,102.60,761.3,0.10250,0.12040,0.114700,0.064620,0.1935,0.06303,...,19.56,30.29,125.90,1088.0,0.1552,0.44800,0.397600,0.14790,0.3993,0.10640
265,20.730,31.12,135.70,1419.0,0.09469,0.11430,0.136700,0.086460,0.1769,0.05674,...,32.49,47.16,214.00,3432.0,0.1401,0.26440,0.344200,0.16590,0.2868,0.08218
153,11.150,13.08,70.87,381.9,0.09754,0.05113,0.019820,0.017860,0.1830,0.06105,...,11.99,16.30,76.25,440.8,0.1341,0.08971,0.071160,0.05506,0.2859,0.06772
550,10.860,21.48,68.51,360.5,0.07431,0.04227,0.000000,0.000000,0.1661,0.05948,...,11.66,24.77,74.08,412.3,0.1001,0.07348,0.000000,0.00000,0.2458,0.06592


In [27]:
new_test_features_X = preprocessing_new_features(clf, X_test)
new_test_features_X



Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
109,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
110,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
y_test_ohe = preprocess_one_hot_encoding(y_test, 2)
y_test_ohe

114
327    0
258    1
144    0
518    0
21     0
      ..
223    1
265    1
153    0
550    0
297    1
Name: diagnosis, Length: 114, dtype: int64
0


[[1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [1, 0],
 [0, 1],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 [1, 0],
 [0, 1],
 [1, 0],
 [1, 0],
 [1, 0],
 [0, 1],
 [1, 0],
 [0, 1],
 [0, 1],
 

In [29]:
# y_test = pd.DataFrame(y_test.values.tolist())
y_test = pd.DataFrame(y_test_ohe)
y_test

Unnamed: 0,0,1
0,1,0
1,0,1
2,1,0
3,1,0
4,1,0
...,...,...
109,0,1
110,0,1
111,1,0
112,1,0


In [30]:
result = model.evaluate(new_test_features_X, y_test)



In [31]:
res = model.predict(new_test_features_X)



In [32]:
print(y_test[0][0])

1


In [33]:
t = 0
f = 0

for i in range(len(res)):
    if res[i]>0.5:
        if y_test[0][i] == 1:
            t+=1
        else:
            print('predicted',res[i])
            print('actual',y_test[0][i])
            f+=1
    else:
        if y_test[0][i] == 0:
            t+=1
        else:
            print('predicted',res[i])
            print('actual',y_test[0][i])
            f+=1


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
print('true pred =',t)
print("false pred =",f)