<a href="https://colab.research.google.com/github/sagyant/sample_codes/blob/main/TensorFlowBinaryClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install TensorFlow into the notebook environment through the shell escape.
!pip3 install tensorflow

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from random import randint
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
# Synthetic training data: each sample is an age, each label says whether the
# person had side effects (1) or not (0).
train_sample = []
train_label = []

# Outliers (roughly 5% of the data): younger people (13-64) WITH side effects
# and older people (65-100) WITHOUT side effects.
for _ in range(50):
  train_sample.extend((randint(13, 64), randint(65, 100)))
  train_label.extend((1, 0))

# Majority (roughly 95% of the data): younger people (13-64) WITHOUT side
# effects and older people (65-100) WITH side effects.
for _ in range(1000):
  train_sample.extend((randint(13, 64), randint(65, 100)))
  train_label.extend((0, 1))

# Convert to NumPy arrays and shuffle samples and labels together so the
# sample/label pairing is kept while the generation order is removed.
train_sample, train_label = shuffle(np.array(train_sample), np.array(train_label))

# Scale the ages into the range (0, 1). MinMaxScaler works on 2-D input, so
# the 1-D age array is reshaped into a single column first.
scaler = MinMaxScaler(feature_range=(0, 1))
age_column = train_sample.reshape(-1, 1)
scaled_train_sample = scaler.fit_transform(age_column)

In [None]:
# Hold out 10% of the scaled samples (and their labels); the remaining 90% are
# used for fitting. random_state is fixed so the split is reproducible.
# To inspect the held-out shapes: print(test_model.shape, '-', test_model_label.shape)
train_model, test_model, train_model_label, test_model_label = train_test_split(
    scaled_train_sample,
    train_label,
    train_size=0.9,
    random_state=36,
)

In [None]:
# List the GPUs TensorFlow can see and enable memory growth on each of them.
# Iterating (instead of indexing physical_devices[0]) keeps this cell from
# raising IndexError on a machine without a GPU, where the list is empty.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Number of GPUs Available: ", len(physical_devices))
for gpu_device in physical_devices:
  tf.config.experimental.set_memory_growth(gpu_device, True)

In [None]:
# Dense is the fully connected layer, the most widely used neural network
# layer. No explicit input layer is declared: input_shape on the first Dense
# layer describes the input, so that layer is also the first hidden layer.
model = Sequential()
# First hidden layer: 32 ReLU units fed by one feature per sample.
model.add(Dense(units=32, activation='relu', input_shape=(1,)))
# Second hidden layer: 16 ReLU units.
model.add(Dense(units=16, activation='relu'))
# Output layer: 2 softmax units, one per class.
model.add(Dense(units=2, activation='softmax'))

# Uncomment to print the layers that were created and their parameter counts.
# model.summary()

In [None]:
# compile() prepares the model for training by fixing the optimizer, the loss
# function and the metrics that are reported while training.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# fit() runs the actual training.
#   batch_size: number of data points processed per batch. With a batch size of
#               10 and 50 data points there are 50/10 = 5 batches per epoch.
#   epochs:     number of complete passes over the training data.
#   shuffle:    left at its default (True), so it is not passed explicitly.
model.fit(
    x=train_model,
    y=train_model_label,
    batch_size=20,
    epochs=50,
    verbose=2,
)

In [None]:
# There are two ways to validate while training:
#   1. validation_data  - an explicit tuple (x_val, y_val)
#   2. validation_split - a fraction of the training data held out by Keras
# This cell demonstrates the first way. The held-out arrays from
# train_test_split are passed directly as the (x_val, y_val) tuple, so the cell
# does not depend on a valid_set variable that is only assigned further down.
model.fit(
    x=train_model,
    y=train_model_label,
    validation_data=(test_model, test_model_label),
    epochs=50,
    batch_size=20,
    verbose=2,
)

In [None]:
# valid_set for the first method (validation_data): a tuple (x_val, y_val).
# Wrapping zip(...) in np.array does not build that structure on Python 3,
# because zip is a lazy iterator and np.array stores it as one opaque object.
valid_set = (test_model, test_model_label)

In [None]:
# Turn the held-out labels into a single-column 2-D array.
test_model_label = test_model_label.reshape((-1, 1))

In [None]:
# Display the shape of the held-out feature array.
test_model.shape

In [None]:
# Join the held-out features and labels column-wise into one 2-D array.
# NOTE(review): validation_data is described above as a tuple (x_val, y_val);
# a single concatenated array is presumably not accepted there - confirm
# before passing this valid_set to model.fit.
valid_set = np.concatenate([test_model, test_model_label], axis=1)

In [None]:
# Synthetic test data, generated with the same rules as the training data:
# each sample is an age, each label says whether the person had side effects
# (1) or not (0).
test_label = []
test_sample = []

# Outliers (roughly 5% of the data).
for _ in range(10):
  # Below 65 having side effects
  test_sample.append(randint(13, 64))
  test_label.append(1)

  # Above 64 not having side effects
  test_sample.append(randint(65, 100))
  test_label.append(0)

# Majority (roughly 95% of the data).
for _ in range(200):
  # Below 65 not having side effects
  test_sample.append(randint(13, 64))
  test_label.append(0)

  # Above 64 having side effects
  test_sample.append(randint(65, 100))
  test_label.append(1)

# Convert to NumPy arrays and shuffle samples and labels together so the
# pairing is preserved.
test_sample = np.array(test_sample)
test_label = np.array(test_label)
test_sample, test_label = shuffle(test_sample, test_label)

# Scale with the scaler that was already fitted on the training ages. Fitting
# a new MinMaxScaler on the test data would map ages with a different min/max
# than the model was trained on, and would overwrite the training scaler.
scaled_test_sample = scaler.transform(test_sample.reshape(-1, 1))

In [None]:
# Display the scaled test samples.
scaled_test_sample

In [None]:
# Run the trained model on the scaled test samples. The output layer has two
# softmax units, so each row of `predicted` holds two values.
predicted = model.predict(x=scaled_test_sample, verbose=1)

In [None]:
# Turn every pair of outputs into a class label: 0 only when the first value is
# strictly larger than the second, otherwise 1 (so a tie yields 1).
pred_label = [
    0 if score_class0 > score_class1 else 1
    for score_class0, score_class1 in predicted
]

In [None]:
# Confusion matrix of the test predictions. scikit-learn expects the true
# labels first and the predictions second (same order as accuracy_score);
# with the arguments swapped the matrix comes out transposed.
confusion_matrix(test_label, pred_label)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_true=test_label, y_pred=pred_label)