<a href="https://colab.research.google.com/github/Khusheeey/JEI-Epochs-Batch-size-Investigation-Code./blob/main/JEI_Epochs_Investigation_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# NUMBER OF EPOCHS, TRIAL NUMBER & BATCH SIZE
# epoch_num: number of epochs passed to model.fit
# trial_num: label for this repeat of the experiment, written to the results CSV
epoch_num=100
trial_num=10

# batch_size is read when the training image dataset is built, so it must be
# defined before that cell runs on a fresh kernel.
# NOTE(review): 10 is inferred from the saved training log (19 training steps
# per epoch, validation accuracies in multiples of 1/30) -- confirm this is the
# value used for the recorded trials.
batch_size=10

In [None]:
import tensorflow as tf
import os

# Avoid OOM errors by setting GPU Memory Consumption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
tf.config.list_physical_devices('GPU')

# Mount drive and change directory to folder location containing the dataset
from google.colab import drive
drive.mount('/content/drive')
%cd '/content/drive/My Drive/JEI pulsar train data'

In [None]:
import numpy as np
from matplotlib import pyplot as plt

# Build the data pipeline from the training image folder, batched by batch_size
data = tf.keras.utils.image_dataset_from_directory(
    '/content/drive/My Drive/JEI pulsar train data',
    batch_size=batch_size,
)
data_iterator = data.as_numpy_iterator()
batch = next(data_iterator)  # batch[0] = images, batch[1] = labels

# 1 = PULSAR, 0 = NOT PULSAR
# Preview up to four images from this batch, each titled with its label
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
preview_images = batch[0][:4]
for idx in range(len(preview_images)):
    img = preview_images[idx]
    # cast to int for display; pixel values have not been rescaled yet
    ax[idx].imshow(img.astype(int))
    ax[idx].title.set_text(batch[1][idx])

In [None]:
# Rescale the images by 1/255 (assumes 0-255 pixel values, giving a 0-1 range);
# labels pass through unchanged.
# This cell rebinds `data` to its own mapped version, so running it a second
# time divides by 255 again.
data = data.map(lambda images, labels: (images/255, labels))
scaled_iterator = data.as_numpy_iterator()
batch = next(scaled_iterator)

# Preview up to four rescaled images from the batch, titled with their labels
fig, ax = plt.subplots(ncols=4, figsize=(20,20))
preview_images = batch[0][:4]
for idx in range(len(preview_images)):
    img = preview_images[idx]
    ax[idx].imshow(img)
    ax[idx].title.set_text(batch[1][idx])

In [None]:
# Split the batched dataset into training and validation parts in a 100:20 ratio.
# len(data) is used as the unit here, so both sizes are counted in dataset
# elements (batches), and int() truncation can leave trailing batches unused.
# NOTE(review): image_dataset_from_directory shuffles by default -- confirm that
# the take/skip split below stays disjoint from one epoch to the next.
batch_count = len(data)
train_size = int(batch_count*(100/120))
val_size = int(batch_count*(20/120))

# First train_size batches for training, the next val_size batches for validation
train = data.take(train_size)
val = data.skip(train_size).take(val_size)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

# Building the model: two Conv2D + MaxPooling2D stages, then a flatten and a
# small dense head ending in one sigmoid unit for the binary label.
model = Sequential()

# Only the first layer declares the input shape.
model.add(Conv2D(16, (3,3), 1, activation='relu', input_shape=(256,256,3)))
model.add(MaxPooling2D())

# Later layers take their input from the layer before them, so no input_shape
# is passed here (the original repeated it, which was redundant).
model.add(Conv2D(32, (3,3), 1, activation='relu'))
model.add(MaxPooling2D())

model.add(Flatten())

model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Adam optimizer, binary cross-entropy loss, accuracy tracked as the metric
model.compile('adam', loss=tf.losses.BinaryCrossentropy(), metrics=['accuracy'])

In [None]:
 # initiating training
# Fit on `train` for epoch_num epochs with `val` as validation data; the
# returned object is kept in `hist` so its recorded metrics can be read later.
hist = model.fit(x=train, validation_data=val, epochs=epoch_num)

Epoch 1/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 353ms/step - accuracy: 0.5928 - loss: 1.5228 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 2/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 441ms/step - accuracy: 0.4986 - loss: 0.6932 - val_accuracy: 0.4000 - val_loss: 0.6940
Epoch 3/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 328ms/step - accuracy: 0.5706 - loss: 0.6925 - val_accuracy: 0.3333 - val_loss: 0.6957
Epoch 4/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 491ms/step - accuracy: 0.4240 - loss: 0.6943 - val_accuracy: 0.5667 - val_loss: 0.6922
Epoch 5/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 338ms/step - accuracy: 0.5230 - loss: 0.6928 - val_accuracy: 0.4667 - val_loss: 0.6938
Epoch 6/100
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 474ms/step - accuracy: 0.4973 - loss: 0.6932 - val_accuracy: 0.4000 - val_loss: 0.6951
Epoch 7/100
[1m19/19

**TESTING SECTION**


In [None]:
# changing to directory with test dataset
%cd '/content/drive/My Drive/JEI pulsar test data'

test_data = tf.keras.utils.image_dataset_from_directory('/content/drive/My Drive/JEI pulsar test data',batch_size=1)

test_data_iterator = test_data.as_numpy_iterator()
test_batch = test_data_iterator.next()

test_data = test_data.map(lambda x,y: (x/255, y))
test_scaled_iterator = test_data.as_numpy_iterator()
test_batch = test_scaled_iterator.next()

In [None]:
# Take as many elements as the test dataset reports via len(), i.e. all of them
test_size = len(test_data)
test = test_data.take(count=test_size)

In [None]:
# making predictions and evaluating them with the Precision, Recall, and BinaryAccuracy metrics
from tensorflow.keras.metrics import Precision, Recall, BinaryAccuracy

pre = Precision()
re = Recall()
acc = BinaryAccuracy()

# The metrics are updated batch by batch, so each label is always paired with
# the prediction made for that same batch.
for batch in test.as_numpy_iterator():
    X, y = batch
    # verbose=0 stops predict from printing a progress bar on every call;
    # the test set is loaded one image per batch, so the loop runs once per image
    yhat = model.predict(X, verbose=0)
    pre.update_state(y, yhat)
    re.update_state(y, yhat)
    acc.update_state(y, yhat)

# Convert the accumulated metric results to plain Python floats
test_precision=float(pre.result().numpy())
test_recall=float(re.result().numpy())
test_accuracy=float(acc.result().numpy())

# NOTE: this rebinds test_data from the image dataset to a plain dict of results;
# the CSV section merges this dict into the row it writes.
test_data={'test_accuracy':test_accuracy, 'test_precision':test_precision, 'test_recall':test_recall}
print(f'Precision: {test_precision}, Recall: {test_recall}, Accuracy: {test_accuracy}')

# **LOAD DATA INTO CSV**

In [None]:
# MAKING DICTIONARY W/ DATA (w/ all training accuracies in a list)

# train_data holds the metric values recorded during training, read from the
# `history` attribute of the object returned by model.fit. Later cells index it
# with 'accuracy', 'loss', 'val_accuracy' and 'val_loss' and take the last
# entry of each.
train_data = hist.history

**W/ JUST FINAL TRAINING ACCURACY** (this records only the final epoch's training and validation accuracy and loss, not the values from every epoch run)

In [None]:
# Take the last recorded value (index -1) of each training metric
end_accuracy = train_data['accuracy'][-1]
end_loss = train_data['loss'][-1]
end_val_accuracy = train_data['val_accuracy'][-1]
end_val_loss = train_data['val_loss'][-1]
end_train_data = dict(
    accuracy=end_accuracy,
    loss=end_loss,
    val_accuracy=end_val_accuracy,
    val_loss=end_val_loss,
)

# Build the single results row: epoch count and trial number first, then the
# final training metrics
end_all_info = {'epochs': epoch_num, 'trial_num': trial_num, **end_train_data}
# then the test metrics (test_data is the dict of test results at this point)
end_all_info.update(test_data)

In [None]:
# csv is not imported by any of the cells shown above, so import it where it is used
import csv

# Specify the CSV file name
end_file_path = '/content/drive/My Drive/Epochs data.csv'

# Append mode keeps the rows already written by earlier runs
with open(end_file_path, mode='a', newline='') as file:
    # Column order follows the insertion order of end_all_info
    writer = csv.DictWriter(file, fieldnames=list(end_all_info.keys()))

    # Write the header only if the file is empty (in append mode the position
    # starts at the end of the file, so 0 means nothing has been written yet)
    if file.tell() == 0:
        writer.writeheader()

    # Write the dictionary as a row in the CSV file
    writer.writerow(end_all_info)

print(f"Data appended to {end_file_path} successfully.")