In [1]:
import os
import cv2
import scipy
import matplotlib.pyplot as plt
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
import pathlib


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import zipfile

file_name = 'training1.zip'

with zipfile.ZipFile('/content/drive/My Drive/' + file_name, 'r') as zip_ref:
    zip_ref.extractall('/content/tmp')

In [4]:
cwd = os.getcwd()
print(cwd)
train_data_dir = "tmp/training"
train_imgs = os.listdir(os.path.join(cwd,train_data_dir))
data_dir = os.path.join(cwd, train_data_dir)
print(data_dir)

/content
/content/tmp/training


In [5]:
batch_size = 32
img_height = 64
img_width = 64

In [6]:
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.3,
  subset="training",
  seed = 123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  label_mode = 'binary'
  )

Found 32107 files belonging to 2 classes.
Using 22475 files for training.


In [7]:
val_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.3,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  label_mode = 'binary'

  )

Found 32107 files belonging to 2 classes.
Using 9632 files for validation.


In [8]:
class_names = train_ds.class_names
print(class_names)

['0', '1']


In [9]:
for image_batch, labels_batch in train_ds:
  print(image_batch.shape)
  print(labels_batch.shape)
  break

(32, 64, 64, 3)
(32, 1)


In [10]:
normalization_layer = tf.keras.layers.Rescaling(1./255)

In [11]:
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))

0.11476764 0.99661225


In [12]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [13]:
num_classes = 2

model = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.Flatten(),  # Flatten layer to convert input to 1D
    tf.keras.layers.Dense(128, activation='sigmoid'),  # Hidden layer with sigmoid activation
    tf.keras.layers.Dense(64, activation='sigmoid'),  # Hidden layer with sigmoid activation
    tf.keras.layers.Dense(32, activation='sigmoid'),  # Hidden layer with sigmoid activation

    tf.keras.layers.Dense(num_classes, activation='relu')  # Output layer with ReLU activation
])


In [14]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',  # Use 'binary_crossentropy' for binary classification
    metrics=['accuracy']
)

In [15]:
model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=3,
  shuffle = True
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7d36d3947ca0>

In [16]:
test_loss, test_accuracy = model.evaluate(val_ds)

print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.2%}')

Test Loss: 0.6931
Test Accuracy: 49.73%


In [21]:
model_chatgpt = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification, so using sigmoid activation
])

In [22]:
model_chatgpt.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [23]:
model_chatgpt.fit(
  train_ds,
  validation_data=val_ds,
  epochs=3,
  shuffle = True
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7d36d39149d0>

In [24]:
test_loss, test_accuracy = model_chatgpt.evaluate(val_ds)

print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.2%}')

Test Loss: 0.0195
Test Accuracy: 99.34%


In [28]:
# !rm -rf '/content/tmp/test'

In [29]:
file_name = "test.zip"
with zipfile.ZipFile('/content/drive/My Drive/' + file_name, 'r') as zip_ref:
    zip_ref.extractall('/content/tmp')
cwd = os.getcwd()
print(cwd)
test_data_dir = "tmp/test"
test_imgs = os.listdir(os.path.join(cwd,test_data_dir))


/content


In [38]:
import os
import csv
import tensorflow as tf

def predict_and_save_to_csv(model, data_dir, img_height, img_width, batch_size, output_csv):
    # Create an image dataset from the directory
    image_dataset = tf.keras.utils.image_dataset_from_directory(
        data_dir,
        image_size=(img_height, img_width),
        batch_size=batch_size,
        label_mode=None,  # This will keep file names as labels
        shuffle=False,  # No need to shuffle for predictions
    )
    print("classname: ", image_dataset.class_names)
    # Extract filenames from the dataset
    filenames = image_dataset.file_paths

    # Predict classes using the model
    predictions = model.predict(image_dataset)
    print(predictions)
    # Convert predictions to class labels (assuming binary classification)
    class_labels = (predictions > 0.5).astype(int)

    # Write results to a CSV file
    with open(output_csv, mode='w', newline='') as csvfile:
        fieldnames = ['Filename', 'Prediction']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()
        for filename, label in zip(filenames, class_labels):
            writer.writerow({'Filename': os.path.basename(filename), 'Prediction': label})

# Example usage
output_csv_file = 'tmp/predictions.csv'
predict_and_save_to_csv(model_chatgpt, test_data_dir, img_height, img_width, batch_size, output_csv_file)


Found 7893 files belonging to 1 classes.
classname:  ['']
[[1.0000000e+00]
 [1.7047164e-04]
 [6.1204482e-05]
 ...
 [3.0970035e-04]
 [3.9874492e-04]
 [9.6557516e-01]]
