In [None]:
# import libraries
import os
import cv2
import pandas as pd
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
import keras
from keras import layers
from keras import models
from keras import Model

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import utils, optimizers, callbacks
from keras.utils import to_categorical
from keras.layers import BatchNormalization, Activation
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.optimizers.schedules import PiecewiseConstantDecay
from keras.callbacks import EarlyStopping
#from keras_tuners import RandomSearch

from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.layers import Dense, GlobalAveragePooling2D

from keras.callbacks import ModelCheckpoint, TensorBoard


In [None]:
# Mount Google Drive at /content/drive; the dataset folders used by the
# following cells all live under that mount point.
# Requires a Google Colab runtime (the google.colab package is Colab-specific).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# One Drive folder per class.
good_folder = '/content/drive/MyDrive/Neural Network And Deep Learning/validation_thumbnails/'
bad_folder = '/content/drive/MyDrive/Neural Network And Deep Learning/bad/'

# Directory listings are sorted because os.listdir returns entries in
# arbitrary order; a stable row order keeps the seeded df.sample(...) calls
# further down reproducible between runs.
good_images = sorted(os.listdir(good_folder))
bad_images = sorted(os.listdir(bad_folder))

# One row per file: full path plus the class label of the folder it came from.
df_good = pd.DataFrame({'Image_Path': [os.path.join(good_folder, img) for img in good_images],
                        'Label': 'good'})
df_bad = pd.DataFrame({'Image_Path': [os.path.join(bad_folder, img) for img in bad_images],
                       'Label': 'bad'})

# Concatenate the two class frames in a single call. Starting from an empty
# DataFrame and concatenating onto it is deprecated in recent pandas, and
# ignore_index=True avoids duplicate index labels (0..n for each class).
df = pd.concat([df_good, df_bad], ignore_index=True)

print(df.head())


                                          Image_Path Label
0  /content/drive/MyDrive/Neural Network And Deep...  good
1  /content/drive/MyDrive/Neural Network And Deep...  good
2  /content/drive/MyDrive/Neural Network And Deep...  good
3  /content/drive/MyDrive/Neural Network And Deep...  good
4  /content/drive/MyDrive/Neural Network And Deep...  good


# CNN

Initial model for classifying good and bad YouTube thumbnails.

In [None]:
def preprocess_image(image_path, target_size=(224, 224)):
    """Read an image from disk, resize it and scale its pixels to [0, 1].

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_size: Size tuple passed to ``cv2.resize``.

    Returns:
        A float32 NumPy array holding the resized image divided by 255.
        The channel order is whatever ``cv2.imread`` produced.

    Raises:
        ValueError: If OpenCV could not read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the error would surface later inside cv2.resize
        # with no hint about which file was at fault.
        raise ValueError(f"Could not read image file: {image_path!r}")

    image = cv2.resize(image, target_size)
    return image.astype('float32') / 255.0


# Work on a random subset so that all decoded images fit in memory.
# The size is capped at len(df) because DataFrame.sample raises a ValueError
# when n is larger than the number of rows (and replace is not set).
df_train_subsampled = df.sample(n=min(1000, len(df)), random_state=42)  # Adjust the sample size as needed

# Decode, resize and normalise every sampled image (see preprocess_image above).
df_train_subsampled['Processed_Image'] = df_train_subsampled['Image_Path'].apply(preprocess_image)

# LabelEncoder assigns integer codes to the sorted class names,
# so with the labels used here 'bad' -> 0 and 'good' -> 1.
label_encoder = LabelEncoder()
df_train_subsampled['Encoded_Label'] = label_encoder.fit_transform(df_train_subsampled['Label'])

# 80/20 train/validation split with a fixed seed.
X_train_subsampled, X_val_subsampled, y_train_subsampled, y_val_subsampled = train_test_split(
    np.array(df_train_subsampled['Processed_Image'].tolist()),
    df_train_subsampled['Encoded_Label'],
    test_size=0.2,
    random_state=42
)

print(f"X_train_subsampled shape: {X_train_subsampled.shape}, y_train_subsampled shape: {y_train_subsampled.shape}")
print(f"X_val_subsampled shape: {X_val_subsampled.shape}, y_val_subsampled shape: {y_val_subsampled.shape}")


def build_and_compile_model(input_shape=(224, 224, 3), num_classes=1):
    """Create the baseline CNN and compile it for binary classification.

    Two Conv2D -> MaxPooling2D -> Dropout(0.2) stages (32 then 64 filters)
    are followed by Flatten, a 64-unit ReLU layer and a sigmoid output layer
    with ``num_classes`` units.

    Args:
        input_shape: Shape of one input image, given to the first Conv2D layer.
        num_classes: Number of units in the sigmoid output layer.

    Returns:
        The Sequential model, compiled with the 'adam' optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    feature_extractor = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        Dropout(0.2),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        Dropout(0.2),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='sigmoid'),
    ]

    network = models.Sequential(feature_extractor + classifier_head)
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network


# Fresh baseline network.
model = build_and_compile_model()

# Fit on the in-memory training split and report metrics on the validation
# split after every epoch.
model.fit(
    x=X_train_subsampled,
    y=y_train_subsampled,
    validation_data=(X_val_subsampled, y_val_subsampled),
    batch_size=32,
    epochs=20,
)


X_train_subsampled shape: (800, 224, 224, 3), y_train_subsampled shape: (800,)
X_val_subsampled shape: (200, 224, 224, 3), y_val_subsampled shape: (200,)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7da231c601c0>

In [None]:
folder_path = '/content/drive/MyDrive/Neural Network And Deep Learning/good_thumbnails/'

# Keep only entries with an image extension: os.listdir also returns any
# other file or sub-directory in the folder, which cv2 cannot decode.
# Sorting gives a stable, readable print order.
file_list = [
    name for name in sorted(os.listdir(folder_path))
    if name.lower().endswith(('.jpg', '.jpeg', '.png'))
]

for file_name in file_list:
    image_path = os.path.join(folder_path, file_name)

    # Same preprocessing as for training, plus a leading batch dimension.
    test_image = np.expand_dims(preprocess_image(image_path), axis=0)

    # verbose=0 keeps Keras from printing a progress bar for every image.
    predictions = model.predict(test_image, verbose=0)

    # Explicit 0.5 decision threshold on the sigmoid output. This gives the
    # same class as round() for every score in [0, 1], but always yields a
    # plain int instead of relying on what round() returns for a NumPy scalar.
    predicted_class = int(predictions[0][0] > 0.5)
    predicted_label = label_encoder.inverse_transform([predicted_class])[0]

    print(f'Image: {file_name}, Predicted Label: {predicted_label}')


In [None]:
# Folder whose images are scored in this cell (same path the previous cell used as `folder_path`).
new_data_folder = '/content/drive/MyDrive/Neural Network And Deep Learning/good_thumbnails/'


def preprocess_image_for_prediction(image_path, target_size=(224, 224)):
    """Read an image from disk, resize it and scale its pixels to [0, 1].

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_size: Size tuple passed to ``cv2.resize``.

    Returns:
        A float32 NumPy array holding the resized image divided by 255.

    Raises:
        ValueError: If OpenCV could not read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None on failure instead of raising; fail here
        # with the offending path rather than inside cv2.resize.
        raise ValueError(f"Could not read image file: {image_path!r}")
    image = cv2.resize(image, target_size)
    return image.astype('float32') / 255.0


# Image files to score; the extension check is case-insensitive so that
# names such as 'photo.JPG' are not silently dropped.
new_data_images = [f for f in os.listdir(new_data_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

# The `model` trained earlier in the notebook is reused as-is. Calling
# build_and_compile_model() here would replace it with a freshly initialised,
# untrained network, so every score below would come from random weights.

# One {'image_file', 'prediction'} record per image; 'prediction' is the raw
# sigmoid output of the model.
predictions = []

for image_file in new_data_images:
    image_path = os.path.join(new_data_folder, image_file)
    processed_image = preprocess_image_for_prediction(image_path)
    processed_image = np.expand_dims(processed_image, axis=0)  # Add batch dimension
    # verbose=0 suppresses the per-call progress bar.
    prediction = model.predict(processed_image, verbose=0)
    predictions.append({'image_file': image_file, 'prediction': prediction[0][0]})

for result in predictions:
    print(f"Image: {result['image_file']}, Prediction: {result['prediction']}")

Advanced model - added an explicit learning rate and early stopping

In [None]:
def preprocess_image_for_prediction(image_path, target_size=(224, 224)):
    """Load one image with OpenCV, resize it and normalise it to [0, 1].

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_size: Size tuple passed to ``cv2.resize``.

    Returns:
        The resized image as a float32 NumPy array divided by 255.

    Raises:
        ValueError: If OpenCV could not read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # A None result means the file is missing or not a decodable image;
        # report the path instead of letting cv2.resize fail obscurely.
        raise ValueError(f"Could not read image file: {image_path!r}")
    image = cv2.resize(image, target_size)
    return image.astype('float32') / 255.0



# Random subset of the labelled images. The size is capped at len(df) because
# DataFrame.sample raises a ValueError when n exceeds the number of rows.
df_train_subsampled = df.sample(n=min(1000, len(df)), random_state=42)  # Adjust the sample size as needed

# Decode, resize and scale every sampled image to [0, 1].
df_train_subsampled['Processed_Image'] = df_train_subsampled['Image_Path'].apply(preprocess_image_for_prediction)

# Integer-encode the string labels (LabelEncoder orders classes alphabetically,
# so 'bad' -> 0 and 'good' -> 1 for the labels used here).
label_encoder = LabelEncoder()
df_train_subsampled['Encoded_Label'] = label_encoder.fit_transform(df_train_subsampled['Label'])

# 80/20 train/validation split with a fixed seed.
X_train_subsampled, X_val_subsampled, y_train_subsampled, y_val_subsampled = train_test_split(
    np.array(df_train_subsampled['Processed_Image'].tolist()),
    df_train_subsampled['Encoded_Label'],
    test_size=0.2,
    random_state=42
)

# Data augmentation
# No `rescale` argument: the images were already divided by 255 when they were
# loaded (preprocess_image_for_prediction). Rescaling a second time would feed
# the network training inputs in [0, 1/255] while the validation arrays, which
# do not pass through this generator, stay in [0, 1].
train_datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Same layer stack as the baseline CNN; the difference is the optimizer,
# which is Adam with an explicit learning rate of 1e-4.
def build_and_compile_model(input_shape=(224, 224, 3), num_classes=1):
    """Build the CNN and compile it with Adam(learning_rate=1e-4).

    Args:
        input_shape: Shape of one input image, given to the first Conv2D layer.
        num_classes: Number of units in the sigmoid output layer.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy metric).
    """
    network = models.Sequential()

    # Two convolutional stages: Conv2D -> MaxPooling2D -> Dropout(0.2).
    for stage, filters in enumerate((32, 64)):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        network.add(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
        network.add(layers.MaxPooling2D((2, 2)))
        network.add(Dropout(0.2))

    # Classifier head.
    for head_layer in (
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='sigmoid'),
    ):
        network.add(head_layer)

    network.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network


model = build_and_compile_model()

# Model callbacks
# - early_stopping: stop once val_loss has not improved for 3 epochs; with
#   restore_best_weights=True the weights from the best epoch are put back
#   when training is stopped, so the evaluation below does not score a later,
#   worse epoch.
# - model_checkpoint: keep the best-val_loss model on disk.
# - tensorboard: write logs (with per-epoch histograms) to ./logs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)
tensorboard = TensorBoard(log_dir='./logs', histogram_freq=1)

# Train the model with data augmentation
history = model.fit(
    train_datagen.flow(X_train_subsampled, y_train_subsampled, batch_size=12),
    epochs=20,
    validation_data=(X_val_subsampled, y_val_subsampled),
    callbacks=[early_stopping, model_checkpoint, tensorboard]
)

# Evaluate the model. There is no separate held-out test set at this point:
# the numbers printed below are measured on the validation split, the same
# data that early stopping and checkpointing monitor.
test_loss, test_accuracy = model.evaluate(X_val_subsampled, y_val_subsampled)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Epoch 1/20

  saving_api.save_model(


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Loss: 9.537700653076172, Test Accuracy: 0.6800000071525574


# VGG16

In [None]:
# Dataset folders on the mounted Google Drive.
# good_train_data_dir / bad_train_data_dir are the same paths as good_folder /
# bad_folder defined near the top of the notebook.
good_train_data_dir = '/content/drive/MyDrive/Neural Network And Deep Learning/validation_thumbnails/'
bad_train_data_dir = '/content/drive/MyDrive/Neural Network And Deep Learning/bad/'
# Folder of additional thumbnails used as the "random test" set.
random_test_data_dir = '/content/drive/MyDrive/Neural Network And Deep Learning/good_thumbnails/'

In [None]:
# Subsample size for each dataset
subsample_size = 1000

# Data augmentation and normalization
# NOTE(review): this generator is not used by the array-based loading in this
# cell. If it is later applied to X_train, drop `rescale`, because those
# arrays are already divided by 255 below.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)


def _load_labeled_images(data_dir, label, limit=None, seed=42):
    """Load, resize and normalise the images of one class folder.

    Args:
        data_dir: Folder whose entries are read with ``cv2.imread``.
        label: Integer class label assigned to every loaded image.
        limit: Maximum number of files to load; ``None`` loads all of them.
            When the folder holds more files, ``limit`` of them are drawn at
            random without replacement.
        seed: Seed for the random draw, so the subset is reproducible.

    Returns:
        ``(images, labels)``: a float32 array of shape (n, 224, 224, 3) with
        values in [0, 1], and an array of ``n`` copies of ``label``.
    """
    # Sorted first: os.listdir order is arbitrary, and the seeded draw below
    # is only reproducible on a stable ordering.
    filenames = sorted(os.listdir(data_dir))
    if limit is not None and len(filenames) > limit:
        rng = np.random.default_rng(seed)
        filenames = list(rng.choice(filenames, size=limit, replace=False))

    images = []
    skipped = 0
    for filename in filenames:
        img = cv2.imread(os.path.join(data_dir, filename))
        if img is None:
            # cv2.imread returns None for anything it cannot decode.
            skipped += 1
            continue
        img = cv2.resize(img, (224, 224))
        # float32 instead of the float64 that `img / 255.0` would produce:
        # half the memory for the same information.
        images.append(img.astype('float32') / 255.0)  # Normalize pixel values

    if skipped:
        print(f'Skipped {skipped} unreadable file(s) in {data_dir}')

    # reshape keeps a 4-D shape even when no image was loaded, so that
    # np.concatenate below still works.
    image_array = np.array(images, dtype='float32').reshape(-1, 224, 224, 3)
    label_array = np.array([label] * len(images))
    return image_array, label_array


# Label 1 for 'good', label 0 for 'bad'.
good_images, good_labels = _load_labeled_images(good_train_data_dir, 1, limit=subsample_size)
bad_images, bad_labels = _load_labeled_images(bad_train_data_dir, 0, limit=subsample_size)


all_images = np.concatenate([good_images, bad_images])
all_labels = np.concatenate([good_labels, bad_labels])


# 80/20 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    all_images, all_labels, test_size=0.2, random_state=42)



In [None]:
# Decode every entry of the random-test folder, resize it to 224x224 and
# scale its pixel values by 1/255.
random_test_images = np.array([
    cv2.resize(
        cv2.imread(os.path.join(random_test_data_dir, entry)),
        (224, 224),
    ) / 255.0
    for entry in os.listdir(random_test_data_dir)
])

# One placeholder label of 0.5 per loaded image.
random_test_labels = np.array([0.5] * len(random_test_images))

In [None]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Freeze the pretrained convolutional base so that only the new classifier
# head is trained. Left trainable, the randomly initialised head would push
# large gradients through the ImageNet weights and overwrite them.
base_model.trainable = False
# NOTE(review): the ImageNet VGG16 weights are normally paired with
# keras.applications.vgg16.preprocess_input rather than plain [0, 1] scaling;
# consider switching the loading cells to it - confirm before changing.

# Classifier head on top of the frozen VGG16 features.
model = models.Sequential()
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model on 'good' and 'bad' datasets
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val))

# Score the random test dataset.
# Its labels are 0.5 placeholders, not real classes, so a loss/accuracy
# computed against them is meaningless: a thresholded prediction is 0 or 1
# and can never equal 0.5, which makes the accuracy 0.0 by construction.
# Summarise the predicted scores instead.
random_test_scores = model.predict(random_test_images, verbose=0).ravel()
if random_test_scores.size:
    predicted_good_fraction = float((random_test_scores > 0.5).mean())
    print(f'Random test images: {random_test_scores.size}, '
          f'mean predicted score: {float(random_test_scores.mean()):.3f}, '
          f"fraction predicted 'good' (score > 0.5): {predicted_good_fraction:.1%}")
else:
    print('No random test images to score.')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.752239465713501, Test Accuracy: 0.0
