# Model (VGG16)

### Library Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import pickle
import time
import os
import json
import seaborn as sns
import tensorflow as tf
import random
import cv2

from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.models import Sequential

from keras.applications import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.optimizers import Adam

from pandas import json_normalize
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score,classification_report
from sklearn.preprocessing import normalize,StandardScaler,RobustScaler,MinMaxScaler
from sklearn.svm import SVC,LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.compose import ColumnTransformer


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Flatten, Conv2D, MaxPooling2D,LeakyReLU
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import mnist, cifar10
from tensorflow import keras
from keras.utils import to_categorical


from scipy.stats import skew

### Data Imports

In [2]:
# Deserialize the feature array from disk.
# NOTE: pickle executes arbitrary code on load; only open trusted files.
with open('pickles/X.pkl', 'rb') as x_file:
    X = pickle.load(x_file)

In [3]:
# Deserialize the labels from disk.
# NOTE: pickle executes arbitrary code on load; only open trusted files.
with open('pickles/y.pkl', 'rb') as y_file:
    y = pickle.load(y_file)

### Train Test Split

To split the data into training and validation sets, we split an array of row indices rather than X itself, which avoids holding additional copies of the full dataset in the kernel at the same time.

In [4]:
# One integer position per row of X; these positions are what gets split.
idx = np.arange(0, X.shape[0])

In [5]:
# Split row positions and labels together: 20% is held out for validation,
# with a fixed seed so the split is reproducible.
train_idx, test_idx, y_train, y_val = train_test_split(
    idx,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Rows of X at the positions assigned to the training split.
X_train = X[train_idx]

In [7]:
# Rows of X at the held-out positions; used as the validation set below.
X_val = X[test_idx]

In [8]:
# Sanity check: number of training samples.
len(X_train)

228524

In [9]:
# Number of distinct labels present in y.
num_classes = np.unique(y).size
num_classes

75

### Mapping Labels

In [10]:
# Build the label -> integer mapping from the full label set `y`, not from
# `y_train` alone: a class that only landed in the validation split would
# otherwise be missing from the map and raise KeyError below.
classes = np.unique(y)
label_map = {label: i for i, label in enumerate(classes)}

y_train_int = np.array([label_map[label] for label in y_train])
y_val_int = np.array([label_map[label] for label in y_val])

# One-hot encode; the width is derived from the data instead of hard-coded.
# NOTE: this rebinds y_train / y_val, so the cell cannot be re-run without
# re-running the train/validation split first.
y_train = to_categorical(y_train_int, num_classes=len(classes))
y_val = to_categorical(y_val_int, num_classes=len(classes))

### Data Generators

We'll feed our data into the model in batches of 32 so that all images aren't converted to floats and held in the kernel at the same time. Within each generator, we divide every batch of X by 255 to normalize the pixel values.

This operation is split into two separate generators to prevent data leakage from the training set into the validation set. Because the generators are separate, each one is only advanced when its data is needed.

In [11]:
def train_data_generator(X_train, y_train, batch_size):
    """Yield shuffled, normalized training batches forever.

    The sample order is reshuffled at the start of every pass over the data,
    so successive epochs do not see the batches in the same order.

    Args:
        X_train: Indexable array of samples; indexed with an integer index
            array and divided by 255 after conversion to float32.
        y_train: Array of targets aligned with ``X_train``.
        batch_size: Maximum number of samples per batch. The last batch of a
            pass is shorter when the sample count is not a multiple of it.

    Yields:
        Tuples ``(X_batch, y_batch)``.

    Raises:
        ValueError: If ``X_train`` is empty or ``batch_size`` is not
            positive (either would otherwise loop forever without yielding).
    """
    num_train_samples = len(X_train)
    if num_train_samples == 0:
        raise ValueError("X_train is empty; there is nothing to yield.")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer.")

    train_indices = np.arange(num_train_samples)

    while True:
        # Reshuffle once per pass so every epoch gets a fresh batch order.
        np.random.shuffle(train_indices)

        for start_idx in range(0, num_train_samples, batch_size):
            # Slicing clamps at the end, so the last batch may be short.
            batch_indices = train_indices[start_idx:start_idx + batch_size]
            X_batch_train = X_train[batch_indices]
            y_batch_train = y_train[batch_indices]

            # Normalize the input data to [0, 1]
            X_batch_train = X_batch_train.astype('float32') / 255.0

            yield (X_batch_train, y_batch_train)

In [12]:
def val_data_generator(X_val, y_val, batch_size):
    """Endlessly yield normalized validation batches.

    The sample order is shuffled a single time when the generator starts and
    is then reused unchanged on every pass over the data.

    Args:
        X_val: Indexable array of samples; indexed with an integer index
            array and divided by 255 after conversion to float32.
        y_val: Array of targets aligned with ``X_val``.
        batch_size: Maximum number of samples per batch; the final batch of
            a pass may contain fewer.

    Yields:
        Tuples ``(X_batch, y_batch)``.
    """
    total = len(X_val)
    order = np.arange(total)
    np.random.shuffle(order)
    batch_starts = range(0, total, batch_size)

    while True:
        for lo in batch_starts:
            # Slicing past the end is clamped, so the last batch can be short.
            picked = order[lo:lo + batch_size]
            images = X_val[picked]
            labels = y_val[picked]

            # Convert to float32 and divide by 255 before handing over.
            yield (images.astype('float32') / 255.0, labels)


# Number of samples per yielded batch
batch_size = 32

# One endless batch stream per split
train_generator = train_data_generator(X_train, y_train, batch_size=batch_size)
val_generator = val_data_generator(X_val, y_val, batch_size=batch_size)

### Instantiating VGG16 Model

In [18]:
# Load VGG16 with ImageNet weights and without its classifier head, so a
# custom head can be attached for this dataset.
# NOTE(review): the data generators scale inputs by 1/255, whereas the
# documented preprocessing for these weights is
# `tensorflow.keras.applications.vgg16.preprocess_input` - confirm the
# mismatch is intentional.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pre-trained layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# Classification head; the output width follows `num_classes` so it stays in
# sync with the label encoding instead of relying on a hard-coded 75.
model = Sequential([
    base_model,
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

In [19]:
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by current Keras releases.
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [20]:
# Stop training once validation loss has gone 15 epochs without improving.
early_stopping = EarlyStopping(
    patience=15,
    monitor='val_loss',
)


In [26]:
# Round the step counts UP so each Keras epoch consumes exactly one full pass
# of each generator, including the final partial batch. With floor division
# the last batch is skipped and the epoch boundaries drift relative to the
# generators' passes, so validation would cover a shifting subset.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
validation_steps = int(np.ceil(len(X_val) / batch_size))

# NOTE(review): `epochs` is not set, so Keras trains for its default of a
# single epoch and the EarlyStopping patience of 15 can never trigger -
# confirm the intended number of epochs and pass it explicitly.
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_generator,
    validation_steps=validation_steps,
    callbacks=[early_stopping],
)

