In [1]:
import os
# Display the kernel's starting directory; the notebook is launched from the
# project's research/ folder, so paths need adjusting before loading configs.
os.getcwd()

'/Users/nithinpradeep/AV/Projects/AI-vs-Real-Art-classification-end-to-end/research'

In [2]:
# Move from research/ up to the project root so the relative config and
# artifact paths used below resolve. The guard keeps the cell idempotent:
# re-running it must not climb one more directory each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')
os.getcwd()

'/Users/nithinpradeep/AV/Projects/AI-vs-Real-Art-classification-end-to-end'

## Step 1. Create Entity

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters for the training stage.

    Instances are built by ``ConfigurationManager.get_training_config`` from
    the project's config and params YAML files.
    """
    # Working directory of the training stage (created when the config is built).
    root_dir: Path
    # Destination file for the checkpointed (best) trained model.
    trained_model_path: Path
    # Saved base model that training loads and fine-tunes.
    updated_base_model_path: Path
    # "<unzip_dir>/dataset" - root folder of the extracted dataset.
    training_data: Path
    # "<unzip_dir>/dataset/fakeV2/fake-v2" - folder of fake (AI-generated) images.
    class_fake: Path
    # "<unzip_dir>/dataset/real" - folder of real images.
    class_real: Path
    # Number of training epochs (params.EPOCHS).
    params_epochs: int
    # Batch size used by both data generators (params.BATCH_SIZE).
    params_batch_size: int
    # params.AUGMENTATION flag; not read by the training code in this notebook.
    params_is_augmentation: bool
    # params.IMAGE_SIZE; not read by the training code in this notebook
    # (the generators use a fixed 224x224 target size).
    params_image_size: list



## Step 2. Configuration

In [4]:
from AI_Real_Classifier.constants import *
from AI_Real_Classifier.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the config/params YAML files and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root folder exists."""
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes below this folder, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the model-training stage.

        Creates the training stage's root directory as a side effect.
        """
        trainer_section = self.config.model_trainer
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # All image folders live under "<unzip_dir>/dataset".
        dataset_dir = os.path.join(self.config.data_ingestion.unzip_dir, "dataset")
        fake_dir = os.path.join(dataset_dir, "fakeV2", "fake-v2")
        real_dir = os.path.join(dataset_dir, "real")

        create_directories([Path(trainer_section.root_dir)])

        return TrainingConfig(
            root_dir=Path(trainer_section.root_dir),
            trained_model_path=Path(trainer_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(dataset_dir),
            class_fake=Path(fake_dir),
            class_real=Path(real_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
        )
        

## Step 3. Component

In [5]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time
import random
from AI_Real_Classifier import logging
from sklearn.model_selection import train_test_split
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
from keras.callbacks import ModelCheckpoint
from PIL import Image

class Training:
    """Fine-tunes the prepared base model on the fake-vs-real image folders.

    The methods store their results on ``self`` and are meant to be called in
    this order: ``get_base_model`` -> ``pre_process`` ->
    ``train_valid_generator`` -> ``train``.
    """

    def __init__(self, config: "TrainingConfig"):
        self.config= config

    def get_base_model(self):
        """Load the saved base model into ``self.model``."""
        self.model= tf.keras.models.load_model(self.config.updated_base_model_path)

    @staticmethod
    def sample_fake_images(folder, num_samples):
        """Return a reproducible random sample of file names from ``folder``.

        Args:
            folder: Directory whose entries are sampled.
            num_samples: Number of names wanted. If the folder holds fewer
                entries, all of them are returned (in random order).

        Returns:
            List of file names (not full paths).
        """
        # os.listdir order is arbitrary, so sort first: otherwise the fixed
        # seed would not give the same sample on every machine.
        candidates = sorted(os.listdir(folder))
        if num_samples > len(candidates):
            logging.warning(
                f"Requested {num_samples} samples but {folder} only has "
                f"{len(candidates)} entries; using all of them"
            )
            num_samples = len(candidates)
        # A private generator keeps the sample deterministic without
        # reseeding the process-wide `random` state.
        return random.Random(42).sample(candidates, num_samples)

    @staticmethod
    def filter_large_images(image_dir, max_pixels=178956970):
        """Return the file names in ``image_dir`` that must be excluded.

        A file is excluded when it cannot be opened as an image or when its
        width * height exceeds ``max_pixels``.

        NOTE(review): the default equals twice Pillow's default
        ``MAX_IMAGE_PIXELS`` (89478485), the size above which ``Image.open``
        raises ``DecompressionBombError`` - confirm this is the intended limit.

        Args:
            image_dir: Directory to scan.
            max_pixels: Largest accepted pixel count.

        Returns:
            List of file names (not full paths) to leave out of training.
        """
        rejected = []
        for filename in os.listdir(image_dir):
            filepath = os.path.join(image_dir, filename)
            try:
                with Image.open(filepath) as img:
                    width, height = img.size
            except Exception as e:
                # Unreadable or corrupt files, and images Pillow itself refuses.
                logging.info(f"Error processing {filename}: {e}")
                rejected.append(filename)
                continue
            # Previously an oversized image that opened without error fell
            # through and was kept, so max_pixels had no effect.
            if width * height > max_pixels:
                logging.info(f"Skipping {filename}: {width * height} pixels exceeds {max_pixels}")
                rejected.append(filename)
        return rejected

    @staticmethod
    def load_filenames_labels(folder, label, large_img, sampled_imgs=None):
        """Build parallel lists of file paths and labels for one class folder.

        Args:
            folder: Directory holding the images of one class.
            label: Label assigned to every returned file.
            large_img: File names to exclude (see ``filter_large_images``).
            sampled_imgs: Optional whitelist of file names; ``None`` keeps
                every file that is not excluded.

        Returns:
            ``(filenames, labels)`` where ``filenames`` are full paths in
            sorted name order and ``labels`` has the same length.
        """
        # Sets make each membership test O(1) instead of scanning a list.
        excluded = set(large_img or ())
        allowed = None if sampled_imgs is None else set(sampled_imgs)
        # Sorted so the downstream seeded split does not depend on the
        # arbitrary order os.listdir returns.
        filenames = [
            os.path.join(folder, name)
            for name in sorted(os.listdir(folder))
            if name not in excluded and (allowed is None or name in allowed)
        ]
        labels = [label] * len(filenames)
        return filenames, labels

    def pre_process(self, num_fake_samples=4000):
        """Collect usable files for both classes and split them 80/20.

        Args:
            num_fake_samples: How many fake images to sample before
                exclusions are applied (default 4000, the previous fixed value).
        """
        self.filtered_images_fake= self.filter_large_images(self.config.class_fake)
        self.filtered_images_real= self.filter_large_images(self.config.class_real)
        self.sampled_fake= self.sample_fake_images(self.config.class_fake, num_fake_samples)

        # Fake images: label '0', limited to the sampled subset.
        self.class_fake_filenames, self.class_fake_labels= self.load_filenames_labels(
            self.config.class_fake,
            '0',
            self.filtered_images_fake,
            self.sampled_fake)

        # Real images: label '1', every file that was not excluded.
        self.class_real_filenames, self.class_real_labels= self.load_filenames_labels(
            self.config.class_real,
            '1',
            self.filtered_images_real)

        self.all_file_names= self.class_fake_filenames+ self.class_real_filenames
        self.all_labels= self.class_fake_labels+ self.class_real_labels

        # Split the data into train and validation sets while maintaining class balance
        self.train_filenames, self.validation_filenames, self.train_labels, self.validation_labels = train_test_split(
            self.all_file_names, self.all_labels, test_size=0.2, stratify=self.all_labels, random_state=42)

    def _flow_from_files(self, datagen, filenames, labels):
        """Create a dataframe-backed generator with the settings shared by both splits."""
        return datagen.flow_from_dataframe(
            dataframe=pd.DataFrame({'filename': filenames, 'class': labels}),
            x_col='filename',
            y_col='class',
            # NOTE(review): fixed size; config.params_image_size is not used
            # here - confirm they agree.
            target_size=(224,224),
            batch_size=self.config.params_batch_size,
            class_mode='categorical',
            # NOTE(review): the training split is also served in a fixed
            # order every epoch - confirm this is intended.
            shuffle=False,
            seed=42
        )

    def train_valid_generator(self):
        """Build the training and validation generators from the split made by ``pre_process``."""
        # NOTE(review): both VGG16 preprocess_input and rescale=1/255 are
        # applied - confirm the combination is intended and that inference
        # uses the same preprocessing.
        self.train_datagen= ImageDataGenerator(rescale=1./255, preprocessing_function= preprocess_input)
        self.valid_datagen= ImageDataGenerator(rescale=1./255, preprocessing_function= preprocess_input)

        # Create the generator for training data
        self.train_generator = self._flow_from_files(
            self.train_datagen, self.train_filenames, self.train_labels)

        # Create the generator for validation data
        self.validation_generator = self._flow_from_files(
            self.valid_datagen, self.validation_filenames, self.validation_labels)

    @staticmethod
    def save_model(path: Path, model: "tf.keras.Model"):
        """Save ``model`` to ``path``."""
        model.save(path)

    def train(self):
        """Fit ``self.model`` and checkpoint it to ``config.trained_model_path``."""
        self.filepath= str(self.config.trained_model_path)
        # Only overwrite the saved model when validation accuracy improves.
        self.checkpoint= ModelCheckpoint(filepath= self.filepath,
                                         monitor="val_accuracy",
                                         verbose=1,
                                         save_best_only=True,
                                         mode='max')
        self.model.fit(self.train_generator,
                       steps_per_epoch=len(self.train_generator),
                       epochs= self.config.params_epochs,
                       validation_data= self.validation_generator,
                       validation_steps= len(self.validation_generator),
                       callbacks= [self.checkpoint])

    


In [6]:
# Run the training stage end to end; each step relies on state stored by the
# previous one, so the order matters.
try:
    config= ConfigurationManager()
    training_config= config.get_training_config()
    training= Training(config= training_config)
    training.get_base_model()
    training.pre_process()
    training.train_valid_generator()
    training.train()
except Exception:
    # Record the failure in the project log, then re-raise with the original
    # traceback intact (bare `raise` instead of `raise e`).
    logging.exception("Model training stage failed")
    raise

2024-04-18 22:32:57.785183: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-04-18 22:32:57.785211: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-04-18 22:32:57.785216: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-04-18 22:32:57.785249: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-18 22:32:57.785264: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Found 6224 validated image filenames belonging to 2 classes.
Found 1556 validated image filenames belonging to 2 classes.
Epoch 1/10


2024-04-18 22:33:03.870088: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


 5/98 [>.............................] - ETA: 1:09 - loss: 482.8845 - accuracy: 0.5063





2024-04-18 22:34:34.882602: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.



Epoch 1: val_accuracy improved from -inf to 0.84062, saving model to artifacts/training/model.h5
Epoch 2/10


  saving_api.save_model(


Epoch 2: val_accuracy improved from 0.84062 to 0.86440, saving model to artifacts/training/model.h5
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.86440
Epoch 4/10
Epoch 4: val_accuracy improved from 0.86440 to 0.88303, saving model to artifacts/training/model.h5
Epoch 5/10
Epoch 5: val_accuracy improved from 0.88303 to 0.88432, saving model to artifacts/training/model.h5
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.88432
Epoch 7/10
Epoch 7: val_accuracy improved from 0.88432 to 0.89010, saving model to artifacts/training/model.h5
Epoch 8/10
Epoch 8: val_accuracy improved from 0.89010 to 0.89717, saving model to artifacts/training/model.h5
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.89717
Epoch 10/10
Epoch 10: val_accuracy improved from 0.89717 to 0.90039, saving model to artifacts/training/model.h5
