In [13]:
!pip install tqdm






# Import-Libraries

In [14]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import tensorflow as tf
from PIL import Image

# Config class

In [16]:
class Config:
    BASE_PATH = "C:/Users/Shamila/OneDrive/Desktop/CI_Assign"
    IMG_SIZE = 224
    RANDOM_STATE = 42
    TEST_SIZE = 0.2
    VAL_SIZE = 0.15
    BATCH_SIZE = 32
    EPOCHS = 30
    NUM_CLASSES = None 
    
    # Visualization settings
    PLOT_STYLE = 'ggplot'
    FIG_SIZE = (10, 6)
    CMAP = 'YlGnBu'

# Set-matplotlib-style

In [17]:
plt.style.use(Config.PLOT_STYLE)


# Data-Loader-Class

In [18]:
class DataLoader:
    def __init__(self):
        self.train_df = pd.read_csv(os.path.join(Config.BASE_PATH, "train.csv"))
        self.test_df = pd.read_csv(os.path.join(Config.BASE_PATH, "test.csv"))
        self.le = LabelEncoder()
        self.class_names = None

    def _analyze_images(self, df):
        "dimensions and aspect ratios"
        widths = []
        heights = []
        aspect_ratios = []

        for _, row in df.iterrows():
            img_path = os.path.join(Config.BASE_PATH, row['filename'])
            try:
                with Image.open(img_path) as img:
                    width, height = img.size
                    widths.append(width)
                    heights.append(height)
                    aspect_ratios.append(width / height)
            except:
                continue

        return widths, heights, aspect_ratios

    def plot_image_stats(self):
        "statistics visualizations"
        widths, heights, aspect_ratios = self._analyze_images(self.train_df)

        fig, axes = plt.subplots(2, 2, figsize=(14, 12))

        sns.histplot(widths, bins=30, color='mediumseagreen', ax=axes[0, 0], kde=True)
        axes[0, 0].set_title('Image Width Distribution')
        axes[0, 0].set_xlabel('Width (pixels)')
        axes[0, 0].set_ylabel('Count')

        sns.histplot(heights, bins=30, color='seagreen', ax=axes[0, 1], kde=True)
        axes[0, 1].set_title('Image Height Distribution')
        axes[0, 1].set_xlabel('Height (pixels)')
        axes[0, 1].set_ylabel('Count')

        sns.histplot(aspect_ratios, bins=30, color='palegreen', ax=axes[1, 0], kde=True)
        axes[1, 0].set_title('Aspect Ratio Distribution')
        axes[1, 0].set_xlabel('Aspect Ratio (width/height)')
        axes[1, 0].set_ylabel('Count')

        sns.scatterplot(x=widths, y=heights, alpha=0.6, color='forestgreen', ax=axes[1, 1])
        axes[1, 1].set_title('Width vs Height Scatter Plot')
        axes[1, 1].set_xlabel('Width (pixels)')
        axes[1, 1].set_ylabel('Height (pixels)')

        plt.tight_layout()
        plt.savefig('statistics.png')
        plt.close()

        # Boxplot-aspect-ratios
        plt.figure(figsize=Config.FIG_SIZE)
        sns.boxplot(x=aspect_ratios, color='mediumseagreen')
        plt.title('Aspect Ratio Boxplot')
        plt.xlabel('Aspect Ratio')
        plt.savefig('aspectratioboxplot.png')
        plt.close()

    def plot_class_distribution(self):
        "Plot class distribution as pie chart and bar graph"
        class_counts = self.train_df['class'].value_counts()
        self.class_names = class_counts.index.tolist()
        Config.NUM_CLASSES = len(self.class_names)

        # Pie-chart
        plt.figure(figsize=Config.FIG_SIZE)
        plt.pie(class_counts, labels=self.class_names, autopct='%1.1f%%',
                startangle=90, colors=sns.color_palette('Greens', len(class_counts)))
        plt.title('Class Distribution (Pie Chart)')
        plt.savefig('piechart.png')
        plt.close()

        # Bar-graph
        plt.figure(figsize=Config.FIG_SIZE)
        sns.barplot(x=class_counts.index, y=class_counts.values, palette='Greens')
        plt.title('Class Distribution (Bar Graph)')
        plt.xlabel('Class')
        plt.ylabel('Count')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig('distribution-bar.png')
        plt.close()

    def load_data(self):
        "Load and preprocess images"
        def _process_df(df, label=True):
            X, y = [], []
            for _, row in df.iterrows():
                img = cv2.imread(os.path.join(Config.BASE_PATH, row['filename']))
                if img is None:
                    continue

                img_resized = cv2.resize(img, (Config.IMG_SIZE, Config.IMG_SIZE))
                X.append(preprocess_input(img_resized))

                if label:
                    y.append(row['class'])

            return (np.array(X), np.array(y)) if label else np.array(X)

        # Plot-datastatistics
        self.plot_image_stats()
        self.plot_class_distribution()

        # Load-trainingdata
        X_train, y_train = _process_df(self.train_df, label=True)
        X_test = _process_df(self.test_df, label=False)

        # Encode-labels
        y_encoded = self.le.fit_transform(y_train)

        return {
            'X_train': X_train,
            'y_train': y_encoded,
            'X_test': X_test,
            'class_names': self.class_names
        }


# Neural-Network-Model-Class

In [19]:
class NeuralNetworkModel:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.model = self.build_model()
        self.history = None
    
    def build_model(self):
        " transfer learning used Build to EfficientNetB0 based model"
        base_model = EfficientNetB0(
            include_top=False,
            weights='imagenet',
            input_shape=(Config.IMG_SIZE, Config.IMG_SIZE, 3)
        )
        
        #  Fine-tune-later-ones and Freeze-initial-layers
        for layer in base_model.layers[:100]:
            layer.trainable = False
        for layer in base_model.layers[100:]:
            layer.trainable = True
        
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1024, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        outputs = Dense(self.num_classes, activation='softmax')(x)
        
        model = Model(inputs=base_model.input, outputs=outputs)
        
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        
        return model
    
    def get_data_augmenter(self):
        " augmentation used to create image data generator "
        return ImageDataGenerator(
            rotation_range=30,
            zoom_range=0.2,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            brightness_range=[0.8, 1.2],
            shear_range=0.1,
            fill_mode='nearest',
            validation_split=Config.VAL_SIZE
        )
    
    def train(self, X_train, y_train):
        "data augmentation and callbacks used to train the model "
        datagen = self.get_data_augmenter()
        
        # Create - callbacks
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, verbose=1),
            ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True)
        ]
        
        # Train model
        self.history = self.model.fit(
            datagen.flow(X_train, y_train, batch_size=Config.BATCH_SIZE, subset='training'),
            steps_per_epoch=int(len(X_train) * (1 - Config.VAL_SIZE) // Config.BATCH_SIZE),
            
            epochs=Config.EPOCHS,
            validation_data=datagen.flow(X_train, y_train, batch_size=Config.BATCH_SIZE, subset='validation'),
            validation_steps=int(len(X_train) * Config.VAL_SIZE // Config.BATCH_SIZE),
            
            callbacks=callbacks,
            verbose=1
                )

    
    def plot_training_history(self):
        "Plot training & validation accuracy-loss"
        if self.history is None:
            print("Model hasn't been trained yet!")
            return
        
        history = self.history.history
        
        plt.figure(figsize=(14, 5))
        
        # Plot-accuracy
        plt.subplot(1, 2, 1)
        plt.plot(history['accuracy'], label='Train Accuracy')
        plt.plot(history['val_accuracy'], label='Validation Accuracy')
        plt.title('Model Accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend()
        
        # Plot-loss
        plt.subplot(1, 2, 2)
        plt.plot(history['loss'], label='Train Loss')
        plt.plot(history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend()
        
        plt.tight_layout()
        plt.savefig('traininghistory.png')
        plt.close()
    
    def evaluate(self, X_val, y_val, class_names):
        "Evaluate model & plot confusion matrix"
        y_pred = np.argmax(self.model.predict(X_val), axis=1)
        
        # Classification-report
        print("Classification Report:")
        print(classification_report(y_val, y_pred, target_names=class_names))
        
        # Confusion-matrix
        cm = confusion_matrix(y_val, y_pred)
        
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap=Config.CMAP, 
                    xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)
        plt.tight_layout()
        plt.savefig('confusionmatrix.png')
        plt.close()

# Main Pipeline

In [20]:
def main():
    # Load - analyze data
    loader = DataLoader()
    data = loader.load_data()
    
    # Split-data
    X_train, X_test = data['X_train'], data['X_test']
    y_train = data['y_train']
    class_names = data['class_names']
    
    # Further-split-into-train-validation
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train,
        test_size=Config.TEST_SIZE,
        stratify=y_train,
        random_state=Config.RANDOM_STATE
    )
    
    # Initialize-&-trainmodel
    model = NeuralNetworkModel(Config.NUM_CLASSES)
    model.train(X_train, y_train)
    
    # Plot-training-history
    model.plot_training_history()
    
    # Evaluate-model
    model.evaluate(X_val, y_val, class_names)
    
    # model save
    model.model.save('finalmodel.h5')
    
    # Make-predictions-on-test-set
    test_preds = np.argmax(model.model.predict(X_test), axis=1)
    test_labels = loader.le.inverse_transform(test_preds)
    
    # Create-submission-file
    submission = pd.DataFrame({
        "id": loader.test_df["id"],
        "label": test_labels
    })
    submission.to_csv("final_sub.csv", index=False)
    print("\n Final-sub-saved!")

if __name__ == "__main__":
    main()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=class_counts.index, y=class_counts.values, palette='Greens')
  self._warn_if_super_not_called()


Epoch 1/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.2606 - loss: 2.5792



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 4s/step - accuracy: 0.2641 - loss: 2.5592 - val_accuracy: 0.4583 - val_loss: 1.4115 - learning_rate: 1.0000e-04
Epoch 2/30
[1m 1/21[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m21s[0m 1s/step - accuracy: 0.1250 - loss: 3.3527



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 398ms/step - accuracy: 0.1250 - loss: 3.3527 - val_accuracy: 0.5000 - val_loss: 1.4015 - learning_rate: 1.0000e-04
Epoch 3/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.5514 - loss: 1.2395



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 4s/step - accuracy: 0.5538 - loss: 1.2323 - val_accuracy: 0.7396 - val_loss: 1.0739 - learning_rate: 1.0000e-04
Epoch 4/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 224ms/step - accuracy: 0.6875 - loss: 0.8045 - val_accuracy: 0.7292 - val_loss: 1.0273 - learning_rate: 1.0000e-04
Epoch 5/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.7732 - loss: 0.5913



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 4s/step - accuracy: 0.7747 - loss: 0.5884 - val_accuracy: 0.9271 - val_loss: 0.6917 - learning_rate: 1.0000e-04
Epoch 6/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 327ms/step - accuracy: 0.8750 - loss: 0.3264 - val_accuracy: 0.8542 - val_loss: 0.7360 - learning_rate: 1.0000e-04
Epoch 7/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.8838 - loss: 0.3114



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 3s/step - accuracy: 0.8844 - loss: 0.3107 - val_accuracy: 0.9375 - val_loss: 0.4727 - learning_rate: 1.0000e-04
Epoch 8/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 201ms/step - accuracy: 0.9062 - loss: 0.3581 - val_accuracy: 0.9271 - val_loss: 0.4836 - learning_rate: 1.0000e-04
Epoch 9/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 3s/step - accuracy: 0.9116 - loss: 0.2377 - val_accuracy: 0.9375 - val_loss: 0.3409 - learning_rate: 1.0000e-04
Epoch 10/30
[1m 1/21[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 642ms/step - accuracy: 0.8750 - loss: 0.5101



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 245ms/step - accuracy: 0.8750 - loss: 0.5101 - val_accuracy: 0.9583 - val_loss: 0.2670 - learning_rate: 1.0000e-04
Epoch 11/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.9334 - loss: 0.1852



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 4s/step - accuracy: 0.9341 - loss: 0.1842 - val_accuracy: 0.9688 - val_loss: 0.2146 - learning_rate: 1.0000e-04
Epoch 12/30
[1m 1/21[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:13[0m 4s/step - accuracy: 0.9375 - loss: 0.1418



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 443ms/step - accuracy: 0.9375 - loss: 0.1418 - val_accuracy: 0.9896 - val_loss: 0.1822 - learning_rate: 1.0000e-04
Epoch 13/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 4s/step - accuracy: 0.9483 - loss: 0.1528 - val_accuracy: 0.9688 - val_loss: 0.1465 - learning_rate: 1.0000e-04
Epoch 14/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 707ms/step - accuracy: 0.9688 - loss: 0.1909 - val_accuracy: 0.9896 - val_loss: 0.1324 - learning_rate: 1.0000e-04
Epoch 15/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.9644 - loss: 0.1277



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 4s/step - accuracy: 0.9643 - loss: 0.1278 - val_accuracy: 1.0000 - val_loss: 0.0824 - learning_rate: 1.0000e-04
Epoch 16/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 278ms/step - accuracy: 0.9375 - loss: 0.1677 - val_accuracy: 0.9896 - val_loss: 0.0835 - learning_rate: 1.0000e-04
Epoch 17/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 3s/step - accuracy: 0.9680 - loss: 0.1092 - val_accuracy: 0.9896 - val_loss: 0.0570 - learning_rate: 1.0000e-04
Epoch 18/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 295ms/step - accuracy: 0.9688 - loss: 0.0648 - val_accuracy: 1.0000 - val_loss: 0.0506 - learning_rate: 1.0000e-04
Epoch 19/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 3s/step - accuracy: 0.9664 - loss: 0.1067 - val_accuracy: 1.0000 - val_loss: 0.0268 - learning_rate: 1.0000e-04
Epoch 20/30
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0



[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 926ms/step

 Final-sub-saved!
