In [1]:
!pip install --quiet tensorflow opencv-python pandas scikit-learn
!pip install scikit-image scikit-plot

Collecting scikit-image
  Downloading scikit_image-0.25.2-cp311-cp311-win_amd64.whl.metadata (14 kB)
Collecting scikit-plot
  Downloading scikit_plot-0.3.7-py3-none-any.whl.metadata (7.1 kB)
Collecting networkx>=3.0 (from scikit-image)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting pillow>=10.1 (from scikit-image)
  Downloading pillow-11.3.0-cp311-cp311-win_amd64.whl.metadata (9.2 kB)
Collecting imageio!=2.35.0,>=2.33 (from scikit-image)
  Downloading imageio-2.37.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tifffile>=2022.8.12 (from scikit-image)
  Downloading tifffile-2025.6.11-py3-none-any.whl.metadata (32 kB)
Collecting lazy-loader>=0.4 (from scikit-image)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting matplotlib>=1.4.0 (from scikit-plot)
  Downloading matplotlib-3.10.3-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib>=1.4.0->scikit-plot)
  Downloading contourpy-1.3.2-cp311-cp311-win_am

In [3]:
!pip install scipy==1.9.3

Collecting scipy==1.9.3
  Downloading scipy-1.9.3-cp311-cp311-win_amd64.whl.metadata (58 kB)
Collecting numpy<1.26.0,>=1.18.5 (from scipy==1.9.3)
  Downloading numpy-1.25.2-cp311-cp311-win_amd64.whl.metadata (5.7 kB)
Downloading scipy-1.9.3-cp311-cp311-win_amd64.whl (39.9 MB)
   ---------------------------------------- 0.0/39.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.9 MB ? eta -:--:--
   ---------------------------------------- 0.3/39.9 MB ? eta -:--:--
   ---------------------------------------- 0.3/39.9 MB ? eta -:--:--
   ---------------------------------------- 0.3/39.9 MB ? eta -:--:--
    --------------------------------------- 0.5/39.9 MB 399.6 kB/s eta 0:01:39
    --------------------------------------- 0.5/39.9 MB 399.6 kB/s eta 0:01:39
    --------------------------------------- 0.8/39.9 MB 472.8 kB/s eta 0:01

  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scikit-image 0.25.2 requires scipy>=1.11.4, but you have scipy 1.9.3 which is incompatible.
tensorflow 2.19.0 requires numpy<2.2.0,>=1.26.0, but you have numpy 1.25.2 which is incompatible.


In [4]:
#import_libraries
import os
import numpy as np
import pandas as pd
import cv2
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score
from skimage.feature import hog
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import scikitplot as skplt


In [None]:
#configuration
class Config:
    BASE_PATH = "C:/Users/Shamila/OneDrive/Desktop/CI_Assign"
    IMG_SIZE = 224
    RANDOM_STATE = 42
    TEST_SIZE = 0.2
    BATCH_SIZE = 32
    EPOCHS = 35
    
    # Feature extraction
    HOG_ORIENTATIONS = 8
    HOG_PIXELS_PER_CELL = (16, 16)
    HIST_BINS = [8, 8, 8]
    
    # Ensemble weights
    NN_WEIGHT = 0.6
    RF_WEIGHT = 0.25
    SVM_WEIGHT = 0.15

In [11]:
#Data loader
class DataLoader:
    def __init__(self):
        self.train_df = pd.read_csv(os.path.join(Config.BASE_PATH, "train.csv"))
        self.test_df = pd.read_csv(os.path.join(Config.BASE_PATH, "test.csv"))
        self.le = LabelEncoder()
        
    def _extract_features(self, img):
        """Extract HOG and color histogram features"""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        hog_features = hog(gray, 
                          orientations=Config.HOG_ORIENTATIONS,
                          pixels_per_cell=Config.HOG_PIXELS_PER_CELL,
                          cells_per_block=(1, 1))
        
        hist = cv2.calcHist([img], [0, 1, 2], None, 
                           Config.HIST_BINS, [0, 256, 0, 256, 0, 256])
        return np.hstack([hog_features, hist.flatten()])
    
    def load_data(self, extract_features=False):
        """Load and preprocess images"""
        def _process_df(df, label=True):
            X, X_features, y = [], [], []
            for _, row in df.iterrows():
                img = cv2.imread(os.path.join(Config.BASE_PATH, row['filename']))
                if img is None:
                    continue
                    
                img_resized = cv2.resize(img, (Config.IMG_SIZE, Config.IMG_SIZE))
                X.append(preprocess_input(img_resized))
                
                if extract_features:
                    X_features.append(self._extract_features(img_resized))
                
                if label:
                    y.append(row['class'])
                    
            return (np.array(X), np.array(X_features), np.array(y)) if extract_features else np.array(X)
        
        # Load training data
        if extract_features:
            X_train, X_train_features, y_train = _process_df(self.train_df, label=True)
            X_test, X_test_features, _ = _process_df(self.test_df, label=False)
            
            # Encode labels
            y_encoded = self.le.fit_transform(y_train)
            return {
                'X_train': X_train,
                'X_train_features': X_train_features,
                'y_train': y_encoded,
                'X_test': X_test,
                'X_test_features': X_test_features
            }
        else:
            X_train, y_train = _process_df(self.train_df, label=True)
            X_test = _process_df(self.test_df, label=False)
            y_encoded = self.le.fit_transform(y_train)
            return {
                'X_train': X_train,
                'y_train': y_encoded,
                'X_test': X_test
            }

In [12]:
#Model Explanation
class TraditionalML:
    @staticmethod
    def train_random_forest(X_train, y_train):
        print("\n=== Training Random Forest ===")
        model = RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=Config.RANDOM_STATE
        )
        model.fit(X_train, y_train)
        return model
    
    @staticmethod
    def train_svm(X_train, y_train):
        print("\n=== Training SVM ===")
        model = SVC(
            kernel='rbf',
            C=10,
            gamma='scale',
            probability=True,
            random_state=Config.RANDOM_STATE
        )
        model.fit(X_train, y_train)
        return model
    
    @staticmethod
    def run_kmeans(X, n_clusters):
        print("\n=== Running K-Means Clustering ===")
        model = KMeans(
            n_clusters=n_clusters,
            random_state=Config.RANDOM_STATE
        )
        return model.fit_predict(X)

class NeuralNetwork:
    @staticmethod
    def build_model(input_shape, num_classes):
        base_model = EfficientNetB0(
            include_top=False,
            weights='imagenet',
            input_shape=input_shape
        )
        base_model.trainable = True
        
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1500, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        outputs = Dense(num_classes, activation='softmax')(x)
        
        model = Model(inputs=base_model.input, outputs=outputs)
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )
        return model
    
    @staticmethod
    def get_data_augmenter():
        return ImageDataGenerator(
            rotation_range=40,
            zoom_range=0.25,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            brightness_range=[0.7, 1.3],
            shear_range=0.2,
            fill_mode='nearest'
        )

In [13]:
#Evaluation_utils
class Evaluation:
    @staticmethod
    def evaluate_model(model, X_val, y_val, model_name):
        preds = model.predict(X_val)
        acc = accuracy_score(y_val, preds)
        print(f"{model_name} Validation Accuracy: {acc:.4f}")
        return acc
    
    @staticmethod
    def plot_clusters(X, cluster_labels):
        skplt.metrics.plot_silhouette(X, cluster_labels)
        plt.title('Cluster Quality Analysis')
        plt.savefig('cluster_analysis.png')
        plt.close()

In [15]:
#Execution pipeline
def main():
    # Load_data features
    loader = DataLoader()
    data = loader.load_data(extract_features=True)
    
    # Split_data
    X_train_nn = data['X_train']
    y_train = data['y_train']
    X_test_nn = data['X_test']
    
    # Traditional_MLsplit
    X_train_ml, X_val_ml, y_train_ml, y_val_ml = train_test_split(
        data['X_train_features'], 
        y_train,
        test_size=Config.TEST_SIZE,
        stratify=y_train,
        random_state=Config.RANDOM_STATE
    )
    
    # Train_traditional_models
    rf_model = TraditionalML.train_random_forest(X_train_ml, y_train_ml)
    svm_model = TraditionalML.train_svm(X_train_ml, y_train_ml)
    
    # Evaluate_traditional_models
    Evaluation.evaluate_model(rf_model, X_val_ml, y_val_ml, "Random Forest")
    Evaluation.evaluate_model(svm_model, X_val_ml, y_val_ml, "SVM")
    
    # Clustering_analysis
    cluster_labels = TraditionalML.run_kmeans(data['X_train_features'], len(np.unique(y_train)))
    Evaluation.plot_clusters(data['X_train_features'], cluster_labels)
    
    # Neural_Network
    y_cat = to_categorical(y_train)
    X_train_nn, X_val_nn, y_train_nn, y_val_nn = train_test_split(
        X_train_nn, y_cat,
        test_size=Config.TEST_SIZE,
        stratify=y_train,
        random_state=Config.RANDOM_STATE
    )
    
    nn_model = NeuralNetwork.build_model(
        input_shape=(Config.IMG_SIZE, Config.IMG_SIZE, 3),
        num_classes=y_cat.shape[1]
    )
    
    train_gen = NeuralNetwork.get_data_augmenter()
    train_gen.fit(X_train_nn)
    
    history = nn_model.fit(
        train_gen.flow(X_train_nn, y_train_nn, batch_size=Config.BATCH_SIZE),
        epochs=Config.EPOCHS,
        validation_data=(X_val_nn, y_val_nn),
        callbacks=[
            EarlyStopping(patience=10, restore_best_weights=True),
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4)
        ],
        verbose=1
    )
    
    # Ensemble_predictions
    nn_probs = nn_model.predict(X_test_nn)
    rf_probs = rf_model.predict_proba(data['X_test_features'])
    svm_probs = svm_model.predict_proba(data['X_test_features'])
    
    ensemble_probs = (
        Config.NN_WEIGHT * nn_probs +
        Config.RF_WEIGHT * rf_probs +
        Config.SVM_WEIGHT * svm_probs
    )
    
    final_preds = loader.le.inverse_transform(np.argmax(ensemble_probs, axis=1))
    
    # Save_results
    submission = pd.DataFrame({
        "id": loader.test_df["id"],
        "label": final_preds
    })
    submission.to_csv("Final_sub.csv", index=False)
    print("\n Final sub saved!")

  
  # Save_models
    os.makedirs(os.path.join(Config.BASE_PATH, 'models'), exist_ok=True)
    
    nn_model.save(os.path.join(Config.BASE_PATH, 'models', 'game_model.h5'))
    with open(os.path.join(Config.BASE_PATH, 'models', 'random_forest.pkl'), 'wb') as rf_file:
        pickle.dump(rf_model, rf_file)
    with open(os.path.join(Config.BASE_PATH, 'models', 'svm_model.pkl'), 'wb') as svm_file:
        pickle.dump(svm_model, svm_file)
    
    print("\n visionpillor models saved 'models' folder!")


if __name__ == "__main__":
    main()


=== Training Random Forest ===

=== Training SVM ===
Random Forest Validation Accuracy: 1.0000
SVM Validation Accuracy: 1.0000

=== Running K-Means Clustering ===
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 7s/step - accuracy: 0.6404 - loss: 1.1229 - val_accuracy: 0.9550 - val_loss: 0.1772 - learning_rate: 0.0010
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 7s/step - accuracy: 0.9557 - loss: 0.1561 - val_accuracy: 1.0000 - val_loss: 0.0122 - learning_rate: 0.0010
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 7s/step - accuracy: 0.9644 - loss: 0.1233 - val_accuracy: 0.9950 - val_loss: 0.0139 - learning_rate: 0.0010
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 7s/step - accuracy: 0.9683 - loss: 0.1329 - val_accuracy: 1.0000 - val_loss: 0.0044 - learning_rate: 0.0010
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 6s/step - accuracy: 0.9776 - loss: 0.1015 - val_accuracy: 0.9850 - val_loss: 0.0813 - learning_rate: 0.0010
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0




 Final sub saved!

 visionpillor models saved 'models' folder!
