<a href="https://colab.research.google.com/github/mimranbee24seecs-ctrl/MLPROJECT/blob/main/GOOGLE%20COLAB%20V3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
from google.colab import files

# Upload kaggle.json
if not os.path.exists('/content/kaggle.json'):
    files.upload()

# Configure Kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download and unzip the Dog Breed dataset
!kaggle competitions download -c dog-breed-identification
!unzip -q dog-breed-identification.zip -d /content/dog_data
print("Data is ready!")

Saving kaggle.json to kaggle.json
Downloading dog-breed-identification.zip to /content
 80% 552M/691M [00:03<00:01, 80.2MB/s]
100% 691M/691M [00:03<00:00, 210MB/s] 
Data is ready!


In [3]:
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Configuration from your code
IMAGE_SIZE = (331, 331)
batchSize = 8

# Load labels and append the file extension so every id can be used as a
# filename inside the train directory.
df = pd.read_csv('/content/dog_data/labels.csv')
df['id'] = df['id'] + '.jpg'

# Create Data Generator (handles normalization from 0-255 to 0-1).
# validation_split=0.3 reserves 30% of the rows for the "validation" subset.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.3)

# Arguments shared by both subsets, kept in one place so the training and
# validation pipelines cannot drift apart.
flow_kwargs = dict(
    dataframe=df,
    directory='/content/dog_data/train',
    x_col="id",
    y_col="breed",
    batch_size=batchSize,
    target_size=IMAGE_SIZE,
    class_mode="categorical",
)

# Training Set
train_generator = datagen.flow_from_dataframe(subset="training", **flow_kwargs)

# Validation Set (Replaces your train_test_split)
# shuffle=False keeps the batches in the same order as valid_generator.classes.
# Later cells pair model.predict(valid_generator) with valid_generator.classes,
# which is only valid when the generator does not shuffle.
valid_generator = datagen.flow_from_dataframe(
    subset="validation",
    shuffle=False,
    **flow_kwargs,
)

Found 7156 validated image filenames belonging to 120 classes.
Found 3066 validated image filenames belonging to 120 classes.


In [4]:
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.nasnet import NASNetLarge
from tensorflow.keras.optimizers import Adam

# Network input shape: the generator's target size plus 3 colour channels.
# Derived from IMAGE_SIZE so the model and the data pipeline cannot disagree.
IMAGE_FULL_SIZE = IMAGE_SIZE + (3,)

# One output unit per class discovered by the training generator
# (120 breeds for this dataset).
NUM_CLASSES = len(train_generator.class_indices)

# Build the model: ImageNet-pretrained NASNetLarge without its classifier head.
myModel = NASNetLarge(input_shape=IMAGE_FULL_SIZE, weights='imagenet', include_top=False)

# Freeze layers so only the new prediction layer is trained.
for layer in myModel.layers:
    layer.trainable = False

# Flatten the backbone's output into one vector per image.
# NOTE(review): flattening the full feature map presumably produces a very
# wide vector, which makes the Dense layer below large and makes extracting
# these features for classical ML memory-hungry - consider a global pooling
# layer if memory becomes a problem; confirm against the project requirements.
plusFlattenLayer = Flatten()(myModel.output)

# Add prediction layer (softmax over the breeds)
predicition = Dense(NUM_CLASSES, activation='softmax')(plusFlattenLayer)

model = Model(inputs=myModel.input, outputs=predicition)

# Compile with your learning rate
lr = 1e-4
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=lr),
    metrics=['accuracy']
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/nasnet/NASNet-large-no-top.h5
[1m343610240/343610240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [None]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

import os
from google.colab import drive

# The checkpoint is written under /content/drive/MyDrive, which only exists
# once Google Drive is mounted. Mount it here if that has not happened yet,
# otherwise the first checkpoint save fails on a missing directory.
if not os.path.isdir("/content/drive/MyDrive"):
    drive.mount("/content/drive")

# Save model to your Google Drive folder
best_model_file = "/content/drive/MyDrive/dogs_model.h5"

# Your callbacks list
callbacks_list = [
    # Keep only the best model seen so far; val_loss is ModelCheckpoint's
    # default monitor and is spelled out here for clarity.
    ModelCheckpoint(best_model_file, monitor='val_loss', verbose=1, save_best_only=True),
    # Multiply the learning rate by 0.1 after 3 epochs without val_loss
    # improvement, never going below 1e-6.
    ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.1, verbose=1, min_lr=1e-6),
    # Stop after 7 epochs without val_accuracy improvement.
    EarlyStopping(monitor='val_accuracy', patience=7, verbose=1)
]

# Train (fit) using generators
r = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=30,
    callbacks=callbacks_list
)
# The plotting cell reads the training curves from `history`; expose the fit
# result under that name too, keeping `r` for existing references.
history = r

Epoch 1/30


  self._warn_if_super_not_called()


[1m 81/895[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:43[0m 201ms/step - accuracy: 0.3754 - loss: 4.1654

In [None]:
import matplotlib.pyplot as plt
import os

from google.colab import drive

# 1. SAVE THE MODEL (Crucial Step)
# We save to Drive so you never lose it again. Drive must be mounted first,
# otherwise the target directory does not exist and the save fails.
if not os.path.isdir("/content/drive/MyDrive"):
    drive.mount("/content/drive")
save_path = "/content/drive/MyDrive/dogs_model_retrained.h5"
model.save(save_path)
print(f"✅ Model saved safely to: {save_path}")

# 2. PLOT TRAINING HISTORY
# The training cell binds the result of model.fit to `r`, so the per-epoch
# metrics are read from there (a bare `history` name is not defined by the
# notebook as written and raised NameError on a fresh run).
training_metrics = r.history
acc = training_metrics['accuracy']
val_acc = training_metrics['val_accuracy']
loss = training_metrics['loss']
val_loss = training_metrics['val_loss']
epochs_range = range(1, len(acc) + 1)

fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 5))

# Plot Accuracy
acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

# Plot Loss
loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()

In [None]:
# Classical ML Requirement part

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model

import numpy as np

# 1. Extract Features
# We grab the output of the layer just before the final prediction
print("Extracting features for Classical ML...")
feature_extractor = Model(inputs=model.input, outputs=model.layers[-2].output)

# Walk the generator batch by batch and take the labels from the very same
# batch the features were computed on. Pairing predict(valid_generator) with
# valid_generator.classes is only correct when the generator does not shuffle;
# this loop stays aligned either way.
# NOTE(review): the extracted vectors are presumably very wide (flattened
# backbone output) - confirm the full feature matrix fits in memory.
feature_batches = []
label_batches = []
for batch_index in range(len(valid_generator)):
    batch_images, batch_onehot = valid_generator[batch_index]
    feature_batches.append(feature_extractor.predict_on_batch(batch_images))
    # Generator labels are one-hot (class_mode="categorical"); convert them
    # to integer class ids for scikit-learn.
    label_batches.append(np.argmax(batch_onehot, axis=1))
features = np.concatenate(feature_batches)
labels = np.concatenate(label_batches)

# 2. Split Data
X_train_ml, X_test_ml, y_train_ml, y_test_ml = train_test_split(features, labels, test_size=0.3, random_state=42)

# 3. Train & Evaluate SVM
print("Training SVM...")
svm = SVC(kernel='linear')
svm.fit(X_train_ml, y_train_ml)
svm_acc = accuracy_score(y_test_ml, svm.predict(X_test_ml))
print(f"✅ SVM Accuracy: {svm_acc:.4f}")

# 4. Train & Evaluate Random Forest
print("Training Random Forest...")
# Fixed seed so the reported accuracy is reproducible across runs.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_ml, y_train_ml)
rf_acc = accuracy_score(y_test_ml, rf.predict(X_test_ml))
print(f"✅ Random Forest Accuracy: {rf_acc:.4f}")

In [None]:
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

import matplotlib.pyplot as plt

# 1. Get Predictions
print("Generating final predictions...")
# Class names ordered by their integer index, so position i names class i.
class_indices = valid_generator.class_indices
class_labels = sorted(class_indices, key=class_indices.get)
class_ids = np.arange(len(class_labels))

# Predict batch by batch and keep each batch's own labels, so predictions and
# ground truth stay aligned even if the generator shuffles. This also makes
# the cell independent of variables created by the classical-ML cell.
prob_batches = []
true_batches = []
for batch_index in range(len(valid_generator)):
    batch_images, batch_onehot = valid_generator[batch_index]
    prob_batches.append(model.predict_on_batch(batch_images))
    true_batches.append(np.argmax(batch_onehot, axis=1))
deep_preds = np.concatenate(prob_batches)
true_classes = np.concatenate(true_batches)
predicted_classes = np.argmax(deep_preds, axis=1)

# 2. Confusion Matrix (First 15 Breeds)
print("Plotting Confusion Matrix...")
# labels=class_ids fixes the matrix to one row/column per class, even when a
# class never occurs in the validation data.
cm = confusion_matrix(true_classes, predicted_classes, labels=class_ids)
plt.figure(figsize=(10, 8))
sns.heatmap(cm[:15, :15], annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels[:15], yticklabels=class_labels[:15])
plt.title('Confusion Matrix (First 15 Breeds)')
plt.show()

# 3. Best vs Worst Breeds Bar Chart
print("Plotting Accuracy by Breed...")
report_dict = classification_report(
    true_classes,
    predicted_classes,
    labels=class_ids,
    target_names=class_labels,
    output_dict=True,
    zero_division=0,
)
# Per-breed F1 only; the aggregate rows of the report are not breeds.
breed_scores = {breed: report_dict[breed]['f1-score'] for breed in class_labels}
sorted_breeds = sorted(breed_scores.items(), key=lambda x: x[1], reverse=True)

fig, axes = plt.subplots(1, 2, figsize=(18, 6))
# Top 10
breeds, scores = zip(*sorted_breeds[:10])
axes[0].barh(breeds, scores, color='green')
axes[0].set_title('Top 10 Breeds')
axes[0].set_xlabel('F1-score')
axes[0].invert_yaxis()
# Bottom 10
breeds, scores = zip(*sorted_breeds[-10:])
axes[1].barh(breeds, scores, color='red')
axes[1].set_title('Bottom 10 Breeds')
axes[1].set_xlabel('F1-score')
axes[1].invert_yaxis()
plt.tight_layout()
plt.show()

# 4. Text Report
# classification_report has no `max_dict` argument (passing it raises
# TypeError). To print a ten-row report, restrict it to the ten breeds with
# the highest F1 via `labels` / `target_names`.
top_breed_names = [breed for breed, _ in sorted_breeds[:10]]
top_breed_ids = [class_indices[breed] for breed in top_breed_names]
print("\n--- FINAL CLASSIFICATION REPORT (Top 10) ---")
print(classification_report(
    true_classes,
    predicted_classes,
    labels=top_breed_ids,
    target_names=top_breed_names,
    zero_division=0,
))