# **Demo Code for Solution 1**

# Imports

In [5]:
!pip install tqdm


Note: you may need to restart the kernel to use updated packages.


In [7]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, confusion_matrix
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import joblib
import seaborn as sns
import timm
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_score
from PIL import Image

# Loading the model

In [8]:
# Pre-trained resnet18 without the classification layer will be used for feature extraction
# Random forest classifier will be used for classification
# The output of the feature extractor will be the input to the classifier

class SugarcaneDiseaseFeatureExtractor(nn.Module):
    """
    Image feature extractor built on a pre-trained timm 'resnet18'.

    The backbone is created with ``pretrained=True`` and ``num_classes=0``;
    every child module of that backbone except the last one is chained into
    ``self.features``, which is what ``forward`` runs.
    """

    def __init__(self, num_classes=6):
        """
        Args:
            num_classes: accepted for interface compatibility; it is not read
                anywhere in this class.
        """
        super().__init__()
        self.base_model = timm.create_model('resnet18', pretrained=True, num_classes=0)
        backbone_layers = list(self.base_model.children())
        # Keep all child modules but the last one.
        self.features = nn.Sequential(*backbone_layers[:-1])

    def forward(self, x):
        """
        Runs the batch ``x`` through ``self.features`` and returns the result
        reshaped to ``(batch_size, -1)``, i.e. one flat vector per sample, so
        it can be used as input to the random forest.
        """
        batch_size = x.size(0)
        extracted = self.features(x)
        return extracted.reshape(batch_size, -1)


class SugarcaneClassifierWrapper:
    '''
    Contains the feature extractor, classifier, transform and classes.

    The feature extractor turns image batches into feature arrays, and the
    classifier is fitted on / predicts from those arrays.

    Attributes:
        feature_extractor: module called on image tensors; put into eval mode
            on construction.
        classifier: estimator exposing ``fit`` and ``predict``; it is also
            passed to ``cross_val_score``.
        transform: callable applied to a PIL image; its result must support
            ``.unsqueeze(0)``.
        classes: sequence indexed with the classifier's predicted label to
            obtain the value returned by ``predict``.
    '''
    def __init__(self, feature_extractor, classifier, transform, classes):
        self.feature_extractor = feature_extractor
        self.classifier = classifier
        self.transform = transform
        self.classes = classes
        self.feature_extractor.eval()

    def _extractor_device(self):
        '''
        Returns the device holding the feature extractor's parameters.

        Inputs are moved to this device instead of guessing "cuda if
        available", so the input always matches wherever the extractor
        actually lives. Falls back to CPU when the extractor exposes no
        parameters.
        '''
        parameters = getattr(self.feature_extractor, "parameters", None)
        first_param = next(iter(parameters()), None) if callable(parameters) else None
        if first_param is None:
            return torch.device("cpu")
        return first_param.device

    def extract_features(self, dataloader):
        '''
        Extracts features from the given dataloader.

        Args:
            dataloader: iterable yielding ``(images, labels)`` tensor batches.

        Returns:
            A tuple ``(features, labels)`` of numpy arrays: the per-batch
            features concatenated along the first axis, and all labels.

        Raises:
            ValueError: if the dataloader yields no batches.
        '''
        device = self._extractor_device()
        features, labels = [], []
        with torch.no_grad():
            for imgs, lbls in tqdm(dataloader):
                feats = self.feature_extractor(imgs.to(device))
                # numpy conversion requires CPU tensors.
                features.append(feats.cpu().numpy())
                labels.extend(lbls.cpu().numpy())
        if not features:
            raise ValueError("Cannot extract features: the dataloader yielded no batches.")
        return np.concatenate(features), np.array(labels)

    def train(self, dataloader, n_estimators=100, criterion='entropy', random_state=42, n_splits=5):
        """
        Train the model on the given dataloader.

        Extracts features, prints weighted-F1 scores from a stratified
        k-fold cross-validation, then fits ``self.classifier`` on all
        extracted features.

        Args:
            dataloader: iterable yielding ``(images, labels)`` batches.
            n_estimators: not used by this method; kept so existing calls
                keep working. The classifier passed to the constructor
                carries its own configuration.
            criterion: not used by this method; kept for the same reason.
            random_state: seed for the shuffled StratifiedKFold split.
            n_splits: number of cross-validation folds.
        """
        print("Extracting features...")
        X_train, y_train = self.extract_features(dataloader)
        print("Performing cross-validation...")
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)
        scores = cross_val_score(self.classifier, X_train, y_train, cv=kf, scoring='f1_weighted')
        print("Fold scores (f1 weighted):", scores)
        print("Mean F1:", np.mean(scores))
        print("Training the classifier...")
        self.classifier.fit(X_train, y_train)
        print("Training complete.")

    def evaluate(self, dataloader):
        """
        Evaluate the model on the given dataloader.

        Prints the accuracy and the classification report, then plots the
        confusion matrix as a heatmap, saves it to 'confusion_matrix.png'
        and shows it.

        Args:
            dataloader: iterable yielding ``(images, labels)`` batches.
        """
        print("Extracting features...")
        X_test, y_test = self.extract_features(dataloader)
        y_pred = self.classifier.predict(X_test)
        print("Accuracy:", accuracy_score(y_test, y_pred))
        print(classification_report(y_test, y_pred))
        # Pin the matrix rows/columns to one entry per class so the tick labels
        # stay aligned even when a class is missing from this dataloader.
        # Assumes labels are the integer indices 0..len(classes)-1, which is
        # also how predict() indexes self.classes.
        label_ids = np.arange(len(self.classes))
        cm = confusion_matrix(y_test, y_pred, labels=label_ids)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=self.classes, yticklabels=self.classes)
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig('confusion_matrix.png')
        plt.show()

    def predict(self, image_path):
        """
        Predict the class of the given image.

        Args:
            image_path: path of the image file to classify.

        Returns:
            The entry of ``self.classes`` selected by the classifier's
            prediction for this image.
        """
        # Context manager closes the underlying file once the RGB copy exists.
        with Image.open(image_path) as img:
            image = img.convert('RGB')
        image_tensor = self.transform(image).unsqueeze(0).to(self._extractor_device())
        with torch.no_grad():
            features = self.feature_extractor(image_tensor)
        # Move to CPU first: numpy conversion fails for tensors on another device.
        prediction = self.classifier.predict(features.cpu().numpy())[0]
        return self.classes[prediction]

### Link to the model (download it, then enter its local path in the cell below): https://drive.google.com/file/d/1XowZzq6QQw7_SkheAOjXLPXxm3LXcuXa/view?usp=sharing

In [3]:
# Strip surrounding whitespace and quotes so a pasted path (e.g. Windows "Copy as path") still resolves.
model_path = input("Enter path to model: ").strip().strip('"\'')

ModuleNotFoundError: No module named 'google'

In [11]:
# Load the model from the path entered above (`model_path`); the file can live anywhere.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files obtained from a source you trust.
# NOTE(review): the file is presumably a pickled SugarcaneClassifierWrapper. If so, the
# class definitions above must already have been run in this kernel. TODO confirm.
model = joblib.load(model_path)

# Single image prediction

In [15]:
# Strip surrounding whitespace and quotes so a pasted path (e.g. Windows "Copy as path") still resolves.
image_path = input("Enter path to image: ").strip().strip('"\'')

Enter path to image:  C:\Users\Kathleen Jocson\Desktop\sugarcane_test\sugarcane_test\1.jpeg


In [16]:
# Classify the image at `image_path`; the value returned by predict() is shown as the cell output.
model.predict(image_path)

'Viral'

# Multiple Images (No Label; For testing)




In [17]:
# Strip surrounding whitespace and quotes so a pasted path (e.g. Windows "Copy as path") still resolves.
folder_path = input("Enter path to folder containing the testing images: ").strip().strip('"\'')

Enter path to folder containing the testing images:  C:\Users\Kathleen Jocson\Desktop\sugarcane_test\sugarcane_test


In [21]:
import os
import csv

# File extensions (lower-case) treated as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

# Collect every image file directly inside `folder_path`.
# - The extension is compared case-insensitively so files such as "IMG_1.JPG" are not skipped.
# - The result is sorted because os.listdir returns entries in arbitrary order;
#   sorting keeps the CSV row order the same on every run.
image_paths = sorted(
    os.path.join(folder_path, fname)
    for fname in os.listdir(folder_path)
    if fname.lower().endswith(IMAGE_EXTENSIONS)
)

# Write one row per image: its file name and the label predicted by the model.
with open('predictions.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['image_filename', 'predicted_label'])  # Header

    for img_path in image_paths:
        image_name = os.path.basename(img_path)
        prediction = model.predict(img_path)
        writer.writerow([image_name, prediction])