In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect every file path under the Kaggle input directory, then print them
# one per line in the order os.walk discovers them.
input_file_paths = [
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
]
for input_path in input_file_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torchvision
from torchvision import transforms, datasets, models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_recall_fscore_support
import seaborn as sns


In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

In [None]:
# Per-channel mean / std used to standardize the colour channels.
# NOTE(review): these look like the usual ImageNet statistics, matching the
# pre-trained weights loaded later in the notebook — confirm.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),                       # every image becomes 224x224
    transforms.ToTensor(),                               # image -> tensor
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),  # standardize colours
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Root folder of the image dataset; it is handed to ImageFolder, which derives
# the class list from this directory.
data_dir = '/kaggle/input/poultry/dataset2/poultry_diseases'

In [None]:
# Load every image under data_dir and apply the preprocessing pipeline to each
# sample as it is read.
full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)

In [None]:
# Show the class names and the integer index assigned to each of them.
class_names = full_dataset.classes
class_index_map = full_dataset.class_to_idx
print(f"Classes found: {class_names}")
print(f"Class to index mapping: {class_index_map}")

In [None]:
# 80 % of the images go to training; the remainder (including any rounding
# leftover) goes to testing so the two sizes always add up to the full set.
num_images = len(full_dataset)
train_size = int(0.8 * num_images)
test_size = num_images - train_size

In [None]:
# Seed the split so the same images land in train / test on every run.
# Without a fixed generator, re-running this cell after training reshuffles
# the split, so images the model was trained on can end up in the test set
# and the reported metrics are neither trustworthy nor reproducible.
SPLIT_SEED = 42
train_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# Report the size of the full dataset and of each split.
for split_name, split_data in (
    ("Total", full_dataset),
    ("Training", train_dataset),
    ("Testing", test_dataset),
):
    print(f"{split_name} images: {len(split_data)}")

In [None]:
batch_size = 32  # number of images per mini-batch

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Start from a ResNet-18 with the IMAGENET1K_V1 pre-trained weights.
pretrained_weights = models.ResNet18_Weights.IMAGENET1K_V1
model = models.resnet18(weights=pretrained_weights)

# Swap the final fully-connected layer for one with an output per dataset class.
num_classes = len(full_dataset.classes)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

# Place the model on the selected device (GPU if available).
model = model.to(device)



In [None]:
# Confirm how many output classes the replaced final layer was built for.
print(f"Model created with {num_classes} output classes")

In [None]:
# Cross-entropy loss over the class logits, optimised with Adam over all of
# the model's parameters.
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
def train_model(epochs=10):
    """Train the notebook-level ``model`` on ``train_loader``.

    Uses the module-level ``model``, ``train_loader``, ``criterion``,
    ``optimizer`` and ``device``. Prints the batch loss every 20 batches and,
    after each epoch, the training accuracy and the mean loss per batch.

    Args:
        epochs: Number of full passes over ``train_loader``.

    Raises:
        ValueError: If an epoch sees no samples at all (previously this
            surfaced as an opaque ZeroDivisionError).
    """
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for batch_idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Accuracy bookkeeping only: compute it outside autograd rather
            # than reaching into the legacy `.data` attribute.
            with torch.no_grad():
                _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if batch_idx % 20 == 0:
                print(f'Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}], Loss: {loss.item():.4f}')

        if total == 0:
            raise ValueError("train_loader produced no samples; nothing to train on")

        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{epochs}] - Accuracy: {accuracy:.2f}%, Loss: {running_loss/len(train_loader):.4f}')

In [None]:
# Evaluate the model on the test split
def test_model():
    """Run the notebook-level ``model`` over ``test_loader`` and print accuracy.

    Returns:
        A ``(labels, predictions)`` pair of parallel lists holding the true
        and predicted class index for every test sample, in loader order.
    """
    model.eval()
    targets, predictions = [], []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            # torch.max returns (values, indices); the indices are the classes.
            batch_predictions = torch.max(logits, 1)[1]

            predictions.extend(batch_predictions.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    test_accuracy = accuracy_score(targets, predictions)
    print(f'Test Accuracy: {test_accuracy*100:.2f}%')

    return targets, predictions

In [None]:
def plot_confusion_matrix(y_true, y_pred):
    """Draw a labelled heatmap of the confusion matrix.

    Rows are the actual classes and columns the predicted classes, both
    labelled with ``full_dataset.classes``.

    Args:
        y_true: Sequence of true class indices.
        y_pred: Sequence of predicted class indices, parallel to ``y_true``.
    """
    class_names = full_dataset.classes
    # Pin the label set. By default sklearn keeps only the classes that occur
    # in y_true / y_pred, so a class missing from this split would shrink the
    # matrix and shift the tick labels onto the wrong rows and columns.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()


In [None]:
def show_predictions():
    """Plot up to eight images from the first test batch with their labels.

    Each panel is titled with the true and predicted class name; the title is
    green when they match and red otherwise. Uses the notebook-level
    ``model``, ``test_loader``, ``device`` and ``full_dataset``.
    """
    model.eval()
    images, labels = next(iter(test_loader))

    with torch.no_grad():
        outputs = model(images.to(device))
        _, predicted = torch.max(outputs, 1)

    # Same values as the Normalize() step in `transform`, reshaped so they
    # broadcast over a (3, H, W) image when undoing the normalisation.
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

    fig, axes = plt.subplots(2, 4, figsize=(12, 6))
    # The first batch can hold fewer images than there are panels (e.g. a very
    # small test split); indexing past it used to raise IndexError.
    num_shown = min(axes.size, images.size(0))

    for i, ax in enumerate(axes.flat):
        if i < num_shown:
            # Denormalize image and clip to the displayable [0, 1] range
            img = torch.clamp(images[i] * std + mean, 0, 1)

            ax.imshow(img.permute(1, 2, 0))
            actual = full_dataset.classes[labels[i]]
            pred = full_dataset.classes[predicted[i]]
            color = 'green' if actual == pred else 'red'
            ax.set_title(f'True: {actual}\nPred: {pred}', color=color)
        # Unused panels are hidden as well, so no empty axes are drawn.
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Number of passes over the training data for this run.
NUM_EPOCHS = 10

print("Starting training...")
train_model(epochs=NUM_EPOCHS)

In [None]:
# Evaluate on the test loader and keep the true / predicted labels that
# test_model() returns for the reports in the following cells.
print("\nTesting model...")
y_true, y_pred = test_model()

In [None]:
print("\nClassification Report:")
# Pass the full label set explicitly. With target_names alone, sklearn raises
# a ValueError when a class is missing from the test split, because the number
# of observed labels no longer matches the number of names.
# zero_division=0 reports 0.0 for such classes without emitting warnings.
class_ids = list(range(len(full_dataset.classes)))
print(classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=full_dataset.classes,
    zero_division=0,
))

In [None]:
# Plot the confusion-matrix heatmap for the test predictions.
print("\nConfusion Matrix:")
plot_confusion_matrix(y_true, y_pred)

In [None]:
# Visual spot-check: a grid of test images titled with true vs. predicted class.
print("\nSample Predictions:")
show_predictions()

In [None]:
print("\n" + "="*50)
print("ADDITIONAL EVALUATIONS")
print("="*50)

# Request one entry per dataset class. Without labels=, sklearn returns
# metrics only for classes present in y_true / y_pred, so indexing the result
# by position in full_dataset.classes could misalign or run out of range.
class_ids = list(range(len(full_dataset.classes)))
precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, labels=class_ids, zero_division=0
)

print("\nPer-class results:")
for class_name, class_precision, class_recall, class_f1 in zip(
    full_dataset.classes, precision, recall, f1
):
    print(f"{class_name}: Precision={class_precision:.2f}, Recall={class_recall:.2f}, F1={class_f1:.2f}")

In [None]:
# Collect, for every test image, the top softmax probability (the model's
# confidence) together with the true and predicted class index.
model.eval()
all_probs, all_labels, all_preds = [], [], []

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        class_probs = torch.softmax(logits, dim=1)
        # max over the class dimension -> (highest probability, its class index)
        top_prob, top_class = class_probs.max(dim=1)

        all_probs.extend(top_prob.cpu().numpy())
        all_labels.extend(batch_labels.numpy())
        all_preds.extend(top_class.cpu().numpy())

In [None]:
# Turn the accumulated lists into NumPy arrays so the following cells can use
# boolean masks and vectorised comparisons.
all_probs, all_labels, all_preds = map(
    np.array, (all_probs, all_labels, all_preds)
)

In [None]:
# Confidence histogram
correct_mask = all_labels == all_preds
plt.figure(figsize=(8, 5))
# Both histograms use the same range so their bin edges coincide. Otherwise
# each call picks its own edges from its own data and the overlaid bar
# heights are not comparable.
confidence_range = (0, 1)
plt.hist(all_probs[correct_mask], alpha=0.7, label='Correct', bins=15,
         range=confidence_range, color='green')
plt.hist(all_probs[~correct_mask], alpha=0.7, label='Wrong', bins=15,
         range=confidence_range, color='red')
plt.xlabel('Confidence Score')
plt.ylabel('Count')
plt.title('Confidence Distribution')
plt.legend()
plt.show()

In [None]:
# Summary statistics over the per-image confidence scores
low_conf_mask = all_probs < 0.7

total = len(all_probs)
correct = np.sum(all_labels == all_preds)
wrong = total - correct
low_conf = low_conf_mask.sum()
high_conf = (all_probs > 0.9).sum()

# Build the report first, then emit it in a single print.
summary_lines = [
    "\nConfidence Summary:",
    f"Total predictions: {total}",
    f"Correct: {correct}, Wrong: {wrong}",
    f"Average confidence: {all_probs.mean():.3f}",
    f"Low confidence (<0.7): {low_conf}",
    f"High confidence (>0.9): {high_conf}",
    f"Low confidence but correct: {(low_conf_mask & correct_mask).sum()}",
    f"Low confidence and wrong: {(low_conf_mask & ~correct_mask).sum()}",
]
print("\n".join(summary_lines))

In [None]:
# List every prediction whose confidence is below 0.7
print("\nLow confidence predictions:")
class_names = full_dataset.classes
for conf, label_idx, pred_idx in zip(all_probs, all_labels, all_preds):
    if conf < 0.7:
        true_class = class_names[label_idx]
        pred_class = class_names[pred_idx]
        print(f"True: {true_class}, Predicted: {pred_class}, Confidence: {conf:.2f}")

In [None]:
# Most confused classes
class_names = full_dataset.classes
# labels= pins the matrix to one row / column per dataset class. Without it a
# class missing from y_true / y_pred shrinks the matrix, and the loops below
# would either index out of bounds or report the wrong class names.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
print("\nMost confused classes:")
for i, actual_name in enumerate(class_names):
    for j, predicted_name in enumerate(class_names):
        if i != j and cm[i, j] > 2:  # more than 2 wrong predictions
            print(f"{actual_name} confused with {predicted_name}: {cm[i, j]} times")

In [None]:
# Persist only the model's state_dict to a file in the working directory.
MODEL_PATH = 'poultry_model.pth'
model_state = model.state_dict()
torch.save(model_state, MODEL_PATH)
print("\nModel saved!")