In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Image folders of the sheep-classification dataset.
# They share the /kaggle/input/d/... root used by the labels CSV and by the
# train/test cells further down, so every path in the notebook agrees.
# NOTE(review): confirm this is the actual mount point of the dataset.
_images_root = os.path.join(
    "/kaggle/input/d/jotikadas/sheep-classification-challenge-2025",
    "Sheep Classification Images",
)
train_dir = os.path.join(_images_root, "train")
test_dir = os.path.join(_images_root, "test")

In [None]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim


In [None]:
# Load the training labels table.
csv_path = "/kaggle/input/d/jotikadas/sheep-classification-challenge-2025/Sheep Classification Images/train_labels.csv"
df = pd.read_csv(csv_path)

# Distinct label names in sorted order; a label's position is its numeric id.
classes = sorted(df["label"].unique())
print(classes)

# id -> name lookup first, then its inverse (name -> id).
idx_to_class = dict(enumerate(classes))
class_to_idx = {name: idx for idx, name in idx_to_class.items()}

# Add the numeric label column.
df["label_idx"] = df["label"].map(class_to_idx)


In [None]:
class SheepDataset(Dataset):
    """Labelled image dataset backed by a dataframe.

    Each row of ``dataframe`` is read positionally and must expose a
    ``'filename'`` entry (joined onto ``image_dir``) and a ``'label_idx'``
    entry (returned unchanged as the target).

    Args:
        dataframe: table describing the samples.
        image_dir: directory that contains the image files.
        transform: optional callable applied to the RGB image.
    """

    def __init__(self, dataframe, image_dir, transform=None):
        self.df, self.image_dir, self.transform = dataframe, image_dir, transform

    def __len__(self):
        """Number of samples, i.e. the length of the backing dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        record = self.df.iloc[idx]
        picture = Image.open(os.path.join(self.image_dir, record['filename'])).convert('RGB')
        target = record['label_idx']

        # Without a (truthy) transform the RGB image is returned untouched.
        if not self.transform:
            return picture, target
        return self.transform(picture), target


In [None]:
# Stratified 80/20 train/validation split with a fixed seed.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label_idx"], random_state=42)

# Path to the training images
train_dir = "/kaggle/input/d/jotikadas/sheep-classification-challenge-2025/Sheep Classification Images/train"

# Preprocessing.
# The model fine-tuned in this notebook is an ImageNet-pretrained ResNet-50, so
# inputs are normalized with the ImageNet channel statistics those weights
# were trained with rather than a generic 0.5 / 0.5.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Datasets (both read images from the training folder; they differ only in rows)
train_dataset = SheepDataset(train_df, train_dir, transform)
val_dataset = SheepDataset(val_df, train_dir, transform)

# Only the training loader shuffles; validation keeps dataframe order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from torchvision import models
import torch.nn as nn

# ResNet-50 backbone with pretrained weights.
# NOTE(review): recent torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; confirm the installed version before switching.
model = models.resnet50(pretrained=True)

# Replace the final layer so it emits one logit per class found in the labels
# CSV, instead of a hard-coded count that silently breaks if the label set changes.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(classes))

# Move the model to the selected device
model = model.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


In [None]:
from sklearn.metrics import f1_score

NUM_EPOCHS = 5

for epoch in range(NUM_EPOCHS):
    # Training phase
    model.train()
    loss_sum = 0.0      # running sum of per-sample losses
    samples_seen = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion yields a batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample mean, independent of batch count
        # and of a smaller final batch.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        samples_seen += batch_size

    # max(..., 1) keeps an empty loader from dividing by zero.
    train_loss = loss_sum / max(samples_seen, 1)

    # Validation phase
    model.eval()
    val_preds = []
    val_targets = []

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images.to(device))
            preds = torch.argmax(outputs, dim=1)

            val_preds.extend(preds.cpu().tolist())
            # Targets are only consumed by scikit-learn on the host, so they
            # are never moved to the device.
            val_targets.extend(labels.tolist())

    # Macro-averaged F1 over the validation split
    val_f1 = f1_score(val_targets, val_preds, average='macro')

    # Results
    print(f"Epoch {epoch+1} - Loss: {train_loss:.4f}")
    print(f"Epoch {epoch+1} - Val F1 Score: {val_f1:.4f}")
    print('-'*50)


In [None]:
test_dir = "/kaggle/input/d/jotikadas/sheep-classification-challenge-2025/Sheep Classification Images/test"
# os.listdir returns entries in arbitrary order, so sort them to make the
# prediction/submission row order reproducible, and keep only regular files so
# a stray sub-directory cannot reach Image.open.
test_filenames = sorted(
    name for name in os.listdir(test_dir)
    if os.path.isfile(os.path.join(test_dir, name))
)

class SheepTestDataset(Dataset):
    """Unlabelled image dataset built from a sequence of file names.

    Args:
        filenames: indexable collection of file names located in ``image_dir``.
        image_dir: directory that contains the image files.
        transform: optional callable applied to the RGB image.
    """

    def __init__(self, filenames, image_dir, transform=None):
        self.filenames, self.image_dir, self.transform = filenames, image_dir, transform

    def __len__(self):
        """Number of files in the dataset."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Return ``(image, filename)`` for the file at position ``idx``."""
        name = self.filenames[idx]
        picture = Image.open(os.path.join(self.image_dir, name)).convert('RGB')
        # The file name travels with the image so predictions can be matched back.
        return (self.transform(picture) if self.transform else picture), name

# Test images go through the same preprocessing object as the training data.
test_dataset = SheepTestDataset(test_filenames, test_dir, transform)
test_loader = DataLoader(test_dataset, batch_size=32)

# Predictions
model.eval()
results = []

with torch.no_grad():
    for batch_images, batch_names in test_loader:
        logits = model(batch_images.to(device))
        class_ids = torch.argmax(logits, 1).cpu().numpy()

        # Pair every file name with the class name of its highest-scoring logit.
        results.extend(
            (name, idx_to_class[class_id])
            for name, class_id in zip(batch_names, class_ids)
        )


In [None]:
# One row per test image: the file name and its predicted class name.
submission = pd.DataFrame.from_records(results, columns=["filename", "label"])
# Write the submission file without the dataframe index column.
submission.to_csv("submission_2.csv", index=False)
