In [5]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf

### functions

In [6]:
def create_database_df(dataset_folder: str) -> 'DataFrame':

    """
    This function takes a bird database folder (unzipped) and generate a dataframe with two columns:
    - image_path
    - species
    """

    data_lst = []

    species_folders = os.listdir(dataset_folder)

    for folder in species_folders: 
        for image_file in Path(os.path.join(dataset_folder,folder)).glob("*.jpg"):
            row = {
                'img_path':image_file,
                'species':os.path.basename(image_file).rsplit('_', 1)[0]
            }
            data_lst.append(row)

    return pd.DataFrame(data_lst)

### load data

In [7]:
dataset_path = "./french_bird_db_50_100"
nb_classes = 50
nb_image_per_class = 100


image_df = create_database_df(dataset_path)
image_df.head()

Unnamed: 0,img_path,species
0,french_bird_db_50_100/cinclus_cinclus/cinclus_...,cinclus_cinclus
1,french_bird_db_50_100/cinclus_cinclus/cinclus_...,cinclus_cinclus
2,french_bird_db_50_100/cinclus_cinclus/cinclus_...,cinclus_cinclus
3,french_bird_db_50_100/cinclus_cinclus/cinclus_...,cinclus_cinclus
4,french_bird_db_50_100/cinclus_cinclus/cinclus_...,cinclus_cinclus


### clean data

In [8]:
def get_image_size(img_path: str) -> (int, ...):
    '''
    returns a tuple with iamge np_array_shape
    '''
    np_image = plt.imread(img_path)
    return(np_image.shape)


def get_good_images(image_path_df: 'DataFrame', expected_size: (int, ...) = (300,300,3)) -> 'DataFrame':
    '''
    return a dataframe with all image path to delete
    '''
    image_path_df['img_size'] = image_path_df['img_path'].apply(get_image_size)
    return image_path_df.loc[image_path_df['img_size'] == expected_size]

In [9]:
print(image_df.shape)
image_df = get_good_images(image_df)
print(image_df.shape)

(5000, 2)
(4996, 3)


In [10]:
# one hot encode categories
# convert categoreis to number
unique_species = image_df['species'].unique()
nb_unique_species = len(unique_species)
categories_dict = dict(zip(unique_species, list(range(nb_unique_species))))
category2bird_name_dict = {v:k for k, v in categories_dict.items()}
image_df['species'] = image_df['species'].apply(lambda x: categories_dict[x] if x in categories_dict else x)

In [11]:
category2bird_name_dict = {v:k for k, v in categories_dict.items()}

# train test split

In [12]:
image_df_train, image_df_test = train_test_split(image_df, random_state=0, shuffle=True)
image_df_train.shape, image_df_test.shape

((3747, 3), (1249, 3))

# model building (RESNET 34)

In [13]:
# import torchvision
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from PIL import Image

In [14]:
# Load dataset
# class LargeDataset(Dataset):
#     def __init__(self, img_path_df: 'DataFrame', transform: 'function'=None):
#         """
#         Args:
#             img_path_df (DataFrame): Dataframe with two columns: one with path to an image and onther with the corresponding class.
#             transform (callable, optional): Optional transform to be applied on a sample when loaded.
#         """
#         self.img_data = img_path_df
#         self.transform = transform

#     def __len__(self):
#         return len(self.img_data)

#     def __getitem__(self, idx):
#         img_path = self.img_data.iloc[idx, 0]
#         image = Image.open(img_path).convert("RGB")
#         label = self.img_data.iloc[idx, 1]
#         if self.transform:
#             image = self.transform(image)
#         return image, label



In [15]:

from large_dataset import LargeDataset

In [23]:
# load model 
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
num_classes = nb_classes
model.fc = nn.Linear(model.fc.in_features, num_classes)

# expected normalisation step for the model
normalize_step = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224 pixels
    transforms.ToTensor(),
    normalize_step,  # Normalize based on ImageNet dataset statistics
])

# Load the training and testing datasets
dataset_train = LargeDataset(image_df_train, transform=transform)
dataset_test = LargeDataset(image_df_test, transform=transform)

trainloader = DataLoader(dataset_train, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(dataset_test, batch_size=32, shuffle=False, num_workers=2)

# loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)



In [17]:
# Move the model to the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "mps")
print(device)

mps


In [18]:
# Move the model to the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "mps")
model.to(device)

# Training loop
for epoch in range(10):  # Number of epochs
    running_loss = 0.0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the parameter gradients

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()  # backward propagation
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch [{epoch + 1}/10], Loss: {running_loss/len(trainloader)}')

print('Finished Training')

118
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
Epoch [1/10], Loss: 3.2836982335074474
118
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
Epoch [2/10], Loss: 1.8690100716332259
118
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
6

In [29]:
#Evaluation mode
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total}%')


RuntimeError: Input type (MPSFloatType) and weight type (torch.FloatTensor) should be the same

In [24]:
# Display some images with image to predict and predicted bird

def get_random_bird_image_from_species(species_number: int, image_df: 'DataFrame') -> str:
    """
    return path of a random bird iamge from the class of the specie from the dataset
    """
    return image_df.loc[image_df['species'] == species_number, ['img_path']].sample(n=1)


def get_species_name_from_number(species_number: int, number2name_dict: dict) -> str:
    return number2name_dict[species_number]

In [21]:
import matplotlib.pyplot as plt

# load prediction from output

species_number_pred = 
species_name_pred = get_species_name_from_number(species_number_pred)
pred_img_path = get_random_bird_image_from_species()


fig, axs = plt.subplots(nrows=1, ncols=2)



SyntaxError: invalid syntax (1056652916.py, line 5)