In [15]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
import os
import pandas as pd
from PIL import Image
from torch.utils.data import (
    Dataset,
    DataLoader,
)


In [18]:
import os
import shutil

# set source and destination directories
source_dir = './data/Flowers'
dest_dir = './merge_images'

# create destination directory if it doesn't exist
if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)

# loop through subdirectories in source directory
for subdir in os.listdir(source_dir):
    subdir_path = os.path.join(source_dir, subdir)
    # loop through image files in subdirectory
    for file in os.listdir(subdir_path):
        # get full path of source file
        src_file = os.path.join(subdir_path, file)
        # create destination file path
        dest_file = os.path.join(dest_dir, file)
        # copy file from source to destination
        shutil.copy(src_file, dest_file)

In [20]:
# Create a dictionary to map flower names to label numbers
flower_labels = {'babi': 0, 'calimerio': 1, 'chrysanthemum': 2, 'hydrangeas': 3, 
                 'lisianthus': 4, 'pingpong': 5, 'rosy': 6, 'tana': 7}

# Create an empty list to store the data rows for the CSV file
data_rows = []

# Loop over each flower folder
for flower_folder in os.listdir(source_dir):
    
    # Get the label for this flower folder
    label = flower_labels[flower_folder.lower()]
    
    # Loop over each image file in this flower folder
    for file_name in os.listdir(os.path.join(source_dir, flower_folder)):
        
        # Create a new row for this image file
        row = {'flower': file_name, 'label': label}
        
        # Add the row to the list of data rows
        data_rows.append(row)

# Write the data rows to a CSV file
with open('flower_labels.csv', 'w') as f:
    f.write('flower,label\n')
    for row in data_rows:
        f.write('{},{:d}\n'.format(row['flower'], row['label']))


In [13]:
class FlowerDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None):
        self.annotations = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_path = os.path.join(self.root_dir, self.annotations.iloc[index, 0])
        image = Image.open(img_path)
        y_label = torch.tensor(int(self.annotations.iloc[index, 1]))

        if self.transform:
            image = self.transform(image)

        return (image, y_label)


# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
in_channel = 3
num_classes = 2
learning_rate = 3e-4
batch_size = 32
num_epochs = 10

my_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224,224)),
])

# Load Data
dataset = FlowerDataset(
    csv_file="metadata.csv",
    root_dir=dest_dir,
    transform=my_transform,
)

train_set, test_set = torch.utils.data.random_split(dataset, [1, 1])
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=True)

# Model
from torchvision.models import resnet50
from torchvision.models import vgg16
vgg16_model = vgg16(weights=None)

# final layer is not frozen
vgg16_model.fc = nn.Linear(in_features=1024, out_features=num_classes)
vgg16_model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16_model.parameters(), lr=learning_rate, weight_decay=1e-5)

In [14]:
# Train Network
for epoch in range(num_epochs):
    losses = []


    for batch_idx, (data, targets) in enumerate(train_loader):
        # Get data to cuda if possible
        data = data.to(device=device)
        targets = targets.to(device=device)

        # forward
        optimizer.zero_grad()

        scores = vgg16_model(data)
        loss = criterion(scores, targets)

        losses.append(loss.item())

        # backward            
        loss.backward()


        # gradient descent or adam step
        optimizer.step()

    print(f"Cost at epoch {epoch} is {sum(losses)/len(losses)}")

# Check accuracy on training
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)

            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)

        print(
            f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
        )

    model.train()


print("Checking accuracy on Training Set")
check_accuracy(train_loader, vgg16_model)

print("Checking accuracy on Test Set")
check_accuracy(test_loader, vgg16_model)



Cost at epoch 0 is 6.868964672088623
Cost at epoch 1 is 4.952603340148926
Cost at epoch 2 is 0.0
Cost at epoch 3 is 0.0
Cost at epoch 4 is 0.0
Cost at epoch 5 is 0.0
Cost at epoch 6 is 0.0
Cost at epoch 7 is 0.0
Cost at epoch 8 is 0.0
Cost at epoch 9 is 0.0
Checking accuracy on Training Set
Got 1 / 1 with accuracy 100.00
Checking accuracy on Test Set
Got 0 / 1 with accuracy 0.00
