In [None]:
import os
from pathlib import Path

import torch
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from torch import nn, optim
import torch.nn.functional as F

import torchvision.models as models

import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Move into the thesis project directory; later cells build their paths from
# the current working directory.
# os.path.join uses the platform's separator: the previously hard-coded
# "..\\Thesis_Code" only resolves on Windows (where this yields the same string).
project_path = os.path.join("..", "Thesis_Code")
os.chdir(project_path)

In [None]:
# Experiment selection. Alternative values for data_folder:
#   "data_h", "data_e", "data_n", "data_r", "data_i", "data_hen", "data_ri"
data_folder = "data_henri"


# The three splits sit side by side under
# <cwd>/CNN_classifier/selector/<data_folder>/.
train_path, val_path, test_path = (
    Path.cwd() / "CNN_classifier" / "selector" / data_folder / split
    for split in ("train", "validation", "test")
)

In [None]:
# Data access

# Shared preprocessing for training and validation images: resize to 224x224,
# convert to a tensor, then normalise with the per-channel mean/std below.
tsfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One ImageFolder dataset per split, both using the same transform.
train_dataset = ImageFolder(root=train_path, transform=tsfm)
val_dataset = ImageFolder(root=val_path, transform=tsfm)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=25, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=25, num_workers=4)

# Display train_dataset.class_to_idx as the cell output.
train_dataset.class_to_idx

In [None]:
def evaluate_model(val_data_loader, val_dataset, model, loss_fn):
    """Compute accuracy and mean batch loss of ``model`` over a data loader.

    The model is put in evaluation mode for the duration of the call and its
    previous train/eval mode is restored afterwards. Batches are moved to the
    device that holds the model's parameters, so the function does not depend
    on a notebook-level ``device`` variable being defined.

    Args:
        val_data_loader: Iterable yielding ``(inputs, targets)`` batches.
        val_dataset: Sized collection; only ``len(val_dataset)`` is used, as
            the denominator of the accuracy.
        model: Module whose output is arg-maxed along dim 1 to get the
            predicted class.
        loss_fn: Callable ``loss_fn(pred, targets)`` returning a scalar tensor.

    Returns:
        Tuple ``(val_accuracy, val_avg_loss)``: the fraction of correct
        arg-max predictions, and the unweighted mean of the per-batch losses.

    Raises:
        ZeroDivisionError: If the dataset or the loader is empty.
    """
    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    losses = []
    n_correct = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for b_x, b_y in val_data_loader:

                b_x, b_y = b_x.to(target_device), b_y.to(target_device)

                pred = model(b_x)
                loss = loss_fn(pred, b_y)
                losses.append(loss.item())

                hard_preds = pred.argmax(dim=1)
                n_correct += torch.sum(hard_preds == b_y).item()
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    val_accuracy = n_correct/len(val_dataset)
    val_avg_loss = sum(losses)/len(losses)

    return val_accuracy, val_avg_loss

def train_model(model, optimizer, train_loader, val_loader, train_dataset, val_dataset, epochs=10, display=True, class_weights=None):
    """Train ``model`` with a class-weighted NLL loss and report metrics per epoch.

    Args:
        model: Module to train; its output is fed to ``nn.NLLLoss`` and
            arg-maxed along dim 1 to count correct predictions.
        optimizer: Optimizer already bound to the parameters to update.
        train_loader: Iterable yielding ``(inputs, targets)`` training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate_model``.
        train_dataset: Sized collection; ``len()`` is the training-accuracy
            denominator.
        val_dataset: Sized collection, passed to ``evaluate_model``.
        epochs: Number of passes over ``train_loader``.
        display: When true, print a line every 50 batches and a metrics line
            at the end of every epoch.
        class_weights: Optional per-class weights for the loss (sequence or
            tensor). Defaults to ``[2359/2359, 2359/1241]``.

    Returns:
        None. Metrics are only printed.
    """
    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    if class_weights is None:
        # Default kept from the original experiment.
        # NOTE(review): 2359 and 1241 are presumably the per-class sample counts
        # of one specific training set -- confirm before switching data_folder.
        class_weights = [2359/2359, 2359/1241]
    # The weight tensor must be on the same device as the predictions,
    # otherwise the loss fails when the model runs on the GPU.
    class_weights = torch.as_tensor(class_weights, dtype=torch.float, device=target_device)
    loss_fn = nn.NLLLoss(weight=class_weights)

    for epoch in range(epochs):
        losses = []
        n_correct = 0

        # Make sure training-only layers are active for this epoch.
        model.train()

        for i, (b_x, b_y) in enumerate(train_loader):

            b_x, b_y = b_x.to(target_device), b_y.to(target_device)

            # Start every step from clean gradients
            optimizer.zero_grad()

            # Compute predictions and losses
            pred = model(b_x)
            loss = loss_fn(pred, b_y)
            losses.append(loss.item())

            # Count number of correct predictions
            hard_preds = pred.argmax(dim=1)
            n_correct += torch.sum(hard_preds == b_y).item()

            # Backpropagate
            loss.backward()
            optimizer.step()

            # Optionally display progress
            if display and ((i+1)%50 == 0):
                print("Batch {}".format(i+1))

        # Compute accuracy and loss in the entire training set
        train_accuracy = n_correct/len(train_dataset)
        train_avg_loss = sum(losses)/len(losses)

        # Compute accuracy and loss in the entire validation set
        val_accuracy, val_avg_loss = evaluate_model(val_loader, val_dataset, model, loss_fn)

        # Optionally display metrics
        if display:
            display_str = 'Epoch {} '
            display_str += '\tLoss: {:.3f} '
            display_str += '\tLoss (val): {:.3f}'
            display_str += '\tAccuracy: {:.2f} '
            display_str += '\tAccuracy (val): {:.2f}'
            print(display_str.format(epoch, train_avg_loss, val_avg_loss, train_accuracy, val_accuracy))

In [None]:
# Create model

alexnet = models.alexnet(pretrained=True)

# The replacement head takes as many inputs as the layer at index 1 of the
# stock classifier and outputs log-probabilities over num_classes classes.
num_classes = 2
num_ftrs = alexnet.classifier[1].in_features

new_top = nn.Sequential(
    nn.Linear(num_ftrs, num_classes),
    nn.LogSoftmax(dim=1),
)
alexnet.classifier = new_top

# Freeze bottom
alexnet.features.requires_grad_(False)

In [None]:
# Training

# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

alexnet.to(device)

optimizer = optim.Adam(alexnet.parameters(), lr=1e-4)

train_model(
    model=alexnet,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
)

In [None]:
def apply_test_transforms(inp):
    """Resize, tensorise and normalise one image for inference.

    Applies, in order, ``resize`` to 224x224, ``to_tensor`` and ``normalize``
    from ``transforms.functional``, using the mean/std constants below.
    """
    fn = transforms.functional
    resized = fn.resize(inp, [224,224])
    as_tensor = fn.to_tensor(resized)
    return fn.normalize(as_tensor, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

In [None]:
test_data_files = os.listdir(test_path)

# Per-instance values of columns "A" and "B", keyed by the CSV's "file" column.
csv_file_path = "./MAPF_Framework/solver/" + data_folder + "/" + "test_data.csv"
df = pd.read_csv(csv_file_path)
test_data = df.set_index("file").to_dict(orient = 'index')

b_count = 0
total_costs = {
  "A": 0,
  "B": 0,
  "model": 0,
  "best": 0,
  "worst": 0
}

# Make predictions
# Inference only: no_grad avoids recording an autograd graph for every image.
with torch.no_grad():
    for i, file in enumerate(test_data_files):
        instance_name = file.split('.')[0]

        # Close the image file once it has been decoded; a bare Image.open(...)
        # keeps the file handle open until the object is garbage collected.
        with Image.open(f'{test_path}/{file}') as img:
            x = apply_test_transforms(img.convert('RGB'))[None, :, :, :]
        x = x.to(device)

        pred = alexnet(x)
        hard_pred = pred.argmax(dim=1)

        # Class index 0 selects "A", anything else selects "B".
        pred_class = "A" if (hard_pred.item() == 0) else "B"

        if (pred_class == "B"):
            b_count += 1

        instance_costs = test_data[instance_name]
        total_costs["A"] += instance_costs["A"]
        total_costs["B"] += instance_costs["B"]
        total_costs["model"] += instance_costs[pred_class]
        total_costs["best"] += min(instance_costs["A"], instance_costs["B"])
        total_costs["worst"] += max(instance_costs["A"], instance_costs["B"])

        if (i%10 == 0):
            print("Done {:5d}/{}".format(i, len(test_data_files)))

print(b_count)

In [None]:
# Gap between each accumulated total and the "best" total.
diff_total_costs = dict(
    (name, total - total_costs["best"]) for name, total in total_costs.items()
)
diff_total_costs

In [None]:
# Each total as a percentage of the "best" total, rounded to two decimals.
ratio_total_costs = dict(
    (name, round(100 * total / total_costs["best"], 2))
    for name, total in total_costs.items()
)
ratio_total_costs

In [None]:
# Tick labels for the four bars drawn by make_graph_diff, in bar order.
x_titles = [r'$\pi_{A}$', r'$\pi_{B}$', r'$\pi$', "Worst"]

def make_graph_diff(costs_in, title="Exp. HENRI", out_file='result_exp_henri.pdf', ymax=80):
    """Draw a bar chart of the cost differences and save it to ``out_file``.

    The ``"best"`` entry of ``costs_in`` is dropped; the remaining entries are
    plotted in dictionary order and labelled with ``x_titles``, so four
    entries are expected after the drop (four colours and four tick labels
    are supplied).

    Args:
        costs_in: Mapping from name to the value to plot.
        title: Figure title.
        out_file: Path the figure is saved to. Calls that keep the default
            overwrite each other's output.
        ymax: Upper limit of the y axis (the lower limit is 0); taller bars
            are cut off at this value.

    Side effects:
        Sets ``font.size`` to 16 in the global matplotlib rcParams and writes
        ``out_file``.
    """
    costs = {key: value for key, value in costs_in.items() if key != "best"}
    positions = range(len(costs))

    plt.figure(figsize=(8,5))
    plt.rcParams.update({'font.size': 16})

    plt.title(title)
    plt.xlabel("mappings")
    plt.ylabel("difference with " + r"T($\pi^{*}$)")

    plt.bar(positions, list(costs.values()), align="center", width = 0.6, color=["C0", "C0", "C2", "C0"])
    plt.xticks(positions, x_titles)

    plt.ylim([0, ymax])
    plt.grid()
    #plt.show()
    plt.savefig(out_file, bbox_inches='tight')

In [None]:
# Plot the cost differences measured after the first training run
# (feature layers frozen); the figure is saved as 'result_exp_henri.pdf'.
make_graph_diff(diff_total_costs)

In [None]:
# Unfreeze bottom to fine-tune

alexnet.features.requires_grad_(True)

In [None]:
# New Adam optimizer for the fine-tuning run, with a smaller learning rate
# (1e-6 instead of the 1e-4 used for the first run).
optimizer = optim.Adam(alexnet.parameters(), lr=1e-6)

train_model(
    model=alexnet,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
)

In [None]:
b_count_ft = 0
total_costs_ft = {
  "A": 0,
  "B": 0,
  "model": 0,
  "best": 0,
  "worst": 0
}

# Make predictions
# Inference only: no_grad avoids recording an autograd graph for every image
# (after unfreezing, every layer of the network would otherwise be tracked).
with torch.no_grad():
    for i, file in enumerate(test_data_files):
        instance_name = file.split('.')[0]

        # Close the image file once it has been decoded; a bare Image.open(...)
        # keeps the file handle open until the object is garbage collected.
        with Image.open(f'{test_path}/{file}') as img:
            x = apply_test_transforms(img.convert('RGB'))[None, :, :, :]
        x = x.to(device)

        pred = alexnet(x)
        hard_pred = pred.argmax(dim=1)

        # Class index 0 selects "A", anything else selects "B".
        pred_class = "A" if (hard_pred.item() == 0) else "B"

        if (pred_class == "B"):
            b_count_ft += 1

        instance_costs = test_data[instance_name]
        total_costs_ft["A"] += instance_costs["A"]
        total_costs_ft["B"] += instance_costs["B"]
        total_costs_ft["model"] += instance_costs[pred_class]
        total_costs_ft["best"] += min(instance_costs["A"], instance_costs["B"])
        total_costs_ft["worst"] += max(instance_costs["A"], instance_costs["B"])

        if (i%10 == 0):
            print("Done {:5d}/{}".format(i, len(test_data_files)))

print(b_count_ft)

In [None]:
# Gap between each fine-tuned total and the "best" total.
diff_total_costs_ft = dict(
    (name, total - total_costs_ft["best"]) for name, total in total_costs_ft.items()
)
diff_total_costs_ft

In [None]:
# Each fine-tuned total as a percentage of the "best" total, rounded to two decimals.
ratio_total_costs_ft = dict(
    (name, round(100 * total / total_costs_ft["best"], 2))
    for name, total in total_costs_ft.items()
)
ratio_total_costs_ft

In [None]:
# Plot the cost differences measured after fine-tuning.
# NOTE(review): called like this, the figure is saved to 'result_exp_henri.pdf',
# the same file written by the earlier make_graph_diff(diff_total_costs) call,
# so the first plot is overwritten on disk.
make_graph_diff(diff_total_costs_ft)

In [None]:
# Display the module stored in alexnet.features as the cell output.
alexnet.features

In [None]:
# Display the replacement classifier head as the cell output.
alexnet.classifier