# Evaluation Notebook

## Download Data

Download the data from this Google Drive folder: https://drive.google.com/drive/folders/1tOMxGHMRtY8E1p1NKun6Wi_4DHMmRjAq?usp=sharing  
(Sorry, but gdown seemingly can't handle more than 50 files at a time and we have 40000, so the folder has to be downloaded manually.)

Keep the same directory structure as in the Drive folder to avoid any problems,  
i.e. download the whole folder named IPEO_Planet_project so that you end up with the following structure:  
 - submission_<names_our_names>  
   - IPEO-Understanding-the-Amazon-from-Space  
     - some code + logs + CSV  
     - this file <evaluation.ipynb>
   - IPEO_Planet_project
     - checkpoints  
     - train-jpg  
     - train_labels.csv

## Your Plots and Results

By now you should have run the command that installs the relevant packages in your virtual env (`pip install -r requirements.txt`).

Please check that you have a GPU enabled; otherwise loading the checkpoint and creating the trainer may not work.

In [4]:
from dataset import DatasetAmazon
from model import PlanetModel, testModel, ResNet
import torch
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
import time

from pytorch_lightning.callbacks import ModelCheckpoint
import os
import sys
import numpy as np

from accuracy_metrics import Hamming_distance, transform_pred, overall_acc, count_false

In [None]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)  # check which device was selected ("cuda" is expected on a GPU machine)

If you just want to see if it runs set:  
 - full, tiny = False, True  

Note: you will **NOT** reproduce our results with this option.

In [None]:
# Dataset switches: full=True / tiny=False is the configuration used for the
# reported results; set full, tiny = False, True for a quick smoke run.
full = True
tiny = False
test_dataset = DatasetAmazon(
    full=full,
    tiny=tiny,
    test=True,
    path_to_labels="CSV/train_label_vector.pkl",
)
# One sample per batch, in fixed order; change num_workers if your machine has more.
test_dl = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)

### Pretrained model

In [None]:
# Checkpoint of the ResNet18-based model, stored in the downloaded data folder.
model_name = "ResNet18-epoch=47_over_50-val_accuracy=0.93.ckpt"
checkpoint_path = "../IPEO_Planet_project/checkpoints/"+model_name

pretrained = True  # NOTE(review): not used in this cell — confirm whether ResNet should receive it
depth = 18
test_model = ResNet(depth=depth)
model = PlanetModel(model=test_model)
# map_location remaps the stored tensors onto `device` (defined in an earlier cell),
# so a checkpoint saved on a GPU can also be loaded on a CPU-only machine.
model.load_state_dict(torch.load(checkpoint_path, map_location=device)["state_dict"])

In [None]:
# Metrics are logged as CSV under the "logs" directory.
csv_logger = pl_loggers.CSVLogger(save_dir="", name="logs")

max_epochs = 50

# "{epoch}" and "{val_accuracy:.2f}" are deliberately left as plain placeholders:
# they are filled in by the checkpoint callback, not by Python's f-string formatting.
checkpoint_filename = f'ResNet{depth}' + '-{epoch}_' + f'over_{max_epochs}' + '-{val_accuracy:.2f}'

# The callback monitors val_accuracy with mode="max".
checkpoint_callback = ModelCheckpoint(
    monitor="val_accuracy",
    mode="max",
    dirpath="../IPEO_Planet_project/checkpoints",
    filename=checkpoint_filename,
)

In [None]:
# Use the first GPU when CUDA is available, otherwise fall back to a single CPU
# process so the evaluation also runs on machines without a GPU.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator="gpu" if use_gpu else "cpu",
    devices=[0] if use_gpu else 1,
    logger=csv_logger,
    callbacks=[checkpoint_callback],
    # resume_from_checkpoint=None was dropped: None is already the default, and
    # the argument no longer exists in PyTorch Lightning 2.x.
    check_val_every_n_epoch=1,
)

In [None]:
# Run inference over the whole test loader. The next cell unpacks `rs` with
# zip(*rs), so each entry is presumably a (prediction, target) pair — confirm
# against PlanetModel.predict_step.
# This can take a little while (about 10-20 min depending on your system).
rs = trainer.predict(model, dataloaders=test_dl)

In [None]:
# Split the prediction results into two parallel lists: predictions and targets.
y_hat, y = (list(column) for column in zip(*rs))
n_results = len(y_hat)

# Running totals; the (1, 17) counters presumably hold one slot per label — TODO confirm.
false_pos = np.zeros((1, 17))
false_neg = np.zeros((1, 17))
overall_accuracy = 0.0
hamming_dist = 0.0

for prediction, target in zip(y_hat, y):
    overall_accuracy += overall_acc(prediction, target)
    hamming_dist += Hamming_distance(prediction, target)
    false_positive, false_negative = count_false(prediction, target)
    false_pos += false_positive
    false_neg += false_negative

# Average the accumulated scores over the number of prediction results.
overall_accuracy = overall_accuracy/n_results
hamming_dist = hamming_dist/n_results

print(f"Overall accuracy: {overall_accuracy:.4f}\nHamming distance: {hamming_dist}")
print("False positive (predict 1 instead of 0): ",false_pos, "\nFalse negative (predict 0 instead of 1): ",false_neg)

#### Make the graphs

In [None]:
import os
import numpy as np
from skimage.io import imsave, imread
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the logged training metrics (semicolon-separated) and index them by epoch.
LOG_PATH = './logs/resnet18_loss_with_weights/metrics.csv'
log_df = pd.read_csv(LOG_PATH, sep=';').set_index('epoch')

In [None]:
# One standalone figure per logged metric: (series, y-axis label, title).
panels = [
    (log_df.train_loss, 'Training loss', 'Loss on training set\n'),
    (log_df.val_loss, 'Validation loss', 'Loss on validation set\n'),
    (log_df.hamming_dist, 'Hamming distance', 'Hamming distance\n'),
    (log_df.val_accuracy, 'Validation accuracy', 'Accuracy of model on validation set\n'),
]
values = [series for series, _, _ in panels]
figs = [plt.figure(figsize=(15,9)) for _ in panels]
axs = [fig.add_subplot(111) for fig in figs]
# Keep the individual handles available under their original names.
fig1, fig2, fig3, fig4 = figs
ax1, ax2, ax3, ax4 = axs

for ax, (series, ylabel, title) in zip(axs, panels):
    ax.set_xlim([0,49])  # fixed x-range of 0..49 (the notebook sets max_epochs = 50)
    ax.grid(visible=True, which='major', axis='both', linestyle='--', alpha=0.7)
    ax.plot(log_df.index, series)
    ax.set_xlabel('Epoch', fontsize=15)
    # Labels are set inside the loop so the cell does not end on an expression,
    # which keeps the Text object reprs out of the cell output.
    ax.set_ylabel(ylabel, fontsize=15)
    ax.set_title(title, fontsize=20)

### Model from scratch

In [None]:
# Checkpoint of the model trained from scratch, stored in the downloaded data folder.
model_name = "test-epoch=34_over_50-val_accuracy=0.93.ckpt"
checkpoint_path = "../IPEO_Planet_project/checkpoints/"+model_name

test_model = testModel(max_channels=512) 
model = PlanetModel(model=test_model)
# map_location remaps the stored tensors onto `device` (defined in an earlier cell),
# so a checkpoint saved on a GPU can also be loaded on a CPU-only machine.
model.load_state_dict(torch.load(checkpoint_path, map_location=device)["state_dict"])

In [None]:
# Metrics are logged as CSV under the "logs" directory.
csv_logger = pl_loggers.CSVLogger(save_dir="", name = "logs")

max_epochs = 50

# Bind the callback to `checkpoint_callback`: the trainer cell below passes that
# name, and without the assignment it would silently reuse the ResNet callback
# created earlier in the notebook.
# "{epoch}" and "{val_accuracy:.2f}" are placeholders filled in by the callback.
checkpoint_callback = ModelCheckpoint(
    dirpath="../IPEO_Planet_project/checkpoints",
    filename='test-{epoch}_'+ f'over_{max_epochs}'+ '-{val_accuracy:.2f}',
    monitor="val_accuracy",
    mode="max"
    )

In [None]:
# Use the first GPU when CUDA is available, otherwise fall back to a single CPU
# process so the evaluation also runs on machines without a GPU.
use_gpu = torch.cuda.is_available()
trainer = pl.Trainer(
    max_epochs=max_epochs,
    accelerator="gpu" if use_gpu else "cpu",
    devices=[0] if use_gpu else 1,
    logger=csv_logger,
    callbacks=[checkpoint_callback],
    # resume_from_checkpoint=None was dropped: None is already the default, and
    # the argument no longer exists in PyTorch Lightning 2.x.
    check_val_every_n_epoch=1,
)

In [None]:
# Run inference over the whole test loader. The next cell unpacks `rs` with
# zip(*rs), so each entry is presumably a (prediction, target) pair — confirm
# against PlanetModel.predict_step.
# This can take a little while (about 10-20 min depending on your system).
rs = trainer.predict(model, dataloaders=test_dl)

In [None]:
# Split the prediction results into two parallel lists: predictions and targets.
y_hat, y = (list(column) for column in zip(*rs))
n_results = len(y_hat)

# Running totals; the (1, 17) counters presumably hold one slot per label — TODO confirm.
false_pos = np.zeros((1, 17))
false_neg = np.zeros((1, 17))
overall_accuracy = 0.0
hamming_dist = 0.0

for prediction, target in zip(y_hat, y):
    overall_accuracy += overall_acc(prediction, target)
    hamming_dist += Hamming_distance(prediction, target)
    false_positive, false_negative = count_false(prediction, target)
    false_pos += false_positive
    false_neg += false_negative

# Average the accumulated scores over the number of prediction results.
overall_accuracy = overall_accuracy/n_results
hamming_dist = hamming_dist/n_results

print(f"Overall accuracy: {overall_accuracy:.4f}\nHamming distance: {hamming_dist}")
print("False positive (predict 1 instead of 0): ",false_pos, "\nFalse negative (predict 0 instead of 1): ",false_neg)

#### Make the graphs

In [None]:
import os
import numpy as np
from skimage.io import imsave, imread
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Read the logged training metrics (semicolon-separated) and index them by epoch.
LOG_PATH = './logs/our_version_loss_with_weights/metrics.csv'
log_df = pd.read_csv(LOG_PATH, sep=';').set_index('epoch')

In [None]:
# One standalone figure per logged metric: (series, y-axis label, title).
panels = [
    (log_df.train_loss, 'Training loss', 'Loss on training set\n'),
    (log_df.val_loss, 'Validation loss', 'Loss on validation set\n'),
    (log_df.hamming_dist, 'Hamming distance', 'Hamming distance\n'),
    (log_df.val_accuracy, 'Validation accuracy', 'Accuracy of model on validation set\n'),
]
values = [series for series, _, _ in panels]
figs = [plt.figure(figsize=(15,9)) for _ in panels]
axs = [fig.add_subplot(111) for fig in figs]
# Keep the individual handles available under their original names.
fig1, fig2, fig3, fig4 = figs
ax1, ax2, ax3, ax4 = axs

for ax, (series, ylabel, title) in zip(axs, panels):
    ax.set_xlim([0,49])  # fixed x-range of 0..49 (the notebook sets max_epochs = 50)
    ax.grid(visible=True, which='major', axis='both', linestyle='--', alpha=0.7)
    ax.plot(log_df.index, series)
    ax.set_xlabel('Epoch', fontsize=15)
    # Labels are set inside the loop so the cell does not end on an expression,
    # which keeps the Text object reprs out of the cell output.
    ax.set_ylabel(ylabel, fontsize=15)
    ax.set_title(title, fontsize=20)