# MSc. Thesis Final Results & Reports

In [1]:
import os 
import sys 
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')
# Pytorch module 
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

import pytorch_lightning as pl

import random
from typing import Any, Mapping
from time import gmtime, strftime

# set working directory
os.chdir('/home/jovyan/contrails/contrails_detection/')

# Project Module
import config
import src.utils as utils
from src.dataset import ContrailsDataset
from src.model import BaseUnet, SegmentLightning
from src.train import Trainer
from src.trainer_lgt import TrainerLightning

def clear_torch_cache():
    torch.cuda.empty_cache()
    print("Cleared PyTorch GPU cache.")

# Run the function to clear GPU cache
clear_torch_cache()

!nvidia-smi

Cleared PyTorch GPU cache.
Sun Jul 14 07:09:57 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.116.03   Driver Version: 525.116.03   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A4000    Off  | 00000000:03:00.0 Off |                  Off |
| 41%   41C    P8     7W / 140W |   2038MiB / 16376MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+------------------------------------------------------------

## Data Collection & Sampling

In [2]:
## Load file from csv split into train/test
from sklearn.model_selection import train_test_split
df = pd.read_csv(config.CSV_VALID_REC, index_col=0)
train_df, valid_df = train_test_split(df[df.Class.isin([0,1])], test_size = .2, random_state=42)
train_df.head()

Unnamed: 0,Image_ID,Class
821,1074165712766223877,0
244,127580362208497184,1
1224,1597708808159006972,0
1637,1582674860208357528,0
1080,1349536550702205092,1


In [3]:
train_df.groupby('Class').count()

Unnamed: 0_level_0,Image_ID
Class,Unnamed: 1_level_1
0,824
1,658


In [4]:
data_1 = train_df[train_df.Class == 1].tail(4)
data_2 = train_df.head(4)

In [5]:
validation_df = pd.concat([data_1, data_2])

In [7]:
validation_df

Unnamed: 0,Image_ID,Class
1239,1465677465640550265,1
467,1083465033403499127,1
1131,1288917852422534876,1
1295,135845313190230696,1
821,1074165712766223877,0
244,127580362208497184,1
1224,1597708808159006972,0
1637,1582674860208357528,0


## Model Results Band-ALL

In [8]:
## Get dataset
torch.manual_seed(44)
transform = transforms.Compose([
    # Add your desired transformations here
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x / 255.0),
])


train_dataset = ContrailsDataset(dataframe=validation_df,
                                 root_dir=config.DATA_DIR+'validation', 
                                 transform=transform, 
                                 sequence_index=config.TS # nth:5 sequence image
                                ) 

## Data Loaders
train_dataloader = DataLoader(train_dataset,
                              batch_size=8, 
                              shuffle=False, 
                              num_workers=config.WORKERS, 
                              pin_memory=True, 
                              prefetch_factor=config.WORKERS,
                             )

In [None]:
image, mask = next(iter(train_dataloader))
image.shape, mask.shape

In [None]:
utils.plot_rgb_and_mask(image, mask)

In [6]:
# List all files in the folder
folder_path = 'result_outs/'
files = os.listdir(folder_path)

# Filter out only CSV files
csv_files = [f for f in files if f.endswith('.csv')]
csv_files

['baseline-unetTDi_Gauss_Contrail_Bands.csv',
 'baseline-unetAshRGB_RFCH.csv',
 'baseline-unetAshRGB_N2.csv',
 'baseline-unetGaussian_ALL.csv',
 'baseline-unetALL.csv']

In [7]:
# Initialize an empty list to store the describe dataframes
describe_dataframes = []

# Loop through each CSV file
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    # Read the CSV file into a dataframe
    df = pd.read_csv(file_path)
    # Compute the descriptive statistics
    describe_df = df.describe()
    # Add a column for the filename
    describe_df['filename'] = file
    # Append the dataframe to the list
    describe_dataframes.append(describe_df)

# Concatenate all the describe dataframes into a single dataframe
final_describe_stats = pd.concat(describe_dataframes)

In [8]:
accu_ass_result_df = final_describe_stats.reset_index()

In [9]:
# Set the display format to limit decimals to 2
pd.options.display.float_format = '{:.2f}'.format
accu_ass_result_df[accu_ass_result_df["index"] == 'mean'].iloc[:, 3:]

Unnamed: 0,Accuracy,Precision,Recall,F1_Score,IoU,filename
1,1.0,0.88,0.81,0.81,0.21,baseline-unetTDi_Gauss_Contrail_Bands.csv
9,1.0,0.86,0.78,0.75,0.19,baseline-unetAshRGB_RFCH.csv
17,1.0,0.88,0.79,0.79,0.2,baseline-unetAshRGB_N2.csv
25,1.0,0.88,0.8,0.8,0.21,baseline-unetGaussian_ALL.csv
33,1.0,0.86,0.77,0.76,0.18,baseline-unetALL.csv


In [10]:
import json

# Define the folder containing the JSON files
folder_path = 'result_outs/'

# List all files in the folder
files = os.listdir(folder_path)

# Filter out only JSON files
json_files = [f for f in files if f.endswith('.json')]

# Initialize an empty list to store the dataframes
json_dataframes = []

# Loop through each JSON file
for file in json_files:
    file_path = os.path.join(folder_path, file)
    
    # Read the JSON file into a dataframe
    with open(file_path, 'r') as f:
        data = json.load(f)
        df = pd.json_normalize(data)
    
    # Add a column for the filename
    df['filename'] = file
    
    # Append the dataframe to the list
    json_dataframes.append(df)

# Concatenate all the JSON dataframes into a single dataframe
final_json_df = pd.concat(json_dataframes, ignore_index=True)

In [11]:
final_json_df

Unnamed: 0,Class_Accuracy,False_Positive_Rate,False_Negative_Rate,Confusion_Matrix,filename
0,"[[0.9989676685897343, 0.6366107329784128]]",[0.001839951924487224],[0.2424282926568473],"[[0.993931017423931, 0.0010271265439176366, 0....",ash_rgb_metrics.json
1,"[[0.9989463304912373, 0.6496067896431802]]",[0.0017743034659623183],[0.24247425135440853],"[[0.9939097869090545, 0.0010483570587940705, 0...",gauss_all_metrics.json
2,"[[0.9991185564724625, 0.6050803880846336]]",[0.00199897927365689],[0.2232841736613907],"[[0.9940811445516774, 0.000876999416171137, 0....",ash_rgb_rff_metrics.json
3,"[[0.9989901449961046, 0.6542076877679007]]",[0.0017509698012979657],[0.2334930038474054],"[[0.9939533805074962, 0.0010047634603523533, 0...",gauss_contb_metrics.json
