# SETUP

In [None]:
!pip install torch einops numpy timm==0.6.13 scipy gcsfs cdsapi xarray zarr netcdf4 matplotlib pandas

In [None]:
%cd /workspace/aurora_229s
!git pull

In [None]:
import importlib
from pathlib import Path
import datetime
import numpy as np
import torch
import gc
import pandas as pd

In [None]:
from aurora import inference_helper, evaluation_helper, compression
from aurora.model import aurora, swin3d

def reload():
    """Reload the project modules used by this notebook, in a fixed order.

    Calls ``importlib.reload`` on ``inference_helper``, ``evaluation_helper``,
    ``compression``, ``aurora`` and ``swin3d`` so that code edited on disk is
    picked up without restarting the kernel. Objects created before the call
    are not rebuilt.
    """
    project_modules = (
        inference_helper,
        evaluation_helper,
        compression,
        aurora,
        swin3d,
    )
    for module in project_modules:
        importlib.reload(module)

In [None]:
def gpu_mem(msg):
    """Print a labelled snapshot of CUDA memory counters for device 0.

    Args:
        msg: Label printed (followed by a colon) above the readings.

    The three readings come from ``torch.cuda.memory_allocated``,
    ``torch.cuda.memory_reserved`` and ``torch.cuda.max_memory_reserved``,
    each converted from bytes to GB (1024**3 bytes). A blank line is printed
    at the end.
    """
    bytes_per_gb = 1024 * 1024 * 1024
    readings = (
        ("\ttorch.cuda.memory_allocated: %fGB", torch.cuda.memory_allocated),
        ("\ttorch.cuda.memory_reserved: %fGB", torch.cuda.memory_reserved),
        ("\ttorch.cuda.max_memory_reserved: %fGB", torch.cuda.max_memory_reserved),
    )

    print(f'{msg}:')
    for template, query in readings:
        print(template % (query(0) / bytes_per_gb))
    print()

def print_timestamp():
    """Print the current local time as ``YYYY-MM-DD HH:MM:SS``."""
    print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

In [None]:
# Build the baseline (uncompressed) model that every later cell evaluates or
# passes to the compression routines as `original_model`.
model = aurora.Aurora()
# NOTE(review): the first argument looks like a model-hub repository id and the
# second a checkpoint file name inside it; confirm against
# Aurora.load_checkpoint.
model.load_checkpoint("microsoft/aurora", "aurora-0.25-small-pretrained.ckpt")
# Presumably torch.nn.Module.eval(), i.e. switches the model to inference
# mode; verify that Aurora is an nn.Module.
model.eval()

# Parameters

In [None]:
# Where the evaluation helpers read/download data and where result CSVs go.
download_path = Path("/workspace/data")

save_dir = Path("/workspace/results")
save_dir.mkdir(parents=True, exist_ok=True)

device = 'cuda'

# Each entry of the two lists is unpacked below as a 3-tuple whose middle
# element is used as a task name.
surf_vars_names_wts, atmos_vars_names_wts = inference_helper.get_vars_names_wts()

# Multi-day evaluation: one run of n_multiday_days per start date.
n_multiday_days = 7
multiday_starts = ['2022-05-01', '2022-08-01']

# Compression sweep settings; gradients are looked up under
# base_grad_dir / <task name>.
compression_ratios = [0.5, 0.25, 0.75]
base_grad_dir = Path("/workspace/models/fisher")
lh_task_names = [
    'multitask',
    *(task for _, task, _ in surf_vars_names_wts),
    *(task for _, task, _ in atmos_vars_names_wts),
]

# Same-day evaluation dates: each multi-day start date followed by the
# n_multiday_days - 1 dates produced by repeatedly applying
# inference_helper.increment_day.
sameday_starts = []
for day in multiday_starts:
    sameday_starts.append(day)
    remaining = n_multiday_days - 1
    while remaining > 0:
        day = inference_helper.increment_day(day)
        sameday_starts.append(day)
        remaining -= 1

### Baseline

In [None]:
model_name = 'baseline'

# DataFrame.to_csv does not create missing parent folders, and only save_dir
# itself is created earlier in the notebook. Create both output folders before
# evaluating so a missing folder cannot discard finished results.
sameday_dir = save_dir / 'sameday'
multiday_dir = save_dir / 'multiday'
sameday_dir.mkdir(parents=True, exist_ok=True)
multiday_dir.mkdir(parents=True, exist_ok=True)

# Sameday: one evaluation per date in sameday_starts, stacked row-wise into a
# single CSV.
day_frames = []
for day in sameday_starts:
    day_frames.append(
        evaluation_helper.same_day_eval(model=model, day=day, download_path=download_path, device=device)
    )

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration.
total_df = pd.concat(day_frames, axis=0).reset_index(drop=True)
total_df.to_csv(sameday_dir / f'{model_name}.csv', index=False)
del day_frames, total_df, day

# Multiday: one evaluation of up to n_multiday_days per start date, stacked
# row-wise into a single CSV.
md_frames = []
for day in multiday_starts:
    md_frames.append(
        evaluation_helper.multi_day_eval(
            model=model, day=day, download_path=download_path,
            max_n_days=n_multiday_days, device=device, verbose=True
        )
    )

total_df = pd.concat(md_frames, axis=0).reset_index(drop=True)
total_df.to_csv(multiday_dir / f'{model_name}.csv', index=False)
del md_frames, total_df, day

### Compression evaluation loops (SVD and Fisher)

In [None]:
def comp_inference_loop(c_model, c_model_name):
    """Run the same-day and multi-day evaluations for one model and save both.

    Calls ``evaluation_helper.same_day_eval`` once per entry of
    ``sameday_starts`` and ``evaluation_helper.multi_day_eval`` once per entry
    of ``multiday_starts``. Each set of result frames is stacked row-wise and
    written to ``save_dir / 'sameday' / f'{c_model_name}.csv'`` and
    ``save_dir / 'multiday' / f'{c_model_name}.csv'`` respectively.

    Relies on the notebook-level names ``sameday_starts``, ``multiday_starts``,
    ``n_multiday_days``, ``download_path``, ``device`` and ``save_dir``.

    Args:
        c_model: Model passed to the evaluation helpers.
        c_model_name: File stem used for both output CSV files.

    Returns:
        None. The results are only written to disk.
    """
    sameday_dir = save_dir / 'sameday'
    multiday_dir = save_dir / 'multiday'
    # DataFrame.to_csv does not create missing parent folders; make them before
    # evaluating so a missing folder cannot discard finished results.
    for out_dir in (sameday_dir, multiday_dir):
        out_dir.mkdir(parents=True, exist_ok=True)

    # Sameday. Frames are gathered in a list and concatenated once, so no
    # accumulator has to exist before the first iteration.
    sameday_frames = [
        evaluation_helper.same_day_eval(model=c_model, day=day, download_path=download_path, device=device)
        for day in sameday_starts
    ]
    sameday_df = pd.concat(sameday_frames, axis=0).reset_index(drop=True)
    sameday_df.to_csv(sameday_dir / f'{c_model_name}.csv', index=False)
    # Release the same-day results before the multi-day run starts.
    del sameday_frames, sameday_df

    # Multiday
    multiday_frames = [
        evaluation_helper.multi_day_eval(
            model=c_model, day=day, download_path=download_path,
            max_n_days=n_multiday_days, device=device, verbose=False
        )
        for day in multiday_starts
    ]
    multiday_df = pd.concat(multiday_frames, axis=0).reset_index(drop=True)
    multiday_df.to_csv(multiday_dir / f'{c_model_name}.csv', index=False)

In [None]:
# Plain SVD compression: evaluate one compressed copy of `model` per ratio.
for ratio in compression_ratios:
    print(ratio)
    # The compressed model is built inline so no notebook-level reference
    # keeps it alive after the evaluation returns.
    comp_inference_loop(
        c_model_name=f'svd_{ratio}',
        c_model=compression.svd_only_compression(ratio=ratio, original_model=model),
    )

print('DONE!!!')

In [None]:
# Baseline Fisher: for every task name and every compression ratio, compress
# `model` using the gradients stored under base_grad_dir / <task name> and
# evaluate the result.
for lh in lh_task_names:
    print(lh)
    task_grad_dir = base_grad_dir / lh
    for ratio in compression_ratios:
        print('\t', ratio)
        comp_inference_loop(
            c_model_name=f'fisher_base_{lh}_{ratio}',
            c_model=compression.fisher_base_compression(
                original_model=model, ratio=ratio, grad_path=task_grad_dir
            ),
        )

print('DONE!!!')

In [None]:
# Improved Fisher: for every task name and every compression ratio, compress
# `model` with compression.fisher_improved_compression, using the gradients
# stored under base_grad_dir / <task name>, and evaluate the result.
for lh in lh_task_names:
    print(lh)
    for ratio in compression_ratios:
        print('\t', ratio)
        comp_inference_loop(
            c_model=compression.fisher_improved_compression(original_model=model, ratio=ratio, grad_path=base_grad_dir / lh),
            # Must differ from the 'fisher_base_...' names used by the baseline
            # Fisher sweep, otherwise these CSVs overwrite the baseline results.
            c_model_name=f'fisher_improved_{lh}_{ratio}'
        )
print('DONE!!!')