In [3]:
# imports
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import json
from tqdm import tqdm
import math

In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [5]:
# paths
BASE_PATH = "drive/MyDrive/semester 4/csci 1470: final project/"
BASE_PATH_END = "_dataset.npz"

FULL_TRAIN = BASE_PATH + "final_train" + BASE_PATH_END
FULL_TEST = BASE_PATH + "final_test" + BASE_PATH_END
FULL_VAL = BASE_PATH + "final_val" + BASE_PATH_END

ABLATION_TRAIN = BASE_PATH + "ablation_datasets/train" + BASE_PATH_END

OPTIONS = ["AV", "SV", "LST", "FP"] # aerial view, street view, land surface temperature, footprint

In [None]:
def create_ablation_sets(set_type):
  full_path = BASE_PATH + "final_" + set_type + BASE_PATH_END
  full_set = np.load(full_path)

  feature_vectors = full_set['X']
  ground_truth_labels = full_set['y']

  ablations_to_indices = {"av": (0, 2048), "sv": (2048, 4096), "lst": (4096, 4099), "fp": (4099, 4100)}

  for ablation in ablations_to_indices.keys():
    ablation_path = BASE_PATH + "ablation_datasets/" + ablation + "/" + set_type + BASE_PATH_END
    start, end = ablations_to_indices[ablation]
    ablation_vectors = feature_vectors[:, start:end]

    print("")
    print(f"{ablation}")
    print(f"ablation path: {ablation_path}")
    print(f"ablation vector: {ablation_vectors[0:5]}")
    print(f"ablation vector shape: {ablation_vectors.shape}")

    print(f"ablation labels: {ground_truth_labels[0:5]}")
    print(f"ablation labels shape: {ground_truth_labels.shape}")

    np.savez(ablation_path, X=ablation_vectors, y=ground_truth_labels)

In [None]:
sets = ["train", "test", "val"]
for set_type in sets:
  create_ablation_sets(set_type)


av
ablation path: drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av/train_dataset.npz
ablation vector: [[0.24447544 0.05483738 1.86065853 ... 0.20230077 0.16132569 0.14471005]
 [0.229709   0.         1.59671187 ... 0.19480306 0.00926759 0.28839502]
 [0.33060732 0.03800951 2.2552762  ... 0.29981568 0.4973723  0.35247529]
 [0.25057054 0.22028881 1.53538156 ... 0.00943396 0.15820757 0.32604054]
 [0.44032246 0.05181219 0.51297194 ... 0.00635438 0.33177552 0.30192652]]
ablation vector shape: (27922, 2048)
ablation labels: [0 1 0 1 0]
ablation labels shape: (27922,)

sv
ablation path: drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/sv/train_dataset.npz
ablation vector: [[1.85516691 0.0539104  0.36174446 ... 0.49106139 0.51436728 0.20145689]
 [0.17261487 0.0699513  0.45943156 ... 0.17420448 0.15880528 0.05725829]
 [0.87558472 0.05036554 0.06197319 ... 0.5189476  1.62799048 0.23495658]
 [0.32509202 0.17424817 0.11729822 ... 0.05421016 1.30562973 0.2008

**Ablation Pairs**

In [6]:
import itertools
def create_ablation_pair_sets(set_type):
  full_path = BASE_PATH + "final_" + set_type + BASE_PATH_END
  full_set = np.load(full_path)

  feature_vectors = full_set['X']
  ground_truth_labels = full_set['y']

  ablations_to_indices = {"av": (0, 2048), "sv": (2048, 4096), "lst": (4096, 4099), "fp": (4099, 4100)}
  for a, b in itertools.combinations(ablations_to_indices.keys(), 2):
        s1, e1 = ablations_to_indices[a]
        s2, e2 = ablations_to_indices[b]

        X_ab = np.concatenate(
            [feature_vectors[:, s1:e1],
             feature_vectors[:, s2:e2]],
            axis=1
        )

        combo_name = f"{a}_{b}"
        out_dir = os.path.join(BASE_PATH, "ablation_datasets", combo_name)
        os.makedirs(out_dir, exist_ok=True)
        out_path = os.path.join(out_dir, f"{set_type}.npz")

        print(f"Saving pairwise ablation {combo_name:4s} → {X_ab.shape} at {out_path}")
        np.savez(out_path, X=X_ab, y=ground_truth_labels)

In [7]:
sets = ["train", "test", "val"]
for set_type in sets:
  create_ablation_pair_sets(set_type)

Saving pairwise ablation av_sv → (27922, 4096) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_sv/train.npz
Saving pairwise ablation av_lst → (27922, 2051) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_lst/train.npz
Saving pairwise ablation av_fp → (27922, 2049) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_fp/train.npz
Saving pairwise ablation sv_lst → (27922, 2051) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/sv_lst/train.npz
Saving pairwise ablation sv_fp → (27922, 2049) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/sv_fp/train.npz
Saving pairwise ablation lst_fp → (27922, 4) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/lst_fp/train.npz
Saving pairwise ablation av_sv → (2756, 4096) at drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_sv/test.npz
Saving pairwise ablation av_lst → (2756, 2051) at drive/MyDri

**Ablation Triplets**

In [11]:
def create_ablation_triplet_sets(split):
    full_path = os.path.join(BASE_PATH, f"final_{split}{BASE_PATH_END}")
    data  = np.load(full_path)
    X_full = data['X']
    y_full = data['y']

    ablations_to_indices = {"av": (0, 2048), "sv": (2048, 4096), "lst": (4096, 4099), "fp": (4099, 4100)}
    for combo in itertools.combinations(ablations_to_indices.keys(), 3):
        blocks = []
        for key in combo:
            s, e = ablations_to_indices[key]
            blocks.append(X_full[:, s:e])

        X_combo = np.concatenate(blocks, axis=1)

        combo_name = "_".join(combo)
        out_dir = os.path.join(BASE_PATH, "ablation_datasets", combo_name)
        os.makedirs(out_dir, exist_ok=True)

        out_path = os.path.join(out_dir, f"{split}{BASE_PATH_END}")
        print(f"Saving {combo_name} → {X_combo.shape} at\n    {out_path}")
        np.savez(out_path, X=X_combo, y=y_full)

In [12]:
sets = ["train", "test", "val"]
for set_type in sets:
  create_ablation_triplet_sets(set_type)

Saving av_sv_lst → (27922, 4099) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_sv_lst/train_dataset.npz
Saving av_sv_fp → (27922, 4097) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_sv_fp/train_dataset.npz
Saving av_lst_fp → (27922, 2052) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_lst_fp/train_dataset.npz
Saving sv_lst_fp → (27922, 2052) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/sv_lst_fp/train_dataset.npz
Saving av_sv_lst → (2756, 4099) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_sv_lst/test_dataset.npz
Saving av_sv_fp → (2756, 4097) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_sv_fp/test_dataset.npz
Saving av_lst_fp → (2756, 2052) at
    drive/MyDrive/semester 4/csci 1470: final project/ablation_datasets/av_lst_fp/test_dataset.npz
Saving sv_lst_fp → (2756, 2052) at
    drive/MyDrive/semes