# Collecting Heatmaps

# Load Test Dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
import os

file_names = ["reduced_class_test_images"]
folder_names = ["test_images"]

for file_name, folder_name in zip(file_names, folder_names):
  # Path to the ZIP file (could be from your Colab environment or Google Drive)
  zip_file_path = f'/content/drive/My Drive/Webcam_Project/datasets/reduced_class_images/{file_name}.zip'

  # Create target directory if it doesn't exist
  if not os.path.exists(folder_name):
      os.makedirs(folder_name)

  # Unzip the file
  with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
      zip_ref.extractall(folder_name)

  print(f'Files extracted to {folder_name}')

Files extracted to test_images


In [3]:
!find ./test_images/MISSING -maxdepth 1 -type f | wc -l
!find ./test_images/RAIN_ONLY -maxdepth 1 -type f | wc -l
!find ./test_images/SNOW_ONLY -maxdepth 1 -type f | wc -l

1000
1000
1000


# Load Fine-Tuned Model

In [4]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models, transforms
import torch.nn as nn
import torch.optim as optim

In [5]:
from torchvision import datasets, transforms
import torch
from torch.utils.data import DataLoader

# Define your transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)), # Resize the image to 224x224 pixels
    transforms.ToTensor(),         # Convert the image to a PyTorch tensor
    transforms.Normalize([0.485, 0.456, 0.406], # Normalize using ImageNet mean and std
                         [0.229, 0.224, 0.225])
])

test_dataset = datasets.ImageFolder(root='test_images/', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [6]:
CLASS_NAMES = ['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']

In [7]:
test_dataset.classes

['MISSING', 'RAIN_ONLY', 'SNOW_ONLY']

In [8]:
from torchvision import models
import torch.nn as nn

# Load pre-trained MobileNetV2
model = models.mobilenet_v2(pretrained=True)

# Number of features in the last layer of the model
num_ftrs = model.classifier[1].in_features

# Adjust the classifier to your number of classes
# Replace `num_classes` with the actual number of classes
num_classes = len(CLASS_NAMES)  # Assuming 'dataset' is your ImageFolder dataset
model.classifier[1] = nn.Linear(num_ftrs, num_classes)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 161MB/s]


In [9]:
# Path to your saved model weights
model_weights_path = '/content/drive/My Drive/Webcam_Project/model_weights/new_models/mobilenetv2_model_weights.pth'

# Load the weights into the model
model.load_state_dict(torch.load(model_weights_path, map_location=device))

<All keys matched successfully>

# Generate Heatmaps

In [10]:
# Ensure model is on the correct device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
_ = model.to(device)

In [11]:
target_layer = model.features[18]  # Adjusted to target the last Conv2dNormActivation

In [12]:
import torch
import torch.nn.functional as F

class GradCAM:
    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        target_layer.register_forward_hook(self.save_activation)
        target_layer.register_backward_hook(self.save_gradient)

    def save_activation(self, module, input, output):
        self.activations = output

    def save_gradient(self, module, input, output):
        self.gradients = output[0]

    def __call__(self, x):
        self.model.zero_grad()
        output = self.model(x)
        if isinstance(output, tuple):
            output = output[0]
        one_hot_output = torch.FloatTensor(1, output.size()[-1]).zero_().to(device)
        one_hot_output[0][torch.argmax(output)] = 1
        output.backward(gradient=one_hot_output, retain_graph=True)
        return output

def generate_heatmap(gradients, activations):
    pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])
    for i in range(activations.shape[1]):
        activations[:, i, :, :] *= pooled_gradients[i]
    heatmap = torch.mean(activations, dim=1).squeeze()
    heatmap = F.relu(heatmap)
    heatmap /= torch.max(heatmap)
    return heatmap

In [13]:
from google.colab.patches import cv2_imshow
import numpy as np
import cv2
from PIL import Image
from torchvision import transforms

def overlay_deepest_red_areas(img_paths, target_img_path, display_img=False, save_path_overlay=None):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    aggregated_heatmap = None

    # Process each image and aggregate the heatmaps
    for img_path in img_paths:
        img = Image.open(img_path)
        img = transform(img).unsqueeze(0)

        # Assuming the model and other components are set up correctly
        img = img.to(device)
        grad_cam = GradCAM(model, target_layer)
        _ = grad_cam(img)
        heatmap = generate_heatmap(grad_cam.gradients, grad_cam.activations)
        heatmap = heatmap.detach().cpu().numpy()

        if aggregated_heatmap is None:
            aggregated_heatmap = heatmap
        else:
            aggregated_heatmap = np.maximum(aggregated_heatmap, heatmap)

    # Load the target image where the heatmap will be overlaid
    target_img = cv2.imread(target_img_path)
    heatmap_resized = cv2.resize(aggregated_heatmap, (target_img.shape[1], target_img.shape[0]))

    # Apply a very high threshold to focus only on the deepest red areas
    deep_red_threshold = np.percentile(heatmap_resized, 50)  # High percentile for deep red
    mask = heatmap_resized > deep_red_threshold

    # Prepare the masked heatmap for overlay
    masked_heatmap = np.zeros_like(heatmap_resized)
    masked_heatmap[mask] = heatmap_resized[mask]

    # Convert to RGB
    heatmap_color = cv2.applyColorMap(np.uint8(255 * masked_heatmap), cv2.COLORMAP_JET)

    # Create an overlay image
    overlay_img = target_img.copy()
    overlay_img[mask] = cv2.addWeighted(target_img, 0.6, heatmap_color, 0.4, 0)[mask]

    # Display and save the overlay
    if display_img:
        cv2_imshow(overlay_img)
    if save_path_overlay is not None:
        cv2.imwrite(save_path_overlay, overlay_img)
        print(f'Saved img at {save_path_overlay}...')

In [14]:
import pandas as pd

matched_imgs_stat = f"/content/drive/My Drive/Webcam_Project/media/results_and_stats/selected_1600_images_info_expanded.csv"
matched_imgs_df = pd.read_csv(matched_imgs_stat)
matched_imgs_df.head()

Unnamed: 0,tmpc,precipitation_category,prediction_category,filename,matched,station,direction,datetime,latitude,longitude,elevation,province,sunrise,sunset,time_of_day,month,tmpc_range
0,-16.0,SNOW_ONLY,SNOW_ONLY,CYOJ-NE-2023_03_15-17_04_30.jpg,True,CYOJ,NE,2023-03-15 17:04:30,58.62158,-117.16467,338.0,Alberta,2024-03-28 13:25:37.070789,2024-03-28 02:20:22.245270,Afternoon,MAR,<=-1
1,0.0,MISSING,MISSING,CYQL-SW-2023_03_17-01_04_30.jpg,True,CYQL,SW,2023-03-17 01:04:30,49.63028,-112.79972,929.0,Alberta,2024-03-28 13:15:33.466575,2024-03-28 01:55:50.639744,Night,MAR,-1~3
2,1.0,MISSING,MISSING,CYQF-W-2023_03_19-21_04_30.jpg,True,CYQF,W,2023-03-19 21:04:30,52.18229,-113.89448,904.6,Alberta,2024-03-28 13:18:09.748341,2024-03-28 02:01:54.949607,Afternoon,MAR,-1~3
3,-7.0,MISSING,MISSING,CYZU-SE-2023_03_21-16_04_30.jpg,True,CYZU,SE,2023-03-21 16:04:30,54.14385,-115.78677,782.4,Alberta,2024-03-28 13:24:12.470131,2024-03-28 02:10:56.300999,Afternoon,MAR,<=-1
4,0.0,MISSING,MISSING,CYZU-SE-2023_03_22-18_04_30.jpg,True,CYZU,SE,2023-03-22 18:04:30,54.14385,-115.78677,782.4,Alberta,2024-03-28 13:24:12.470131,2024-03-28 02:10:56.300999,Afternoon,MAR,-1~3


In [15]:
extended_filename_matched = {}
for index, row in matched_imgs_df.iterrows():
    filename = row['filename'].split('.jpg')[0] + '-1.jpg'
    matched = row['matched']
    if matched:
      extended_filename_matched[filename] = matched

In [16]:
import os
import glob

def list_files_and_group_by_code(directory):
    # Ensure the directory path ends with a separator
    directory = os.path.abspath(directory) + os.sep

    # List all files under the given directory recursively
    files = glob.glob(directory + '**/*', recursive=True)

    # Filter out directories, keep only files
    files = [file for file in files if os.path.isfile(file) if file.split('/')[-1] in extended_filename_matched]

    # Dictionary to hold the groups
    code_to_files = {}

    # Process each file path
    for file_path in files:
        # Get the filename from the path
        filename = os.path.basename(file_path)

        # Extract the code, which is the first 4 characters of the filename
        code = filename[:4]

        # If the code is not already a key in the dictionary, add it
        if code not in code_to_files:
            code_to_files[code] = []

        # Append the file path to the corresponding code list
        code_to_files[code].append(file_path)

    # Sort the lists of files for each code
    for code in code_to_files:
        code_to_files[code].sort()

    return code_to_files

In [17]:
grouped_no_precip_files = list_files_and_group_by_code('/content/test_images/MISSING')
grouped_rain_files = list_files_and_group_by_code('/content/test_images/RAIN_ONLY')
grouped_snow_files = list_files_and_group_by_code('/content/test_images/SNOW_ONLY')

In [18]:
no_precip_stations = list(grouped_no_precip_files.keys())
rain_stations = list(grouped_rain_files.keys())
snow_stations = list(grouped_snow_files.keys())

# Convert lists to sets
no_precip_stations = set(no_precip_stations)
rain_stations = set(rain_stations)
snow_stations = set(snow_stations)

# Find the intersection of these sets
common_stations = sorted(list(no_precip_stations.intersection(rain_stations, snow_stations)))
','.join(common_stations)

'CYBD,CYBV,CYFO,CYGE,CYIV,CYJF,CYKD,CYLK,CYLL,CYOJ,CYQB,CYQF,CYQL,CYQT,CYRA,CYVR,CYWK,CYYB,CYYD,CYYQ,CYZU,CZFN'

In [19]:
common_stations_with_min_ten_imgs = [station for station in common_stations if len(grouped_no_precip_files[station]) >= 10 and len(grouped_rain_files[station]) >= 10 and len(grouped_snow_files[station]) >= 10]
','.join(common_stations_with_min_ten_imgs)

'CYFO,CYIV,CYQB,CYQT,CYWK,CYYB,CYYD'

# Collecting Heatmaps

In [20]:
import os

def create_folder_if_not_exists(folder_path):
    # Check if the folder already exists
    if not os.path.exists(folder_path):
        # Create the folder
        os.makedirs(folder_path)
        print(f"Folder '{folder_path}' created.")
    else:
        print(f"Folder '{folder_path}' already exists.")

In [21]:
import os
import zipfile

def zip_folder(folder_path, output_path):
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        len_dir_path = len(os.path.dirname(folder_path))
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, file_path[len_dir_path:])  # Add file with shortened path

In [22]:
# Each img takes around 2-3GB, adjust the code below depending on how many GBs of RAM there are available on the GPU
cur_idx = 4

In [23]:
from google.colab import files

station1 = common_stations_with_min_ten_imgs[cur_idx]
station2 = common_stations_with_min_ten_imgs[cur_idx+1]
station3 = common_stations_with_min_ten_imgs[cur_idx+2]
# station4 = common_stations_with_min_ten_imgs[cur_idx+3]
create_folder_if_not_exists('collected_heatmaps')
overlay_deepest_red_areas(img_paths=grouped_no_precip_files[station1], target_img_path=grouped_no_precip_files[station1][0], save_path_overlay=f'collected_heatmaps/{station1}_no_precip_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_rain_files[station1], target_img_path=grouped_rain_files[station1][0], save_path_overlay=f'collected_heatmaps/{station1}_rain_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_snow_files[station1], target_img_path=grouped_snow_files[station1][0], save_path_overlay=f'collected_heatmaps/{station1}_snow_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_no_precip_files[station2], target_img_path=grouped_no_precip_files[station2][0], save_path_overlay=f'collected_heatmaps/{station2}_no_precip_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_rain_files[station2], target_img_path=grouped_rain_files[station2][0], save_path_overlay=f'collected_heatmaps/{station2}_rain_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_snow_files[station2], target_img_path=grouped_snow_files[station2][0], save_path_overlay=f'collected_heatmaps/{station2}_snow_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_no_precip_files[station3], target_img_path=grouped_no_precip_files[station3][0], save_path_overlay=f'collected_heatmaps/{station3}_no_precip_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_rain_files[station3], target_img_path=grouped_rain_files[station3][0], save_path_overlay=f'collected_heatmaps/{station3}_rain_overlay_agg_heatmap.jpg')
overlay_deepest_red_areas(img_paths=grouped_snow_files[station3], target_img_path=grouped_snow_files[station3][0], save_path_overlay=f'collected_heatmaps/{station3}_snow_overlay_agg_heatmap.jpg')
# overlay_deepest_red_areas(img_paths=grouped_no_precip_files[station4], target_img_path=grouped_no_precip_files[station4][0], save_path_overlay=f'collected_heatmaps/{station4}_no_precip_overlay_agg_heatmap.jpg')
# overlay_deepest_red_areas(img_paths=grouped_rain_files[station4], target_img_path=grouped_rain_files[station4][0], save_path_overlay=f'collected_heatmaps/{station4}_rain_overlay_agg_heatmap.jpg')
# overlay_deepest_red_areas(img_paths=grouped_snow_files[station4], target_img_path=grouped_snow_files[station4][0], save_path_overlay=f'collected_heatmaps/{station4}_snow_overlay_agg_heatmap.jpg')

zip_folder('./collected_heatmaps', 'collected_heatmaps.zip')
files.download('collected_heatmaps.zip')

Folder 'collected_heatmaps' created.




Saved img at collected_heatmaps/CYWK_no_precip_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYWK_rain_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYWK_snow_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYYB_no_precip_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYYB_rain_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYYB_snow_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYYD_no_precip_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYYD_rain_overlay_agg_heatmap.jpg...
Saved img at collected_heatmaps/CYYD_snow_overlay_agg_heatmap.jpg...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>