## Data Processing


### Data Processing Workflow

This notebook prepares the input tensor for the U-Net model by combining satellite data and OSM masks.

### Step 1: Load Raster Data
- Import Sentinel-2 bands
- Import Landsat 8 LST band
- Import rasterized OSM Masks
- Sentinel-2 and Landsat 8 files are located in: `/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/`; the rasterized OSM masks are in its `osm/` subfolder

### Step 2: Align and Reproject
- Ensure all rasters have the same CRS and resolution (e.g., 10m)

### Step 3: Compute Indices
- NDVI = (B08 - B04) / (B08 + B04)
- LST is already available
- Optionally compute another index (e.g., NDBI)

### Step 4: Stack Tensor
- Combine NDVI, LST, other indices, and OSM masks into a multi-channel tensor

### Step 5: Save Outputs
- Save tensor as `.npy` or `.tif` in `data/processed/`
- Save metadata as `meta.json`


In [None]:
import rasterio
import os
from google.colab import drive

# Mount Google Drive and define the raw-data folder used by the loading cells.
drive.mount('/content/drive')
work_dir = '/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/'
os.makedirs(work_dir, exist_ok=True)

years = list(range(2015, 2024))

# Loaded Sentinel-2 rasters keyed by year; each entry holds the band array
# ('data') and the rasterio metadata dict ('meta').
sentinel_rasters = {}

# Load the multiband raster for every year that has a file on disk.
for year in years:
    file_name = f'sentinel_{year}_multiband.tif'
    file_path = os.path.join(work_dir, file_name)

    # rasterio.open() reports a missing path as rasterio.errors.RasterioIOError,
    # not FileNotFoundError, so an `except FileNotFoundError` never fires.
    # Check for the file explicitly so absent years are skipped as intended;
    # genuine read errors (e.g. a corrupt file) still surface.
    if not os.path.isfile(file_path):
        print(f"File not found: {file_name}")
        continue

    with rasterio.open(file_path) as src:
        sentinel_rasters[year] = {
            'data': src.read(),  # Read all bands
            'meta': src.meta     # Store metadata
        }
    print(f"Loaded: {file_name}")

# Print summary of loaded years
print(f"\nSuccessfully loaded Sentinel-2 data for years: {list(sentinel_rasters.keys())}")



In [None]:
# Loaded Landsat 8 LST rasters keyed by year; each entry holds the first-band
# array ('data') and the rasterio metadata dict ('meta').
lst_rasters = {}

# Load the LST raster for every year that has a file on disk.
for year in years:
    file_name = f'landsat8_LST_{year}.tif'
    file_path = os.path.join(work_dir, file_name)

    # rasterio.open() reports a missing path as rasterio.errors.RasterioIOError,
    # not FileNotFoundError, so an `except FileNotFoundError` never fires.
    # Check for the file explicitly so absent years are skipped as intended;
    # genuine read errors (e.g. a corrupt file) still surface.
    if not os.path.isfile(file_path):
        print(f"File not found: {file_name}")
        continue

    with rasterio.open(file_path) as src:
        lst_rasters[year] = {
            'data': src.read(1),  # Read the first band (LST)
            'meta': src.meta      # Store metadata
        }
    print(f"Loaded: {file_name}")

# Print summary of loaded years
print(f"\nSuccessfully loaded Landsat 8 LST data for years: {list(lst_rasters.keys())}")


In [None]:
# Define the directory containing rasterized OSM masks
osm_dir = '/content/drive/MyDrive/UHI-Detection-Analysis/data/raw/osm/'

# Expected mask filenames, keyed by the feature class they represent
mask_files = {
    'building': 'building_mask.tif',
    'road': 'road_mask.tif',
    'green_space': 'green_space_mask.tif',
    'water': 'water_mask.tif'
}

# Loaded OSM masks keyed by feature class; each entry holds the first-band
# array ('data') and the rasterio metadata dict ('meta').
osm_masks = {}

# Load each mask file that exists on disk.
for key, filename in mask_files.items():
    file_path = os.path.join(osm_dir, filename)

    # rasterio.open() reports a missing path as rasterio.errors.RasterioIOError,
    # not FileNotFoundError, so an `except FileNotFoundError` never fires.
    # Check for the file explicitly so absent masks are skipped as intended;
    # genuine read errors (e.g. a corrupt file) still surface.
    if not os.path.isfile(file_path):
        print(f"OSM mask file not found: {filename}")
        continue

    with rasterio.open(file_path) as src:
        osm_masks[key] = {
            'data': src.read(1),  # Read single band
            'meta': src.meta      # Store metadata
        }
    print(f"Loaded OSM mask: {filename}")

# Summary
print(f"\nSuccessfully loaded OSM masks: {list(osm_masks.keys())}")
