In [4]:
import rioxarray as rxr
import dask.array as da
import rasterio
import csv
import os
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
import cdsapi

# Ask the Climate Data Store for the "derived-utci-historical" dataset:
# both thermal variables, every calendar day of 2019 and 2020.
# NOTE: cdsapi.Client() needs a complete .cdsapirc configuration file in the
# user's home directory; it raises if that file is missing or incomplete.
dataset = "derived-utci-historical"
request = dict(
    variable=[
        "universal_thermal_climate_index",
        "mean_radiant_temperature",
    ],
    version="1_1",
    product_type="intermediate_dataset",
    year=[str(year) for year in (2019, 2020)],
    # Zero-padded two-digit strings: "01".."12" and "01".."31".
    month=[f"{month:02d}" for month in range(1, 13)],
    day=[f"{day:02d}" for day in range(1, 32)],
)

client = cdsapi.Client()
client.retrieve(dataset, request).download()


Exception: Missing/incomplete configuration file: C:\Users\rachn/.cdsapirc

In [None]:
# Convert band 1 of a GeoTIFF into a long-format CSV (longitude, latitude, utci),
# streaming a few raster rows at a time so the full grid is never held in memory.
input_tif = "GloUTCI-M_YEAR_2000_MONTH_03.tif"  # Change this to your file path
output_csv = "processed_data.csv"
temp_csv = "temp_data.csv"  # Written first; only promoted to output_csv once complete
chunk_size = 100  # Number of raster rows read per iteration

with rasterio.open(input_tif) as dataset:
    transform = dataset.transform  # Affine transform: (col, row) -> map coordinates
    height, width = dataset.height, dataset.width

    # Pixel-centre column positions are identical for every chunk, so build them once.
    # Shape (1, width) so they broadcast against the per-chunk row positions.
    col_centres = np.arange(width, dtype=np.float64)[np.newaxis, :] + 0.5

    # Mode "w" truncates any temp file left behind by an earlier, interrupted run.
    with open(temp_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["longitude", "latitude", "utci"])  # Column headers

        for row_start in range(0, height, chunk_size):
            row_end = min(row_start + chunk_size, height)  # Last chunk may be shorter

            # Read only this strip of rows. masked=True flags pixels the file marks
            # as nodata so they are written as NaN instead of a scaled sentinel value.
            band1 = dataset.read(
                1, window=((row_start, row_end), (0, width)), masked=True
            )
            band1 = (band1.astype(np.float32) / 100).filled(np.nan)  # Apply scaling

            # Pixel-centre coordinates from the affine coefficients:
            #   x = a*col + b*row + c,  y = d*col + e*row + f
            # evaluated at (col + 0.5, row + 0.5). Broadcasting (n, 1) against
            # (1, width) yields full (n, width) grids without relying on the
            # return type of rasterio.transform.xy, which differs between versions.
            row_centres = (
                np.arange(row_start, row_end, dtype=np.float64)[:, np.newaxis] + 0.5
            )
            xs = transform.a * col_centres + transform.b * row_centres + transform.c
            ys = transform.d * col_centres + transform.e * row_centres + transform.f

            # One writerows call per chunk; ravel() walks all three arrays in the
            # same row-major order, so each CSV line describes a single pixel.
            writer.writerows(zip(xs.ravel(), ys.ravel(), band1.ravel()))

# Promote the finished temp file. Unlike os.rename, os.replace also overwrites an
# existing output_csv on Windows, so re-running this cell does not fail at the end.
os.replace(temp_csv, output_csv)

print(f"Processing complete! Data saved to {output_csv}")