### Import Libraries and Data

In [1]:
# Basic imports
import sys
from pathlib import Path
import os
import warnings

# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")

# The parent of the current working directory is treated as the project root
# and appended to the import search path.
# NOTE(review): this is relative to the working directory at the time the
# cell runs, so re-running it after a later os.chdir gives a different root.
project_root = Path().resolve().parent
sys.path.append(os.fspath(project_root))

In [2]:
# Switch the working directory to the project root so that the relative
# "data" folder used by the following cells resolves correctly.
# Uses project_root (computed in the first cell) instead of a hardcoded,
# machine-specific absolute path, so the notebook runs on any checkout.
# NOTE(review): assumes the notebook is launched from a directory one level
# below the repository root — confirm against the repository layout.
os.chdir(project_root)
print(os.getcwd())  # Verify the change

/Users/riyanshibohra/Documents/GitHub/metropolitan-climate-profiling


In [3]:
# Import libraries
import pandas as pd
from scripts.feature_engineering import (
    calculate_seasonal_thresholds,
    apply_uhi_classification,
)
from scripts.data_loader import load_processed_dataset, save_dataset

In [6]:
# Locations of the processed per-city CSV files, all inside the "data"
# folder under the current working directory.

data_folder = Path().resolve().joinpath("data")
dallas_path, arlington_path, denton_path = (
    data_folder.joinpath(csv_name)
    for csv_name in (
        "processed_dallas.csv",
        "processed_arlington.csv",
        "processed_denton.csv",
    )
)

In [7]:
# Load the processed dataset for each city, in the order Dallas, Arlington,
# Denton, using the project's loader helper.
dallas, arlington, denton = [
    load_processed_dataset(csv_path)
    for csv_path in (dallas_path, arlington_path, denton_path)
]

### Step 1: Calculate Seasonal Thresholds

In [9]:
# Weather variables for which seasonal thresholds are computed
columns_to_threshold = [
    'HourlyDryBulbTemperature',
    'HourlyRelativeHumidity',
    'HourlyWindSpeed',
]

# One thresholds object per city; the printed output below shows a mapping
# from column name to a per-season table of 0.25 / 0.50 / 0.75 values.
dallas_thresholds, arlington_thresholds, denton_thresholds = [
    calculate_seasonal_thresholds(city_frame, columns_to_threshold)
    for city_frame in (dallas, arlington, denton)
]

# Echo the computed thresholds so they can be checked by eye
for heading, city_thresholds in (
    ("Dallas Thresholds:", dallas_thresholds),
    ("Arlington Thresholds:", arlington_thresholds),
    ("Denton Thresholds:", denton_thresholds),
):
    print(heading, city_thresholds)

Dallas Thresholds: {'HourlyDryBulbTemperature':         0.25  0.50  0.75
Season                  
Fall    56.0  69.0  78.0
Spring  61.0  69.0  77.0
Summer  80.0  86.0  94.0
Winter  37.0  49.0  61.0, 'HourlyRelativeHumidity':         0.25  0.50   0.75
Season                   
Fall    47.0  63.0  83.00
Spring  47.0  63.0  77.00
Summer  40.0  55.0  68.75
Winter  45.0  63.0  84.00, 'HourlyWindSpeed':         0.25  0.50  0.75
Season                  
Fall     5.0   7.0   9.0
Spring   7.0  11.0  15.0
Summer   6.0   8.0  10.0
Winter   5.0   8.0  11.0}
Arlington Thresholds: {'HourlyDryBulbTemperature':         0.25       0.50  0.75
Season                       
Fall    55.0  66.804656  76.0
Spring  61.0  70.000000  77.0
Summer  79.0  85.000000  93.0
Winter  36.0  49.000000  62.0, 'HourlyRelativeHumidity':          0.25  0.50  0.75
Season                   
Fall    50.00  69.0  86.0
Spring  47.50  64.0  79.0
Summer  40.00  56.5  70.6
Winter  46.75  66.0  85.0, 'HourlyWindSpeed':         0.25  

### Step 2: Apply UHI Classification

In [12]:
# Classify every city with its own seasonal thresholds. The results replace
# the original frames under the same names, which the save cell relies on.
# NOTE(review): whether apply_uhi_classification returns a new frame or
# mutates its input is not visible here — confirm before re-running this cell.
dallas, arlington, denton = [
    apply_uhi_classification(city_frame, city_thresholds)
    for city_frame, city_thresholds in (
        (dallas, dallas_thresholds),
        (arlington, arlington_thresholds),
        (denton, denton_thresholds),
    )
]

# Show how many rows fall into each 'UHI Intensity' category per city
for heading, classified_frame in (
    ("Dallas UHI Intensity Counts:\n", dallas),
    ("Arlington UHI Intensity Counts:\n", arlington),
    ("Denton UHI Intensity Counts:\n", denton),
):
    print(heading, classified_frame['UHI Intensity'].value_counts())

Dallas UHI Intensity Counts:
 Medium    7520
Low       2751
High       247
Name: UHI Intensity, dtype: int64
Arlington UHI Intensity Counts:
 Medium    7926
Low       2833
High       163
Name: UHI Intensity, dtype: int64
Denton UHI Intensity Counts:
 Medium    7769
Low       2849
High       190
Name: UHI Intensity, dtype: int64


### Step 3: Save Enhanced Datasets

In [13]:
# Write each classified dataset into the data folder. The destination is
# passed as a plain string path built from data_folder and the file name.
for classified_frame, output_name in (
    (dallas, "enhanced_dallas_with_uhi.csv"),
    (arlington, "enhanced_arlington_with_uhi.csv"),
    (denton, "enhanced_denton_with_uhi.csv"),
):
    save_dataset(classified_frame, f"{data_folder}/{output_name}")

### Completion

In [14]:
# Final marker: reaching this cell means every step above ran without raising.
completion_message = "Feature engineering and UHI classification completed successfully."
print(completion_message)

Feature engineering and UHI classification completed successfully.
