# Physics-Informed Neural Networks for Enhanced Heat Index Forecasting in the Philippine Context


# Import Python Libraries

In [43]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [44]:
import torch
torch.cuda.is_available()

True

In [45]:
torch.cuda.current_device()
torch.cuda.device(0)

<torch.cuda.device at 0x1d61ae2e000>

In [46]:
# Check GPU details
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 5070 Ti'

## Data Loading

Climatological data from DOST-PAGASA (http://bagong.pagasa.dost.gov.ph/climate/climate-data)
Notes:
- 999.0 indicate Missing Values
- TMAX (Maximum Temperature, Unit: Deg C)
- TMIN (Minimum Temperature, Unit: Deg C)
- RH (Relative Humidity, Unit: %)
- Wind Speed, Unit: m/s
- Wind direction, Unit: degree relative to North indicating where the wind is blowing from

Stations:
- NAIA Latitude: 14.5047 N Longitude: 121.004751 E Elevation: 21 m
- Port Area Latitude: 14.58841 N Longitude: 120.967866 E Elevation: 15 m
- Science Garden Latitude: 14.645072 N Longitude: 121.044282 E Elevation: 42 m
- Baguio Latitude: 16.403992 N Longitude: 120.60147 E Elevation: 1510.08 m
- Laoag Latitude: 18.183016 N Longitude: 120.534723 E Elevation: 5 m
- Sinait Latitude: 17.89019 N Longitude: 120.459762 E Elevation: 58.129 m
- Dagupan Latitude: 16.086784 N Longitude: 120.352045 E Elevation: 2 m
- Tuguegarao Latitude: 17.647678 N Longitude: 121.758469 E Elevation: 62 m
- Aparri Latitude: 18.360059 N Longitude: 121.630454 E Elevation: 3 m
- Basco Radar Latitude: 20.427284 N Longitude: 121.970536 E Elevation: 167 m
- Baler Radar Latitude: 15.748809 N Longitude: 121.632028 E Elevation: 173 m
- Cabanatuan Latitude: 15.470387 N Longitude: 120.951143 E Elevation: 32 m
- CLSU Latitude: 15.73586 N Longitude: 120.9368 E Elevation: 7.6 m
- Clark Latitude: 15.1717 N Longitude: 120.5616667 E Elevation: 151.564 m
- Iba Latitude: 15.328408 N Longitude: 119.965661 E Elevation: 5.538 m
- Ambulong Latitude: 14.09008056 N Longitude: 121.0552444 E Elevation: 11 m
- Sangley Point Latitude: 14.494953 N Longitude: 120.906838 E Elevation: 3 m
- Tanay Latitude: 14.581167 N Longitude: 121.36927 E Elevation: 650 m
- Infanta Latitude: 14.74663611 N Longitude: 121.6490333 E Elevation: 7 m
- Tayabas Latitude: 14.018428 N Longitude: 121.596575 E Elevation: 158 m
- Calapan Latitude: 13.409775 N Longitude: 121.1896667 E Elevation: 41 m
- San Jose Latitude: 12.359602 N Longitude: 121.04788 E Elevation: 3.314 m
- Puerto Princesa Latitude: 9.740134 N Longitude: 118.758613 E Elevation: 16.76 m
- Daet Latitude: 14.128689 N Longitude: 122.982559 E Elevation: 3.77 m
- Legazpi Latitude: 13.150937 N Longitude: 123.728605 E Elevation: 15.487 m
- Virac Synop Latitude: 13.576558 N Longitude: 124.209834 E Elevation: 31.574 m


In [47]:
# Store individual datasets into a dictionary of dataframes
df_ambulong = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Ambulong Daily Data.csv')
df_aparri = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Aparri Daily Data.csv')
df_baguio = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Baguio Daily Data.csv')
df_baler = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Baler Radar Daily Data.csv')
df_basco = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Basco Radar Daily Data.csv')
df_cabanatuan = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Cabanatuan Daily Data.csv')
df_calapan = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Calapan Daily Data.csv')
df_clark = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Clark Daily Data.csv')
df_clsu = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/CLSU Daily Data.csv')
df_daet = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Daet Daily Data.csv')
df_dagupan = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Dagupan Daily Data.csv')
df_iba = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Iba Daily Data.csv')
df_infanta = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Infanta Daily Data.csv')
df_laoag = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Laoag Daily Data.csv')
df_legazpi = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Legazpi Daily Data.csv')
df_naia = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/NAIA Daily Data.csv')
df_port = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Port Area Daily Data.csv')
df_puerto = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Puerto Princesa Daily Data.csv')
df_sanjose = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/San Jose Daily Data.csv')
df_sangley = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Sangley Point Daily Data.csv')
df_sciencegarden = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Science Garden Daily Data.csv')
df_sinait = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Sinait Daily Data.csv')
df_tanay = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Tanay Daily Data.csv')
df_tayabas = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Tayabas Daily Data.csv')
df_tuguegarao = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Tuguegarao Daily Data.csv')
df_virac = pd.read_csv('C:/Users/Blix/Documents/GitHub/CCTHESS1-CCTHESS2-Dev-and-Docs/datasets/Virac Synop Daily Data.csv')

In [48]:
# Store dataframes in a list for easy access
dataframes = [
    df_ambulong, df_aparri, df_baguio, df_baler, df_basco, df_cabanatuan, df_calapan, df_clark, df_clsu, df_daet, df_dagupan, df_iba, df_infanta, df_laoag, df_legazpi, df_naia,
    df_port, df_puerto, df_sanjose, df_sangley, df_sciencegarden, df_sinait, df_tanay, df_tayabas, df_tuguegarao, df_virac
]
# Corresponding names for the dataframes
dataframe_names = [
    'Ambulong', 'Aparri', 'Baguio', 'Baler', 'Basco', 'Cabanatuan', 'Calapan', 'Clark', 'CLSU', 'Daet', 'Dagupan', 'Iba', 'Infanta', 'Laoag', 'Legazpi', 'NAIA', 
    'Port Area', 'Puerto Princesa', 'San Jose', 'Sangley Point', 'Science Garden', 'Sinait', 'Tanay', 'Tayabas', 'Tuguegarao', 'Virac Synop'
]

## Initial Sanity Check

In [51]:
def sanity_check(df):
    # Print name of dataset (e.g. df_ambulong)
    print(f"Sanity Check for dataset: {df.name}")
    print("First 5 rows of the dataframe:")
    print(df.head())
    print("\nDataframe Info:")
    print(df.info())
    print("\nMissing Values in each column (containing -999.0):")
    print((df == -999.0).sum())
    print("\nDuplicate values in the dataframe:")
    print(df.duplicated().sum())
    print("\nStatistical Summary:")
    print(df.describe())

In [52]:
for df, name in zip(dataframes, dataframe_names):
    df.name = name  # Assign name attribute to dataframe
    sanity_check(df)
    print("\n" + "="*50 + "\n")

Sanity Check for dataset: Ambulong
First 5 rows of the dataframe:
   YEAR  MONTH  DAY  TMAX  TMIN  RH  WIND_SPEED  WIND_DIRECTION
0  2014      1    1  30.4  22.8  74           2              40
1  2014      1    2  29.9  20.0  74           2              40
2  2014      1    3  31.0  20.5  79           1              40
3  2014      1    4  32.0  22.5  76           1              40
4  2014      1    5  30.6  23.0  75           2              40

Dataframe Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3652 entries, 0 to 3651
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   YEAR            3652 non-null   int64  
 1   MONTH           3652 non-null   int64  
 2   DAY             3652 non-null   int64  
 3   TMAX            3652 non-null   float64
 4   TMIN            3652 non-null   float64
 5   RH              3652 non-null   int64  
 6   WIND_SPEED      3652 non-null   int64  
 7   WIND_DIRECTION  3

Important Null Value Findings:
- Abulong: 17
- Aparri: 1414
- Baler: 1 
- Basco: 244
- Cabanatuan: 1 
- Calapan: 1 
- Clark: 0
- CLSU: 0
- Daet: 0
- Dagupan: 244
- Iba: 0
- Infanta: 0
- Laoag: 1 
- Legazpi: 0
- NAIA: 79
- Port Area: 26
- Puerto Princesa: 0
- San Jose: 0
- Sangley Point: 1 
- Science Garden: 4
- Sinait: 0 
- Tanay: 1 
- Tayabas: 0
- Tuguegarao: 116
- Virac Synop: 0