# Forecasting 

In [2]:
import pandas as pd
import os

In [10]:
# Raw string: "\S" is not a recognised escape sequence, and newer Python
# versions emit a warning for it inside a normal string literal.
root = r"NASA Power Metro Manila 2001-2024\Structured"
# Sort the listing so the order of locations (and therefore of file_paths)
# does not depend on the arbitrary order in which os.listdir returns entries.
locations = sorted(os.listdir(root))
location_paths = [os.path.join(root, loc) for loc in locations]
# The first file name found in the first location folder is reused for every
# location when building file_paths below.
file_name = sorted(os.listdir(location_paths[0]))[0]
file_paths = [os.path.join(loc_path, file_name) for loc_path in location_paths]
print(file_paths)

['NASA Power Metro Manila 2001-2024\\Structured\\Kalookan City\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Las Piñas\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Makati City\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Malabon\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Mandaluyong\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Manila\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Marikina\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Muntinlupa\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Navotas\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Parañaque\\20010101_20251231_STRUCTURED.csv', 'NASA Power Metro Manila 2001-2024\\Structured\\Pasay City

In [15]:
import os
import hashlib
from collections import defaultdict

# Relative path of the folder scanned below; written as a raw string so the
# backslash separator is kept literally.
root = r"NASA Power Metro Manila 2001-2024\Structured"

def file_hash(path, chunk_size=8192):
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is opened in binary mode and read in pieces of ``chunk_size``
    bytes, so the whole file never has to be held in memory at once.

    Parameters
    ----------
    path : str or path-like
        File to hash.
    chunk_size : int, optional
        Number of bytes requested per read. Defaults to 8192.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest of the file contents.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    OSError
        If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately and would yield the digest of empty
    # input for any file, so reject non-positive sizes explicitly.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

def folder_signature(folder):
    """Return a SHA-256 hex digest summarising the regular files in ``folder``.

    Directory entries are visited in sorted name order; entries that are not
    regular files are skipped. For each file, the hex digest returned by
    ``file_hash`` is fed into the folder digest, wrapped in start/end markers.
    File names themselves are not part of the digest.
    """
    digest = hashlib.sha256()
    entries = os.listdir(folder)
    entries.sort()

    for entry in entries:
        entry_path = os.path.join(folder, entry)
        if not os.path.isfile(entry_path):
            continue
        # Markers delimit each file's contribution to the combined digest.
        digest.update(b"[START]")
        digest.update(file_hash(entry_path).encode())
        digest.update(b"[END]")

    return digest.hexdigest()

# Every entry directly under root is treated as one location folder.
locations = os.listdir(root)
location_paths = [os.path.join(root, name) for name in locations]

# Bucket location names by the signature of their folder contents.
groups = defaultdict(list)
for loc, path in zip(locations, location_paths):
    groups[folder_signature(path)].append(loc)

# Report only the buckets that hold more than one location.
for group in groups.values():
    if len(group) < 2:
        continue
    print("Identical group:", group)


Identical group: ['Kalookan City', 'Makati City', 'Mandaluyong', 'Marikina', 'Muntinlupa', 'Parañaque', 'Pasay City', 'Pasig City', 'Pateros', 'Quezon City', 'San Juan', 'Taguig']
Identical group: ['Las Piñas', 'Malabon', 'Manila', 'Navotas', 'Valenzuela']


In [16]:
import os
import pandas as pd
from collections import defaultdict

# Relative path of the folder scanned below; written as a raw string so the
# backslash separator is kept literally.
root = r"NASA Power Metro Manila 2001-2024\Structured"

def df_signature(path):
    """Return a hashable, row-order-independent signature of a CSV file.

    The file is read with ``pd.read_csv`` and its rows are sorted by all
    columns, so two files holding the same rows in a different order produce
    the same signature.

    Parameters
    ----------
    path : str or path-like
        CSV file to read.

    Returns
    -------
    tuple
        ``(columns, rows)`` where ``columns`` is a tuple of the column labels
        and ``rows`` is a tuple of per-row value tuples. Missing cells are
        represented as ``None``.
    """
    df = pd.read_csv(path)
    # sort rows to ignore row order
    df_sorted = df.sort_values(list(df.columns)).reset_index(drop=True)
    # NaN never compares equal to NaN, so tuples that still contain NaN would
    # make two identical files look different. Replace missing cells with
    # None, which compares and hashes consistently.
    comparable = df_sorted.astype(object).where(df_sorted.notna(), None)
    # convert to a stable tuple structure
    return (
        tuple(df_sorted.columns),
        tuple(map(tuple, comparable.values)),
    )

# Every entry directly under root is treated as one location folder.
locations = os.listdir(root)
location_paths = [os.path.join(root, name) for name in locations]

# Map each location to the tuple of DataFrame signatures of its files,
# visiting the files in sorted name order and skipping non-file entries.
folder_signatures = {}
for loc, path in zip(locations, location_paths):
    candidates = (os.path.join(path, entry) for entry in sorted(os.listdir(path)))
    folder_signatures[loc] = tuple(
        df_signature(candidate) for candidate in candidates if os.path.isfile(candidate)
    )

# Bucket locations whose per-file signatures are equal.
groups = defaultdict(list)
for loc, sig in folder_signatures.items():
    groups[sig].append(loc)

# Report only the buckets that hold more than one location.
for locs in groups.values():
    if len(locs) > 1:
        print("Identical group:", locs)


Identical group: ['Kalookan City', 'Makati City', 'Mandaluyong', 'Marikina', 'Muntinlupa', 'Parañaque', 'Pasay City', 'Pasig City', 'Pateros', 'Quezon City', 'San Juan', 'Taguig']
Identical group: ['Las Piñas', 'Malabon', 'Manila', 'Navotas', 'Valenzuela']


In [12]:
# Load the CSV at index 1 of file_paths. That list is built in an earlier
# cell, so that cell must have been run before this one.
df = pd.read_csv(file_paths[1])
# Preview the first rows as plain text.
print(df.head())

              DATETIME  ALLSKY_SFC_SW_DWN  WS50M  ALLSKY_NKT  \
0  2024-01-01 00:00:00                0.0   4.65         0.0   
1  2024-01-01 01:00:00                0.0   4.61         0.0   
2  2024-01-01 02:00:00                0.0   4.52         0.0   
3  2024-01-01 03:00:00                0.0   4.53         0.0   
4  2024-01-01 04:00:00                0.0   4.46         0.0   

   ALLSKY_SFC_PAR_TOT  ALLSKY_SFC_SW_DNI  ALLSKY_SFC_SW_DIFF  \
0                 0.0                0.0                 0.0   
1                 0.0                0.0                 0.0   
2                 0.0                0.0                 0.0   
3                 0.0                0.0                 0.0   
4                 0.0                0.0                 0.0   

   ALLSKY_SFC_UV_INDEX  ALLSKY_SFC_UVA  ALLSKY_SFC_UVB  ...   V2M  V50M  \
0                  0.0             0.0             0.0  ... -0.48 -2.11   
1                  0.0             0.0             0.0  ... -0.48 -2.17   
2    

# End