# Forecasting 

In [2]:
import pandas as pd
import os

In [5]:
# Collect the path of one CSV per location folder under the structured export.
#
# The root is assembled with os.path.join instead of a "...\Structured"
# literal: inside a non-raw string "\S" is an invalid escape sequence (newer
# Python versions warn about it), and a hard-coded backslash only resolves on
# Windows.
root = os.path.join("NASA Power QC 2001-2024", "Structured")

# One entry per item found directly under `root`; each is treated as a
# location folder.
locations = os.listdir(root)
location_paths = [os.path.join(root, loc) for loc in locations]

# The file name is taken from the first entry of the first folder and reused
# for every other folder.
# NOTE(review): assumes every location folder contains a file with this exact
# name — confirm against the dataset layout.
file_name = os.listdir(location_paths[0])[0]
file_paths = [os.path.join(loc_path, file_name) for loc_path in location_paths]
print(file_paths)

['NASA Power QC 2001-2024\\Structured\\Alicia\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Amihan\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Apolonio Samson\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Aurora\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Baesa\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bagbag\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bagong Lipunan Ng Crame\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bagong Pag-Asa\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bagong Silangan\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bagumbayan\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bagumbuhay\\20010101_20251231_STRUCTURED.csv', 'NASA Power QC 2001-2024\\Structured\\Bahay Toro\\20010101

In [9]:
import os
import hashlib
from collections import defaultdict

# Root folder that holds one sub-folder per location. Joined from its
# components so the separator matches the host OS, rather than hard-coding a
# backslash that only resolves on Windows.
root = os.path.join("NASA Power QC 2001-2024", "Structured")

def file_hash(path):
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is opened in binary mode and consumed in reads of at most 8192
    bytes, so its content is fed to the hash piece by piece rather than being
    loaded in a single read.

    Parameters
    ----------
    path
        Location of the file to hash; passed straight to ``open``.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest of the file's bytes.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()

# List location folders. Only directories are kept: a stray file sitting
# directly under `root` would otherwise make the os.listdir() call in the loop
# below raise NotADirectoryError.
locations = [
    name for name in os.listdir(root)
    if os.path.isdir(os.path.join(root, name))
]
location_paths = [os.path.join(root, loc) for loc in locations]

# Compute a content signature for each folder: the SHA-256 digests of its
# regular files, taken in sorted file-name order. File names themselves are
# not part of the signature, and sub-directories are skipped.
folder_signatures = {}

for loc, path in zip(locations, location_paths):
    # The loop variable is `entry`, not `file_name`, so the notebook-level
    # `file_name` assigned by the earlier path-discovery cell is not overwritten.
    entry_paths = [os.path.join(path, entry) for entry in sorted(os.listdir(path))]
    folder_signatures[loc] = tuple(
        file_hash(entry_path)
        for entry_path in entry_paths
        if os.path.isfile(entry_path)
    )

# Group folders that share an identical signature.
groups = defaultdict(list)

for loc, signature in folder_signatures.items():
    groups[signature].append(loc)

# Report only signatures shared by more than one folder.
# Note: folders containing no regular files all have the empty signature `()`
# and are therefore reported together as one group.
for grouped_locations in groups.values():
    if len(grouped_locations) > 1:
        print("Identical group:", grouped_locations)


Identical group: ['Alicia', 'Amihan', 'Apolonio Samson', 'Aurora', 'Baesa', 'Bagbag', 'Bagong Lipunan Ng Crame', 'Bagong Pag-Asa', 'Bagong Silangan', 'Bagumbayan', 'Bagumbuhay', 'Bahay Toro', 'Balong Bato', 'Batasan Hills', 'Bayanihan', 'Blue Ridge A', 'Blue Ridge B', 'Botocan', 'Bungad', 'Camp Aguinaldo', 'Capri', 'Central', 'Claro', 'Commonwealth', 'Constitution Hills', 'Culiat', 'Damayan', 'Damayang Lagi', 'Del Monte', 'Dioquino Zobel', 'Don Manuel', 'Doña Imelda', 'Doña Josefa', 'Duyan-Duyan', 'E. Rodriguez', 'East Kamias', 'Escopa I', 'Escopa II', 'Escopa III', 'Escopa IV', 'Fairview', 'Gulod', 'Holy Spirit', 'Horseshoe', 'Immaculate Concepcion', 'Kaligayahan', 'Kalusugan', 'Kamuning', 'Katipunan', 'Kaunlaran', 'Kristong Hari', 'Krus Na Ligas', 'Laging Handa', 'Libis', 'Loyola Heights', 'Malaya', 'Mangga', 'Mariana', 'Mariblo', 'Marilag', 'Masagana', 'Masambong', 'Matalahib', 'Matandang Balara', 'Milagrosa', 'n.a', 'Nagkaisang Nayon', 'Nayong Kanluran', 'Novaliches Proper', 'Obrer

In [7]:
# Load the CSV at index 1 of `file_paths` and print its first rows.
sample_path = file_paths[1]
df = pd.read_csv(sample_path)
print(df.head())

              DATETIME  ALLSKY_SFC_SW_DWN  WS50M  ALLSKY_NKT  \
0  2024-01-01 00:00:00                0.0   4.65         0.0   
1  2024-01-01 01:00:00                0.0   4.61         0.0   
2  2024-01-01 02:00:00                0.0   4.52         0.0   
3  2024-01-01 03:00:00                0.0   4.53         0.0   
4  2024-01-01 04:00:00                0.0   4.46         0.0   

   ALLSKY_SFC_PAR_TOT  ALLSKY_SFC_SW_DNI  ALLSKY_SFC_SW_DIFF  \
0                 0.0                0.0                 0.0   
1                 0.0                0.0                 0.0   
2                 0.0                0.0                 0.0   
3                 0.0                0.0                 0.0   
4                 0.0                0.0                 0.0   

   ALLSKY_SFC_UV_INDEX  ALLSKY_SFC_UVA  ALLSKY_SFC_UVB  ...   V2M  V50M  \
0                  0.0             0.0             0.0  ... -0.48 -2.11   
1                  0.0             0.0             0.0  ... -0.48 -2.17   
2    

# End