<a href="https://colab.research.google.com/github/tomheston/blockchain-ai-climate-framework/blob/main/notebooks/noaa_validation_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# Parse the NOAA data file into one record per usable hourly observation.
#
# Each line is split on whitespace and the first five fields are read as
# integers: year, month, day, hour, temperature (tenths of a degree Celsius).
# NOTE(review): this matches the NOAA ISD-Lite layout -- confirm against the
# station file actually used. Any fields after the fifth are ignored.
TEMP_MISSING = -9999  # sentinel the file uses for a missing temperature
data = []

with open('manilla2024.txt', 'r') as f:
    for line in f:
        parts = line.split()
        if len(parts) < 5:
            # Blank or truncated line: there is no temperature field to read.
            continue

        year, month, day, hour, temp_raw = (int(p) for p in parts[:5])
        if temp_raw == TEMP_MISSING:
            # Skip the observation entirely rather than carrying a NaN row.
            continue

        temp_c = temp_raw / 10.0  # tenths of a degree -> degrees Celsius
        dt = datetime(year, month, day, hour)

        data.append({
            'datetime': dt,
            'temp_c': temp_c
        })

# Name the columns and pin their dtypes explicitly. Without this, a file
# that yields no usable rows produces a column-less frame and the first
# df['datetime'] lookup downstream fails with an opaque KeyError; with it,
# the statistics simply come out as NaT/NaN and zero anomalies.
df = pd.DataFrame(data, columns=['datetime', 'temp_c'])
df['datetime'] = pd.to_datetime(df['datetime'])
df['temp_c'] = df['temp_c'].astype(float)

# Summarise the parsed observations: row count, time span, temperature span.
temps = df['temp_c']
timestamps = df['datetime']

print(f"Total records: {len(df)}")
print(f"Date range: {timestamps.min()} to {timestamps.max()}")
print(f"Temp range: {temps.min():.1f}°C to {temps.max():.1f}°C")

# Mean and standard deviation over the whole dataset (pandas' default
# Series.std(), i.e. the sample standard deviation).
mean_temp, std_temp = temps.mean(), temps.std()

print(f"\nMean: {mean_temp:.2f}°C")
print(f"Std Dev: {std_temp:.2f}°C")

# Standardise every reading against the whole-dataset mean/std, then flag
# readings whose z-score magnitude exceeds 3.0 in either direction.
df['z_score'] = temps.sub(mean_temp).div(std_temp)
df['anomaly'] = df['z_score'].abs().gt(3.0)

n_anomalies = df['anomaly'].sum()
print(f"\nAnomalies detected (|z| > 3.0): {n_anomalies}")

# Print one entry per flagged reading, strongest deviation first.
if n_anomalies > 0:
    rule = "=" * 70
    print("\n" + rule)
    print("DETECTED ANOMALIES")
    print(rule)

    # Order by |z| descending so the most extreme readings lead the report.
    anomalies = df.loc[df['anomaly']].sort_values('z_score', key=abs, ascending=False)

    # Walk the three reported columns in lockstep instead of materialising
    # a Series per row.
    for when, temp, z in zip(anomalies['datetime'], anomalies['temp_c'], anomalies['z_score']):
        print(f"\n{when}")
        print(f"  Temperature: {temp:.1f}°C")
        print(f"  Z-score: {z:+.2f}σ")
        print(f"  Deviation: {temp - mean_temp:+.1f}°C from mean")

# Write the flagged rows (all columns, no index) to CSV. This runs even when
# nothing was flagged, in which case the file contains only the header row.
flagged = df.loc[df['anomaly']]
flagged.to_csv('anomalies.csv', index=False)
print("\n\nResults exported to anomalies.csv")

Total records: 8403
Date range: 2024-01-01 00:00:00 to 2024-12-31 23:00:00
Temp range: 22.0°C to 38.0°C

Mean: 29.17°C
Std Dev: 2.56°C

Anomalies detected (|z| > 3.0): 33

DETECTED ANOMALIES

2024-04-25 07:00:00
  Temperature: 38.0°C
  Z-score: +3.44σ
  Deviation: +8.8°C from mean

2024-04-27 07:00:00
  Temperature: 38.0°C
  Z-score: +3.44σ
  Deviation: +8.8°C from mean

2024-04-24 07:00:00
  Temperature: 38.0°C
  Z-score: +3.44σ
  Deviation: +8.8°C from mean

2024-04-27 08:00:00
  Temperature: 38.0°C
  Z-score: +3.44σ
  Deviation: +8.8°C from mean

2024-04-27 06:00:00
  Temperature: 37.9°C
  Z-score: +3.40σ
  Deviation: +8.7°C from mean

2024-04-26 06:00:00
  Temperature: 37.6°C
  Z-score: +3.29σ
  Deviation: +8.4°C from mean

2024-05-13 06:00:00
  Temperature: 37.4°C
  Z-score: +3.21σ
  Deviation: +8.2°C from mean

2024-04-30 06:00:00
  Temperature: 37.3°C
  Z-score: +3.17σ
  Deviation: +8.1°C from mean

2024-04-24 06:00:00
  Temperature: 37.3°C
  Z-score: +3.17σ
  Deviation: +8.1°C 