In [1]:
import sqlite3
import config

# Inspect the SQLite database at config.DB_PATH: print the schema of the
# `fires` table, a few sample rows, and the names of all tables.
conn = sqlite3.connect(config.DB_PATH)
try:
    cursor = conn.cursor()

    # Check fires table structure
    print("=" * 50)
    print("FIRES TABLE COLUMNS:")
    print("=" * 50)
    cursor.execute("PRAGMA table_info(fires)")
    # Each table_info row is indexed positionally: [1] and [2] are printed
    # as the column name and its declared type.
    for row in cursor.fetchall():
        print(f"  {row[1]} ({row[2]})")

    # Sample some data
    print("\n" + "=" * 50)
    print("SAMPLE FIRE DATA:")
    print("=" * 50)
    cursor.execute("SELECT * FROM fires LIMIT 3")
    # cursor.description holds one entry per result column; item 0 is its name.
    columns = [desc[0] for desc in cursor.description]
    print(f"Columns: {', '.join(columns)}")
    print()
    for row in cursor.fetchall():
        for col, val in zip(columns, row):
            print(f"  {col}: {val}")
        print()

    # Check other tables
    print("=" * 50)
    print("ALL TABLES:")
    print("=" * 50)
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
    for table in cursor.fetchall():
        print(f"  ‚úì {table[0]}")
finally:
    # Release the database handle even when one of the queries above raises
    # (e.g. the `fires` table does not exist yet).
    conn.close()

FIRES TABLE COLUMNS:
  id (INTEGER)
  lat (REAL)
  lon (REAL)
  date (TEXT)
  time (TEXT)
  frp (REAL)
  brightness (REAL)
  confidence (TEXT)
  daynight (TEXT)
  satellite (TEXT)

SAMPLE FIRE DATA:
Columns: id, lat, lon, date, time, frp, brightness, confidence, daynight, satellite

  id: 1
  lat: 33.81573
  lon: -118.23775
  date: 2025-01-07
  time: 1007
  frp: 1.05
  brightness: 297.45
  confidence: n
  daynight: N
  satellite: N20

  id: 2
  lat: 34.15556
  lon: -118.19301
  date: 2025-01-07
  time: 1007
  frp: 1.56
  brightness: 309.21
  confidence: n
  daynight: N
  satellite: N20

  id: 3
  lat: 34.29366
  lon: -118.80275
  date: 2025-01-07
  time: 1007
  frp: 1.33
  brightness: 310.25
  confidence: n
  daynight: N
  satellite: N20

ALL TABLES:
  ‚úì fires
  ‚úì elevation_cache
  ‚úì wind_cache
  ‚úì predictions


In [2]:
import rasterio
from pathlib import Path
import config

# Find the elevation file
elev_dir = config.DATA_DIR / "elevation"
# glob() yields matches in an arbitrary, filesystem-dependent order; sorting
# makes the inspected file the same on every run when several rasters exist.
elev_files = sorted(elev_dir.glob("*.tif"))

if elev_files:
    elev_file = elev_files[0]
    if len(elev_files) > 1:
        # Make it explicit that only one of the candidates is being examined.
        print(f"Note: {len(elev_files)} .tif files found; inspecting the first by name")
    print(f"Checking: {elev_file}")
    print("=" * 60)

    # Dump the raster's georeferencing and metadata without reading pixel data.
    with rasterio.open(elev_file) as src:
        print(f"Format: {src.driver}")
        print(f"CRS: {src.crs}")
        print(f"Bounds: {src.bounds}")
        print(f"Shape: {src.shape}")
        print(f"Resolution: {src.res}")
        print("\nMetadata:")
        print(src.meta)
        print("\nTags:")
        print(src.tags())
else:
    print("No elevation file found")

Checking: /Users/pranathy/Documents/Illinois Tech/Semester 4/Wildfire Detection/Wildfire-Detection-CV-Geospatial/wildfire-detection/data/elevation/elevation_2025-01-07.tif
Format: GTiff
CRS: EPSG:4269
Bounds: BoundingBox(left=-119.00027728978193, bottom=33.50027737376589, right=-118.00027728178192, top=34.500277381765905)
Shape: (3600, 3600)
Resolution: (0.00027777778000000425, 0.00027777778000000425)

Metadata:
{'driver': 'GTiff', 'dtype': 'float32', 'nodata': -999999.0, 'width': 3600, 'height': 3600, 'count': 1, 'crs': CRS.from_wkt('GEOGCS["NAD83",DATUM["North_American_Datum_1983",SPHEROID["GRS 1980",6378137,298.257222101004,AUTHORITY["EPSG","7019"]],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4269"]]'), 'transform': Affine(0.00027777778000000425, 0.0, -119.00027728978193,
       0.0, -0.00027777778000000425, 34.500277381765905)}

Tags:
{'AREA_OR_POINT': 'Are

In [3]:
import sqlite3
import config
from pathlib import Path
import json

def _cached_file_exists(file_path):
    """Return True when ``file_path`` is a non-empty path that exists on disk."""
    # A NULL column value would make Path() raise TypeError and an empty string
    # would resolve to the current directory, so both are treated as missing.
    return bool(file_path) and Path(file_path).exists()


def _file_size_mb(file_path):
    """Return the size of ``file_path`` in megabytes (1 MB = 1024 * 1024 bytes)."""
    return Path(file_path).stat().st_size / (1024 * 1024)


def _report_fires(cursor):
    """Print per-day fire detection counts; return True when any rows exist."""
    print("\nüî• FIRE DATA:")
    cursor.execute("SELECT date, COUNT(*) FROM fires GROUP BY date ORDER BY date")
    fire_dates = cursor.fetchall()

    if not fire_dates:
        print("  ‚úó No fire data found")
        return False

    print(f"  ‚úì {len(fire_dates)} days of data")
    for date, count in fire_dates:
        print(f"    {date}: {count} detections")
    total_fires = sum(count for _, count in fire_dates)
    print(f"  Total: {total_fires} fire detections")
    return True


def _report_elevation(cursor):
    """Print the first elevation cache entry; return True when its file exists."""
    print("\n‚õ∞Ô∏è  ELEVATION DATA:")
    cursor.execute("SELECT file_path, downloaded_at, metadata FROM elevation_cache")
    elev = cursor.fetchone()  # only the first cached entry is inspected

    if not elev:
        print("  ‚úó No elevation data in database")
        return False

    file_path, downloaded_at, metadata = elev
    if not _cached_file_exists(file_path):
        print(f"  ‚úó File missing: {file_path}")
        return False

    file_size = _file_size_mb(file_path)
    print(f"  ‚úì File exists: {file_path}")
    print(f"    Size: {file_size:.2f} MB")
    print(f"    Downloaded: {downloaded_at}")

    if metadata:
        # The metadata column is informational only: a malformed value must not
        # abort the remaining checks.
        try:
            meta = json.loads(metadata)
        except (TypeError, ValueError):
            meta = None
        if isinstance(meta, dict):
            print(f"    Shape: {meta.get('shape', 'N/A')}")
            print(f"    Resolution: {meta.get('resolution', 'N/A')}")
        else:
            print("    Metadata: unreadable (expected a JSON object)")
    return True


def _report_wind(cursor):
    """Print every wind cache entry; return True when all listed files exist."""
    print("\nüå¨Ô∏è  WIND DATA:")
    cursor.execute("SELECT date, file_path FROM wind_cache ORDER BY date")
    wind_data = cursor.fetchall()

    if not wind_data:
        print("  ‚úó No wind data in database")
        return False

    print(f"  ‚úì {len(wind_data)} days of data")
    missing = []
    for date, file_path in wind_data:
        if _cached_file_exists(file_path):
            file_size = _file_size_mb(file_path)
            print(f"    {date}: ‚úì ({file_size:.2f} MB)")
        else:
            print(f"    {date}: ‚úó FILE MISSING")
            missing.append(date)

    if missing:
        print(f"\n  ‚ö†Ô∏è  {len(missing)} wind files missing!")
    return not missing


def verify_all_data():
    """Verify all downloaded data is complete and accessible.

    Opens the SQLite database at ``config.DB_PATH``, prints a report for the
    ``fires``, ``elevation_cache`` and ``wind_cache`` tables, and checks that
    every cached file path recorded there exists on disk.

    Returns:
        bool: True when fire rows exist, the first cached elevation file
        exists, and wind rows exist with all of their files present;
        False otherwise.

    Raises:
        sqlite3.Error: if the database cannot be queried (for example a table
        is missing). The connection is closed before the error propagates.
    """

    print("=" * 60)
    print("DATA VERIFICATION")
    print("=" * 60)

    conn = sqlite3.connect(config.DB_PATH)
    try:
        cursor = conn.cursor()
        has_fires = _report_fires(cursor)        # 1. Check fires
        has_elevation = _report_elevation(cursor)  # 2. Check elevation
        wind_ok = _report_wind(cursor)           # 3. Check wind
    finally:
        # Always release the handle, even if one of the queries raised.
        conn.close()

    # 4. Summary
    print("\n" + "=" * 60)
    print("SUMMARY:")
    print("=" * 60)

    print(f"üî• Fires: {'‚úì' if has_fires else '‚úó'}")
    print(f"‚õ∞Ô∏è  Elevation: {'‚úì' if has_elevation else '‚úó'}")
    print(f"üå¨Ô∏è  Wind: {'‚úì' if wind_ok else '‚úó'}")

    if has_fires and has_elevation and wind_ok:
        print("\n‚úÖ ALL DATA READY FOR PROCESSING")
        return True

    print("\n‚ö†Ô∏è  SOME DATA MISSING - CHECK ABOVE")
    return False

# Run the verification when this cell/script is executed directly; the guard
# keeps it from running if the code is imported as a module instead.
if __name__ == "__main__":
    verify_all_data()

DATA VERIFICATION

üî• FIRE DATA:
  ‚úì 14 days of data
    2025-01-07: 67 detections
    2025-01-08: 885 detections
    2025-01-09: 396 detections
    2025-01-10: 124 detections
    2025-01-11: 243 detections
    2025-01-12: 16 detections
    2025-01-13: 10 detections
    2025-01-14: 10 detections
    2025-01-15: 8 detections
    2025-01-16: 10 detections
    2025-01-17: 3 detections
    2025-01-18: 3 detections
    2025-01-19: 14 detections
    2025-01-20: 1 detections
  Total: 1790 fire detections

‚õ∞Ô∏è  ELEVATION DATA:
  ‚úì File exists: /Users/pranathy/Documents/Illinois Tech/Semester 4/Wildfire Detection/Wildfire-Detection-CV-Geospatial/wildfire-detection/data/elevation/elevation_LA.tif
    Size: 25.37 MB
    Downloaded: 2025-12-28T23:45:54.906013
    Shape: [3600, 3600]
    Resolution: [0.00027777778000000425, 0.00027777778000000425]

üå¨Ô∏è  WIND DATA:
  ‚úì 14 days of data
    2025-01-07: ‚úì (0.03 MB)
    2025-01-08: ‚úì (0.03 MB)
    2025-01-09: ‚úì (0.03 MB)
    2025-01

In [4]:
import pandas as pd
import config
import matplotlib.pyplot as plt

# Read the processed training set produced earlier in the pipeline.
# NOTE(review): read_pickle executes arbitrary code from the file; only load
# pickles this project generated itself.
df = pd.read_pickle(config.DATA_DIR / "processed" / "training_dataset.pkl")

RULE = "=" * 60
print(RULE)
print("LABEL VERIFICATION")
print(RULE)

# 1. How many distinct target locations carry each label value
print("\n1. SPATIAL DISTRIBUTION:")
for label in (1, 0):
    labelled = df[df['spread'] == label]
    unique_sites = labelled[['target_lat', 'target_lon']].drop_duplicates()
    print(f"   Unique locations with spread={label}: {unique_sites.shape[0]}")

# 2. Per-date totals: positive labels, sample count, and positive fraction
print("\n2. TEMPORAL DISTRIBUTION:")
spread_by_date = (
    df.groupby('date')['spread']
    .agg(['sum', 'count', 'mean'])
    .rename(columns={'sum': 'spread_yes', 'count': 'total', 'mean': 'spread_rate'})
)
print(spread_by_date)

# 3. Text bar chart of the positive fraction, scaled so 100% is 50 glyphs wide
print("\n3. SPREAD RATE BY DATE:")
BAR_WIDTH = 50
for day, fraction in spread_by_date['spread_rate'].items():
    glyphs = "‚ñà" * int(fraction * BAR_WIDTH)
    print(f"   {day}: {fraction:.2%} {glyphs}")

# 4. Show the first few positive samples
print("\n4. SAMPLE SPREAD=1 EXAMPLES:")
PATCH_SIDE = 5          # patches are indexed as a 5 x 5 grid of cells
FEATURES_PER_CELL = 7   # each cell contributes 7 consecutive values
BURNING_OFFSET = 6      # the 7th value of a cell is its is_burning flag
spread_examples = df[df['spread'] == 1].head(5)
for label_idx, sample in spread_examples.iterrows():
    print(f"\n   Example {label_idx}:")
    print(f"   Location: ({sample['target_lat']:.4f}, {sample['target_lon']:.4f})")
    print(f"   Date: {sample['date']}")
    # A burning centre cell on a spread=1 sample would be suspicious.
    flat_patch = sample['patch_features']
    centre_cell = PATCH_SIDE * PATCH_SIDE // 2
    centre_start = centre_cell * FEATURES_PER_CELL
    centre_burning = flat_patch[centre_start + BURNING_OFFSET]
    print(f"   Center cell burning: {centre_burning}")

print("\n" + RULE)

LABEL VERIFICATION

1. SPATIAL DISTRIBUTION:
   Unique locations with spread=1: 68
   Unique locations with spread=0: 363

2. TEMPORAL DISTRIBUTION:
            spread_yes  total  spread_rate
date                                      
2025-01-07          21     56     0.375000
2025-01-08          81    331     0.244713
2025-01-09          57    326     0.174847
2025-01-10          29    266     0.109023
2025-01-11           3    150     0.020000
2025-01-12           3     80     0.037500
2025-01-13           4     62     0.064516
2025-01-14           5     59     0.084746
2025-01-15           3     52     0.057692
2025-01-16           1     40     0.025000
2025-01-17           2     24     0.083333
2025-01-18           3     24     0.125000

3. SPREAD RATE BY DATE:
   2025-01-07: 37.50% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   2025-01-08: 24.47% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   2025-01-09: 17.48% ‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
   2025-01-10: 10.90% ‚ñà‚ñà‚ñà‚ñà‚ñà
   

In [None]:
import sqlite3
import config
import numpy as np
import rasterio
import richdem as rd
from rasterio.transform import rowcol

print("=" * 60)
print("TERRAIN DATA DEBUGGING")
print("=" * 60)

# Load elevation: look up the cached DEM path, closing the connection even if
# the query fails.
conn = sqlite3.connect(config.DB_PATH)
try:
    cursor = conn.cursor()
    cursor.execute("SELECT file_path FROM elevation_cache LIMIT 1")
    dem_record = cursor.fetchone()
finally:
    conn.close()

if dem_record is None:
    # Fail with an explicit message instead of "'NoneType' is not subscriptable".
    raise RuntimeError("elevation_cache is empty: no DEM file path to debug")
dem_file = dem_record[0]

with rasterio.open(dem_file) as src:
    dem = src.read(1)
    transform = src.transform
    # Prefer the nodata value declared by the raster itself; fall back to the
    # sentinel this cell previously hard-coded when the file declares none.
    nodata = src.nodata if src.nodata is not None else -999999

dem_rd = rd.rdarray(dem, no_data=nodata)
slope = rd.TerrainAttribute(dem_rd, attrib='slope_degrees')

# Elevations excluding nodata cells (a NaN sentinel never compares equal, so it
# needs isnan()).
valid_elevation = dem[~np.isnan(dem)] if np.isnan(nodata) else dem[dem != nodata]

print(f"\nDEM Stats:")
print(f"  Shape: {dem.shape}")
if valid_elevation.size:
    print(f"  Min elevation: {valid_elevation.min():.1f}m")
    print(f"  Max elevation: {valid_elevation.max():.1f}m")
else:
    print("  Min/Max elevation: n/a (every cell is nodata)")
print(f"  Invalid cells (< 0): {(dem < 0).sum()}")

# Min/Max are taken over strictly positive slopes only; zero-slope cells are
# reported separately below.
positive_slope = slope[slope > 0]
zero_slope_cells = (slope == 0).sum()

print(f"\nSlope Stats:")
if positive_slope.size:
    print(f"  Min slope: {positive_slope.min():.2f}¬∞")
    print(f"  Max slope: {positive_slope.max():.2f}¬∞")
else:
    print("  Min/Max slope: n/a (no cell has slope > 0)")
print(f"  Zero slope cells: {zero_slope_cells}")
print(f"  Invalid slope (==0): {zero_slope_cells / slope.size * 100:.1f}%")

# Check grid coverage
# NOTE(review): 0.01 degrees per km is a coarse approximation (one degree of
# latitude is roughly 111 km); kept as-is, presumably to match the grid used
# elsewhere in the project - confirm.
deg_per_km = 0.01
grid_step = config.GRID_SIZE_KM * deg_per_km
lats = np.arange(config.LAT_MIN, config.LAT_MAX, grid_step)
lons = np.arange(config.LON_MIN, config.LON_MAX, grid_step)
total_cells = len(lats) * len(lons)

print(f"\nGrid Coverage Check:")
print(f"  Grid cells: {len(lats)} x {len(lons)} = {total_cells}")

valid_count = 0
invalid_terrain = 0
out_of_bounds = 0
sample_invalid = []
SAMPLE_WINDOW = 10  # invalid examples are collected from the first 10 x 10 grid cells

# Single pass over the grid: classify every cell and, inside the sample
# window, remember the invalid ones for the report further down.
for i, lat in enumerate(lats):
    for j, lon in enumerate(lons):
        # rowcol takes x (longitude) before y (latitude).
        row_idx, col_idx = rowcol(transform, lon, lat)

        if not (0 <= row_idx < dem.shape[0] and 0 <= col_idx < dem.shape[1]):
            out_of_bounds += 1
            continue

        elevation = float(dem[row_idx, col_idx])
        slope_val = float(slope[row_idx, col_idx])

        if elevation == nodata or elevation < 0 or slope_val == 0:
            invalid_terrain += 1
            if i < SAMPLE_WINDOW and j < SAMPLE_WINDOW:
                sample_invalid.append({
                    'lat': lat,
                    'lon': lon,
                    'elevation': elevation,
                    'slope': slope_val
                })
            continue

        valid_count += 1

# Guard the percentage against an empty grid (LAT/LON min >= max).
coverage_pct = valid_count / total_cells * 100 if total_cells else 0.0

print(f"  Valid cells: {valid_count}")
print(f"  Out of DEM bounds: {out_of_bounds}")
print(f"  Invalid terrain (elev<0 or slope=0): {invalid_terrain}")
print(f"  Coverage: {coverage_pct:.1f}%")

# Check specific problem areas
print(f"\nChecking why cells are invalid:")
if sample_invalid:
    print(f"  Sample invalid cells:")
    for cell in sample_invalid[:5]:
        print(f"    ({cell['lat']:.4f}, {cell['lon']:.4f}): elev={cell['elevation']:.1f}, slope={cell['slope']:.2f}")

print("=" * 60)

TERRAIN DATA DEBUGGING



A Slope calculation (degrees)
C Horn, B.K.P., 1981. Hill shading and the reflectance map. Proceedings of the IEEE 69, 14–47. doi:10.1109/PROC.1981.11918




DEM Stats:
  Shape: (3600, 3600)
  Min elevation: -5.0m
  Max elevation: 2167.9m
  Invalid cells (< 0): 108140

Slope Stats:
  Min slope: 0.00¬∞
  Max slope: 89.21¬∞
  Zero slope cells: 5115770
  Invalid slope (==0): 39.5%

Grid Coverage Check:
  Grid cells: 100 x 100 = 10000
  Valid cells: 5930
  Out of DEM bounds: 100
  Invalid terrain (elev<0 or slope=0): 3970
  Coverage: 59.3%

Checking why cells are invalid:
  Sample invalid cells:
    (33.5100, -119.0000): elev=0.0, slope=0.00
    (33.5100, -118.9900): elev=0.0, slope=0.00
    (33.5100, -118.9800): elev=0.0, slope=0.00
    (33.5100, -118.9700): elev=0.0, slope=0.00
    (33.5100, -118.9600): elev=0.0, slope=0.00


In [6]:
import pandas as pd
import numpy as np
import config

# Reload the processed training set so this cell does not rely on earlier cells.
df = pd.read_pickle(config.DATA_DIR / "processed" / "training_dataset.pkl")

RULE = "=" * 60
CELLS_PER_PATCH = 25     # the cell loops below walk 25 cells per patch
VALUES_PER_CELL = 7      # each cell is a run of 7 consecutive values

print(RULE)
print("FEATURE SANITY CHECK")
print(RULE)

# Inspect the first patch in the dataset
sample = df.iloc[0]['patch_features']
print(f"\nSample patch (175 features = 25 cells √ó 7):")
print(f"Total features: {len(sample)}")

# Walk every cell (so a malformed cell still fails the unpacking), but only
# print the first cell, the centre cell and the last cell.
REPORTED_CELLS = (0, 12, 24)
for cell_idx in range(CELLS_PER_PATCH):
    offset = cell_idx * VALUES_PER_CELL
    (elev_m, slope_deg, wind_ms, wind_alignment,
     fire_dist, fire_frp, burning) = sample[offset:offset + VALUES_PER_CELL]

    if cell_idx not in REPORTED_CELLS:
        continue

    position = 'center' if cell_idx == 12 else 'corner'
    print(f"\nCell {cell_idx} ({position}):")
    print(f"  Elevation: {elev_m:.1f}m")
    print(f"  Slope: {slope_deg:.2f}¬∞")
    print(f"  Wind speed: {wind_ms:.2f} m/s")
    print(f"  Wind alignment: {wind_alignment:.2f}")
    print(f"  Distance to fire: {fire_dist:.2f} km")
    print(f"  Fire intensity: {fire_frp:.1f} MW")
    print(f"  Is burning: {burning}")

# Look for NaN / infinite values across every feature of every patch
print("\n" + RULE)
print("DATA QUALITY CHECK")
print(RULE)

flattened = []
for patch in df['patch_features']:
    flattened.extend(patch)
all_features = np.array(flattened)

print(f"Total feature values: {len(all_features)}")
print(f"NaN values: {np.isnan(all_features).sum()}")
print(f"Infinite values: {np.isinf(all_features).sum()}")
print(f"Min value: {np.min(all_features):.2f}")
print(f"Max value: {np.max(all_features):.2f}")

# Per-feature value ranges
print("\n" + RULE)
print("FEATURE RANGES (across all patches)")
print(RULE)

# Position of each name matches the position of its value inside a cell.
FEATURE_ORDER = (
    'elevation',
    'slope',
    'wind_speed',
    'wind_align',
    'distance',
    'frp',
    'is_burning',
)
features_by_type = {feature: [] for feature in FEATURE_ORDER}

for patch in df['patch_features']:
    for cell_idx in range(CELLS_PER_PATCH):
        offset = cell_idx * VALUES_PER_CELL
        cell_values = patch[offset:offset + VALUES_PER_CELL]
        for slot, feature in enumerate(FEATURE_ORDER):
            features_by_type[feature].append(cell_values[slot])

for name, collected in features_by_type.items():
    column = np.array(collected)
    print(f"{name:12s}: min={column.min():8.2f}, max={column.max():8.2f}, mean={column.mean():8.2f}")

print(RULE)

FEATURE SANITY CHECK

Sample patch (175 features = 25 cells √ó 7):
Total features: 175

Cell 0 (corner):
  Elevation: 36.6m
  Slope: 83.81¬∞
  Wind speed: 1.70 m/s
  Wind alignment: -0.58
  Distance to fire: 0.84 km
  Fire intensity: 11.6 MW
  Is burning: 0.0

Cell 12 (center):
  Elevation: 183.7m
  Slope: 37.99¬∞
  Wind speed: 1.70 m/s
  Wind alignment: -0.24
  Distance to fire: 1.10 km
  Fire intensity: 12.5 MW
  Is burning: 0.0

Cell 24 (corner):
  Elevation: 513.8m
  Slope: 71.94¬∞
  Wind speed: 1.70 m/s
  Wind alignment: -0.96
  Distance to fire: 1.64 km
  Fire intensity: 65.7 MW
  Is burning: 0.0

DATA QUALITY CHECK
Total feature values: 264250
NaN values: 0
Infinite values: 0
Min value: -1.00
Max value: 1707.76

FEATURE RANGES (across all patches)
elevation   : min=    0.00, max= 1707.76, mean=  417.54
slope       : min=    0.00, max=   88.26, mean=   66.93
wind_speed  : min=    0.58, max=    2.55, mean=    1.69
wind_align  : min=   -1.00, max=    1.00, mean=    0.10
distance   