In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.layers import Dense, LSTM, Dropout # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration: every tunable value lives in this cell.
# ---------------------------------------------------------------------------

# CSV file read by the loading cell.
DATA_PATH = 'xx1.csv'

# Modelling settings.
# NOTE(review): the cells that consume these values are not visible here; the
# meanings below are inferred from the names only -- confirm against the model
# cells before relying on them.
SEQUENCE_LENGTH = 7   # presumably rows per input window
TRAIN_RATIO = 0.8     # presumably fraction of rows used for training
EPOCHS = 10
BATCH_SIZE = 64
THRESHOLD = 0.5       # presumably the probability cut-off for the positive class

# Input columns, in the exact order they are fed downstream.
FEATURE_COLUMNS = [
    # raw measurements
    'PRECIPITATION',
    'MAX_TEMP',
    'MIN_TEMP',
    'AVG_WIND_SPEED',
    # derived / lagged columns
    'TEMP_RANGE',
    'WIND_TEMP_RATIO',
    'LAGGED_PRECIPITATION',
    'LAGGED_AVG_WIND_SPEED',
    # calendar columns
    'DAY_OF_YEAR',
    'Winter',
    'Spring',
    'Summer',
    'Fall',
    'YEAR',
    'MONTH',
]

# Label column.
TARGET_COLUMN = 'FIRE_START_DAY'

In [None]:
print("Loading and preprocessing data...")

# Read the raw CSV and parse DATE into datetimes so rows can be ordered in time.
df = pd.read_csv(DATA_PATH)
df['DATE'] = pd.to_datetime(df['DATE'])

# Sort chronologically. A stable sort (mergesort) is requested explicitly so
# that rows sharing the same DATE keep their original file order instead of
# being shuffled by the default unstable quicksort.
df = df.sort_values('DATE', kind='mergesort').reset_index(drop=True)

print(f"Dataset shape: {df.shape}")
print(f"Date range: {df['DATE'].min()} to {df['DATE'].max()}")

# Summarise the label through TARGET_COLUMN (defined in the config cell) rather
# than a hard-coded column name, so this report follows the configuration.
print(f"Fire events: {df[TARGET_COLUMN].sum()} out of {len(df)} days ({df[TARGET_COLUMN].mean():.2%})")

In [None]:
# Report the per-column null counts right after loading.
print("\nChecking for missing values:")
print(df.isnull().sum())

# Mean-impute every feature column that contains at least one null.
# NOTE(review): the mean is taken over the whole frame and is applied to every
# entry of FEATURE_COLUMNS, including the season indicators and calendar
# columns -- confirm that a fractional fill value is acceptable for those.
for col in FEATURE_COLUMNS:
    series = df[col]
    if not series.isnull().any():
        continue  # nothing to fill in this column
    col_mean = series.mean()
    df[col] = series.fillna(col_mean)
    print(f"Filled missing values in '{col}' with mean: {col_mean:.2f}")

# Report the null counts again to confirm the fill took effect.
print("\nChecking for missing values after filling:")
print(df.isnull().sum())

# Last expression of the cell: show the first rows.
df.head()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Seasonal decomposition of the target column over the first rows of the data.
# 1095 rows = 3 x 365, i.e. about three years if there is one row per day
# (assumption -- TODO confirm the data has no gaps or duplicate dates).
DECOMPOSITION_DAYS = 1095
SEASONAL_PERIOD = 365  # look for an annual pattern

# Take only the first DECOMPOSITION_DAYS rows (head() returns fewer rows if the
# frame is shorter than that).
df_1095 = df.head(DECOMPOSITION_DAYS)

# Use DATE as the index so the component plots get a date x-axis.
df_time_indexed_1095 = df_1095.set_index('DATE')

# Decompose with statsmodels' default (additive) model. statsmodels needs at
# least two complete cycles (2 * SEASONAL_PERIOD observations) and raises a
# ValueError otherwise.
results = seasonal_decompose(df_time_indexed_1095[TARGET_COLUMN], period=SEASONAL_PERIOD)

# One stacked panel per component, drawn on explicitly created axes.
fig, axes = plt.subplots(4, 1, figsize=(16, 8))
components = [
    ('Observed', results.observed),
    ('Trend', results.trend),
    ('Seasonal', results.seasonal),
    ('Residual', results.resid),
]
for ax, (title, component) in zip(axes, components):
    component.plot(ax=ax, title=title)
plt.tight_layout()
plt.show()

In [None]:
# Set date as index for analysis
df_time_indexed = df.set_index('DATE')

# Take only the first 3 years (approximately 1095 rows, assuming one row per
# day). head() returns fewer rows if the frame is shorter than that.
# The stray trailing backslash that used to follow this statement was removed:
# it only parsed because the next line happened to be blank.
df_3years = df_time_indexed.head(1095)

# 1. BASIC STATISTICS

# Banner that introduces the analysis cells that follow.
print("="*60)
print("DETAILED DATA ANALYSIS - FIRST 3 YEARS")
print("="*60)

In [None]:
# Overview of the 3-year slice: size, first/last date and the span they cover.
overview_start = df_3years.index.min()
overview_end = df_3years.index.max()
overview_span = overview_end - overview_start

print("\n1. DATASET OVERVIEW:")
print(f"   Total records: {len(df_3years):,}")
print(f"   Date range: {overview_start} to {overview_end}")
# Elapsed days between the first and last timestamp.
print(f"   Duration: {overview_span.days} days")
# Number of distinct calendar years touched by the slice.
print(f"   Years covered: {overview_end.year - overview_start.year + 1}")

In [None]:
# Fire-event summary for the 3-year slice.
print("\n2. FIRE EVENTS ANALYSIS:")

n_days = len(df_3years)
fire_events = df_3years[TARGET_COLUMN].sum()

# Share of rows flagged as a fire start; guarded so an empty slice reports 0
# instead of dividing by zero.
fire_rate = (fire_events / n_days) * 100 if n_days else 0.0

# Derive the number of years from the actual row count rather than assuming
# exactly 3: head(1095) yields fewer rows on a shorter dataset, and a fixed
# divisor would then understate the yearly average. With the full 1095 rows
# this is exactly 3.0, so the reported value is unchanged.
years_in_slice = n_days / 365
fires_per_year = fire_events / years_in_slice if n_days else 0.0

print(f"   Total fire events: {fire_events:,}")
print(f"   Fire rate: {fire_rate:.2f}%")
print(f"   Average fires per year: {fires_per_year:.1f}")

In [None]:
# Descriptive statistics (as produced by DataFrame.describe) for the 3-year slice.
feature_summary = df_3years.describe()
print("\n3. FEATURE STATISTICS:")
print(feature_summary)