# Pre-processing

In [None]:
# install the dotenv package to read the .env file
# !pip install python-dotenv scikit-learn joblib 

In [2]:
from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()

# Access the MAP_KEY
MAP_KEY = os.getenv("MAP_KEY")

print("MAP_KEY:", MAP_KEY)  # Optional, remove in production


MAP_KEY: 4d0ad89f55e8cdd9550531c43b76788f


In [3]:
import pandas as pd
from pathlib import Path

root_dir = Path("../data/processed")
file_list = []
for file in os.listdir(root_dir):
    if "combined.csv" in file.split("_"):
        file_path = root_dir / file
        file_list.append(file_path)
print(file_list)

[PosixPath('../data/processed/nepal_combined.csv'), PosixPath('../data/processed/korea_combined.csv')]


In [25]:
# Combine Nepal and Korea data into a single CSV file
combined_data_path = Path("../data")
nepal_df = pd.read_csv(file_list[0])
korea_df = pd.read_csv(file_list[1])
# rename column datetime to date and datetime.1 to time
nepal_df.rename(columns={"datetime": "date", "datetime.1": "time"}, inplace=True)
combined_data_df = pd.concat([nepal_df, korea_df], ignore_index=True)
combined_data_df.to_csv(combined_data_path / "combined_data.csv", index=False)
combined_data_df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,date,time
0,28.5115,81.2492,300.0,1.5,1.2,Aqua,MODIS,24,6.03,288.2,6.7,D,0,13-Jan-2020,7:34:00 AM
1,28.5604,82.72,305.1,1.2,1.1,Aqua,MODIS,45,6.03,294.2,8.2,D,0,13-Jan-2020,7:34:00 AM
2,26.6773,86.9825,317.9,1.1,1.1,Aqua,MODIS,77,6.03,295.8,14.0,D,0,15-Jan-2020,7:21:00 AM
3,28.3409,84.435,304.1,1.0,1.0,Aqua,MODIS,58,6.03,282.9,8.3,D,0,20-Jan-2020,7:40:00 AM
4,28.6669,82.7348,307.1,1.5,1.2,Aqua,MODIS,53,6.03,284.5,16.1,D,0,22-Jan-2020,7:28:00 AM


# Model Training

### Feature Selection

In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

# these are the columns we don't need
unnecessary_columns = ["version", "instrument", "type", "satellite", "date", "time"]
# filter out unnecessary columns
selected_columns = [
    col for col in combined_data_df.columns if col not in unnecessary_columns
]
combined_data_df = combined_data_df[selected_columns]

X = combined_data_df.drop(columns=["confidence"])
X["daynight"] = X["daynight"].map({"D": 1, "N": 0})

y = combined_data_df["confidence"].values.ravel()

# Split the combined data ensuring stratification if applicable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [27]:
combined_data_df.columns

Index(['latitude', 'longitude', 'brightness', 'scan', 'track', 'confidence',
       'bright_t31', 'frp', 'daynight'],
      dtype='object')

### Train the Model

In [14]:
from sklearn.ensemble import RandomForestRegressor

# regressor model
model = RandomForestRegressor(max_features='log2', min_samples_leaf=2, n_estimators=200, random_state=42)
model.fit(X_train_scaled, y_train)
# Save the model
joblib.dump(model, "wildfire_predictor_model.pkl")

['wildfire_predictor_model.pkl']

### Model testing

In [15]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Absolute Error: {mae}")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")

# Evaluate the model
evaluate_model(model, X_test_scaled, y_test)

# Save the scaler
joblib.dump(scaler, "wildfire_predictor_scaler.pkl")

Mean Absolute Error: 8.51540961223337
Mean Squared Error: 139.872240387292
R^2 Score: 0.6541986812538385


['wildfire_predictor_scaler.pkl']

# Get new data

In [28]:
import pandas as pd

try:
    npl_url = f"https://firms.modaps.eosdis.nasa.gov/api/country/csv/{MAP_KEY}/MODIS_NRT/NPL/10"
    npl_data = pd.read_csv(npl_url)
    display(npl_data.head())
    npl_original = npl_data.copy()
except Exception as e:
    print("🔥 Failed to load Nepal wildfire data:", e)    

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,NPL,28.43337,81.10894,331.01,1.0,1.0,2025-05-13,417,Terra,MODIS,81,6.1NRT,307.6,16.2,D
1,NPL,28.20702,81.38847,332.81,1.02,1.01,2025-05-14,858,Aqua,MODIS,77,6.1NRT,317.55,12.78,D
2,NPL,28.55418,80.84904,327.92,1.0,1.0,2025-05-14,858,Aqua,MODIS,70,6.1NRT,311.51,10.81,D
3,NPL,28.55987,80.88885,326.4,1.01,1.0,2025-05-14,858,Aqua,MODIS,61,6.1NRT,310.56,9.7,D
4,NPL,28.75919,80.41531,326.49,1.0,1.0,2025-05-14,858,Aqua,MODIS,56,6.1NRT,313.54,7.13,D


In [20]:
npl_data = npl_data.drop(columns=["version", "country_id", "instrument", "acq_date", "acq_time", "satellite"])
npl_data["daynight"] = npl_data["daynight"].map({"D": 1, "N": 0})
npl_data = npl_data.drop(columns=["confidence"])
npl_data_scaled = scaler.transform(npl_data)
npl_data_predictions = model.predict(npl_data_scaled)

for i, pred in enumerate(npl_data_predictions):
    print(f"Prediction for row {i+1}: {pred:.1f}")

KeyError: "['version', 'country_id', 'instrument', 'acq_date', 'acq_time', 'satellite'] not found in axis"

In [18]:
npl_original['confidence'].head()

NameError: name 'npl_original' is not defined

In [14]:
npl_original.head()

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,NPL,27.27485,84.68124,317.64,2.71,1.58,2025-05-11,434,Terra,MODIS,44,6.1NRT,297.55,30.62,D
1,NPL,28.3959,81.4378,310.72,1.12,1.05,2025-05-11,1531,Terra,MODIS,76,6.1NRT,297.61,7.37,N
2,NPL,28.76836,81.34875,324.38,1.9,1.35,2025-05-12,920,Aqua,MODIS,43,6.1NRT,305.38,19.99,D
3,NPL,28.43337,81.10894,331.01,1.0,1.0,2025-05-13,417,Terra,MODIS,81,6.1NRT,307.6,16.2,D
4,NPL,28.20702,81.38847,332.81,1.02,1.01,2025-05-14,858,Aqua,MODIS,77,6.1NRT,317.55,12.78,D
