# API Exploration

In [1]:
import pandas as pd


MAP_KEY = '4d0ad89f55e8cdd9550531c43b76788f'
url = 'https://firms.modaps.eosdis.nasa.gov/mapserver/mapkey_status/?MAP_KEY=' + MAP_KEY

try:
    df = pd.read_json(url, typ='series')
    display(df)
except ValueError:
    print("Failed to fetch data from the URL. Please check the URL or your internet connection.")

transaction_limit             5000
current_transactions             0
transaction_interval    10 minutes
dtype: object

In [2]:
da_url = (
    "https://firms.modaps.eosdis.nasa.gov/api/data_availability/csv/" + MAP_KEY + "/all"
)
df = pd.read_csv(da_url)
display(df)

Unnamed: 0,data_id,min_date,max_date
0,MODIS_NRT,2025-02-01,2025-05-21
1,MODIS_SP,2000-11-01,2025-01-31
2,VIIRS_NOAA20_NRT,2025-02-01,2025-05-21
3,VIIRS_NOAA20_SP,2018-04-01,2025-01-31
4,VIIRS_NOAA21_NRT,2024-01-17,2025-05-21
5,VIIRS_SNPP_NRT,2025-02-01,2025-05-21
6,VIIRS_SNPP_SP,2012-01-20,2025-01-31
7,LANDSAT_NRT,2022-06-20,2025-05-21
8,GOES_NRT,2022-08-09,2025-05-22
9,BA_MODIS,2000-11-01,2025-02-01


In [3]:
# let's create a simple function that tells us how many transactions we have used.
# We will use this in later examples

def get_transaction_count() :
  count = 0
  try:
    df = pd.read_json(url,  typ='series')
    count = df['current_transactions']
  except:
    print ("Error in our call.")
  return count

tcount = get_transaction_count()
print ('Our current transaction count is %i' % tcount)

Our current transaction count is 5


In [4]:
# now let's see how many transactions we use by querying this end point

start_count = get_transaction_count()
pd.read_csv(da_url)
end_count = get_transaction_count()
print ('We used %i transactions.' % (end_count-start_count))

# now remember, after 10 minutes this will reset

We used 5 transactions.


## API / Area

In [5]:
# in this example let's look at VIIRS NOAA-20, entire world and the most recent day
area_url = 'https://firms.modaps.eosdis.nasa.gov/api/area/csv/' + MAP_KEY + '/VIIRS_NOAA20_NRT/world/1'
start_count = get_transaction_count()
df_area = pd.read_csv(area_url)
end_count = get_transaction_count()
print ('We used %i transactions.' % (end_count-start_count))

df_area

We used 0 transactions.


Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight


## API / Countries

In [6]:
countries_url = 'https://firms.modaps.eosdis.nasa.gov/api/countries'
df_countries = pd.read_csv(countries_url, sep=';')
df_countries

Unnamed: 0,id,abreviation,name,extent
0,1,ABW,Aruba,"BOX(-70.0624080069999 12.417669989,-69.8768204..."
1,2,AFG,Afghanistan,"BOX(60.4867777910001 29.3866053260001,74.89230..."
2,3,AGO,Angola,"BOX(11.6693941430001 -18.0314047239998,24.0617..."
3,4,AIA,Anguilla,"BOX(-63.4288223949999 18.1690941430001,-62.972..."
4,6,ALA,Aland Islands,"BOX(19.5131942070001 59.9044863950001,21.09669..."
...,...,...,...,...
239,234,WSM,Samoa,"BOX(-172.782582161 -14.052829685,-171.43769283..."
240,235,YEM,Yemen,"BOX(42.5457462900001 12.1114436720001,54.54029..."
241,236,ZAF,South Africa,"BOX(16.4699813160001 -46.965752863,37.97779381..."
242,237,ZMB,Zambia,"BOX(21.9798775630001 -18.0692318719999,33.6742..."


## Country abbreviation
- South Korea -> abbreviation = `KOR`
- Nepal -> abbreviation = `NPL`

In [7]:
df_countries[df_countries['abreviation'] == 'KOR']

Unnamed: 0,id,abreviation,name,extent
117,115,KOR,Republic of Korea,"BOX(124.613617384 33.197577216,131.862521886 3..."


## Country wise `csv` data

In [8]:
# Let's see last four days MODIS data for Peru
korea_url = 'https://firms.modaps.eosdis.nasa.gov/api/country/csv/' + MAP_KEY + '/MODIS_NRT/KOR/10'
korea_df = pd.read_csv(korea_url)
korea_df

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,KOR,37.14691,128.15056,310.71,1.46,1.19,2025-05-14,134,Terra,MODIS,48,6.1NRT,299.97,10.81,D
1,KOR,37.56362,127.85799,307.59,1.38,1.16,2025-05-14,134,Terra,MODIS,24,6.1NRT,293.26,9.16,D
2,KOR,35.09536,128.85632,313.75,1.78,1.31,2025-05-14,137,Terra,MODIS,0,6.1NRT,300.09,20.03,D
3,KOR,35.09666,128.1938,309.57,1.62,1.25,2025-05-14,137,Terra,MODIS,20,6.1NRT,293.52,11.78,D
4,KOR,35.14049,128.86945,312.14,1.78,1.31,2025-05-14,137,Terra,MODIS,26,6.1NRT,299.55,17.0,D
5,KOR,35.14645,128.17204,307.94,1.61,1.25,2025-05-14,137,Terra,MODIS,35,6.1NRT,294.74,9.19,D
6,KOR,35.94226,128.29424,313.52,1.58,1.24,2025-05-14,137,Terra,MODIS,29,6.1NRT,300.28,11.75,D
7,KOR,36.14408,128.45381,312.91,1.6,1.24,2025-05-14,137,Terra,MODIS,35,6.1NRT,300.83,13.13,D
8,KOR,36.59377,128.19667,312.04,1.51,1.21,2025-05-14,137,Terra,MODIS,46,6.1NRT,299.94,10.88,D
9,KOR,36.9856,126.70081,306.25,1.02,1.01,2025-05-14,1236,Terra,MODIS,58,6.1NRT,291.32,7.04,N


In [9]:
len(korea_df)

18

# Pre-processing

In [None]:
# install the dotenv package to read the .env file
# !pip install python-dotenv scikit-learn joblib 

In [10]:
from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()

# Access the MAP_KEY
MAP_KEY = os.getenv("MAP_KEY")

print("MAP_KEY:", MAP_KEY)  # Optional, remove in production


MAP_KEY: None


In [11]:
import pandas as pd
from pathlib import Path

root_dir = Path("../data/processed")
file_list = []
for file in os.listdir(root_dir):
    if "combined.csv" in file.split("_"):
        file_path = root_dir / file
        file_list.append(file_path)
print(file_list)

[WindowsPath('../data/processed/korea_combined.csv'), WindowsPath('../data/processed/nepal_combined.csv')]


In [12]:
# Combine Nepal and Korea data into a single CSV file
combined_data_path = Path("../data")
nepal_df = pd.read_csv(file_list[0])
korea_df = pd.read_csv(file_list[1])
# rename column datetime to date and datetime.1 to time
nepal_df.rename(columns={"datetime": "date", "datetime.1": "time"}, inplace=True)
combined_data_df = pd.concat([nepal_df, korea_df], ignore_index=True)
combined_data_df.to_csv(combined_data_path / "combined_data.csv", index=False)
combined_data_df.head()

Unnamed: 0,latitude,longitude,brightness,scan,track,satellite,instrument,confidence,version,bright_t31,frp,daynight,type,date,time
0,36.9854,126.7148,301.3,1.0,1.0,Terra,MODIS,40,6.03,278.4,7.6,D,2,24-Jan-2020,2:22:00 AM
1,35.9964,129.3935,304.8,1.3,1.1,Aqua,MODIS,60,6.03,287.2,10.3,D,2,1-Feb-2020,4:49:00 AM
2,37.0491,126.5183,300.4,1.1,1.0,Aqua,MODIS,0,6.03,279.7,7.1,D,2,3-Feb-2020,4:37:00 AM
3,36.0207,126.9367,312.4,1.1,1.0,Aqua,MODIS,46,6.03,283.5,12.9,D,0,3-Feb-2020,4:37:00 AM
4,36.1727,126.9524,302.1,1.0,1.0,Aqua,MODIS,28,6.03,279.2,6.6,D,0,3-Feb-2020,4:37:00 AM


# Model Training

### Feature Selection

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

# these are the columns we don't need
unnecessary_columns = ["version", "instrument", "type", "satellite", "date", "time"]
# filter out unnecessary columns
selected_columns = [
    col for col in combined_data_df.columns if col not in unnecessary_columns
]
combined_data_df = combined_data_df[selected_columns]

X = combined_data_df.drop(columns=["confidence"])
X["daynight"] = X["daynight"].map({"D": 1, "N": 0})

y = combined_data_df["confidence"].values.ravel()

# Split the combined data ensuring stratification if applicable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### Train the Model

In [14]:
from sklearn.ensemble import RandomForestRegressor

# regressor model
model = RandomForestRegressor(max_features='log2', min_samples_leaf=2, n_estimators=200, random_state=42)
model.fit(X_train_scaled, y_train)
# Save the model
joblib.dump(model, "wildfire_predictor_model.pkl")

['wildfire_predictor_model.pkl']

### Model testing

In [15]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Absolute Error: {mae}")
    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")

# Evaluate the model
evaluate_model(model, X_test_scaled, y_test)

# Save the scaler
joblib.dump(scaler, "wildfire_predictor_scaler.pkl")

Mean Absolute Error: 8.51540961223337
Mean Squared Error: 139.872240387292
R^2 Score: 0.6541986812538385


['wildfire_predictor_scaler.pkl']

# Get new data

In [None]:
import pandas as pd

try:
    npl_url = f"https://firms.modaps.eosdis.nasa.gov/api/country/csv/{MAP_KEY}/MODIS_NRT/NPL/10"
    npl_data = pd.read_csv(npl_url)
    display(npl_data.head())
    npl_original = npl_data.copy()
except Exception as e:
    print("🔥 Failed to load Nepal wildfire data:", e)    

Unnamed: 0,Invalid MAP_KEY.


In [20]:
npl_data = npl_data.drop(columns=["version", "country_id", "instrument", "acq_date", "acq_time", "satellite"])
npl_data["daynight"] = npl_data["daynight"].map({"D": 1, "N": 0})
npl_data = npl_data.drop(columns=["confidence"])
npl_data_scaled = scaler.transform(npl_data)
npl_data_predictions = model.predict(npl_data_scaled)

for i, pred in enumerate(npl_data_predictions):
    print(f"Prediction for row {i+1}: {pred:.1f}")

KeyError: "['version', 'country_id', 'instrument', 'acq_date', 'acq_time', 'satellite'] not found in axis"

In [18]:
npl_original['confidence'].head()

NameError: name 'npl_original' is not defined

In [14]:
npl_original.head()

Unnamed: 0,country_id,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight
0,NPL,27.27485,84.68124,317.64,2.71,1.58,2025-05-11,434,Terra,MODIS,44,6.1NRT,297.55,30.62,D
1,NPL,28.3959,81.4378,310.72,1.12,1.05,2025-05-11,1531,Terra,MODIS,76,6.1NRT,297.61,7.37,N
2,NPL,28.76836,81.34875,324.38,1.9,1.35,2025-05-12,920,Aqua,MODIS,43,6.1NRT,305.38,19.99,D
3,NPL,28.43337,81.10894,331.01,1.0,1.0,2025-05-13,417,Terra,MODIS,81,6.1NRT,307.6,16.2,D
4,NPL,28.20702,81.38847,332.81,1.02,1.01,2025-05-14,858,Aqua,MODIS,77,6.1NRT,317.55,12.78,D
