# API Calls

To Improve our model, we'll add additional data regarding the natural environment given the coordinates provided for each fire.

In [51]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import pickle
import time
import sqlite3
import pandas as pd
import re

from pytz import timezone 
from datetime import datetime, timedelta, timezone
from meteostat import Stations, Daily, Point

In [52]:
with open('pickles/df_description.pkl', 'rb') as f:
    df_description = pickle.load(f)
    
with open('pickles/df_model.pkl', 'rb') as f:
    df_model = pickle.load(f)
    
with open('pickles/df_soil.pkl', 'rb') as f:
    df_soil = pickle.load(f)
    
with open('pickles/df_elev.pkl', 'rb') as f:
    df_elev = pickle.load(f)

### OpenMeteo API Elevation Data

To improve our model results, we'll be using the OpenMeteo Library, which takes in coordinates and the date the fire was discovered and returns elevation data. This elevation data is expected to provide clarity regarding the causes of fires, as specific causes are less likely to occur at higher elevations.

In [None]:
url = "https://api.open-meteo.com/v1/elevation"
batch_size = 100
df_elevation = pd.DataFrame(columns=["LATITUDE", "LONGITUDE", "elevation"])

for i in range(0, len(df_description), batch_size):
    df_batch = df_description.iloc[i:i+batch_size]
    
    for index, row in df_batch.iterrows():
        lat, lon = row["LATITUDE"], row["LONGITUDE"]
        params = {
            "latitude": lat,
            "longitude": lon,
        }
        response = requests.get(url, params=params)
        print(response.json())
        elevation = response.json()["elevation"][0]
        df_temp = pd.DataFrame({
            "LATITUDE": [lat],
            "LONGITUDE": [lon],
            "elevation": [elevation]
        })

{'elevation': [109.0]}
{'elevation': [147.0]}
{'elevation': [117.0]}
{'elevation': [109.0]}
{'elevation': [109.0]}
{'elevation': [117.0]}
{'elevation': [117.0]}
{'elevation': [100.0]}
{'elevation': [218.0]}
{'elevation': [109.0]}
{'elevation': [109.0]}
{'elevation': [117.0]}
{'elevation': [430.0]}
{'elevation': [109.0]}
{'elevation': [117.0]}
{'elevation': [74.0]}
{'elevation': [430.0]}
{'elevation': [147.0]}
{'elevation': [147.0]}
{'elevation': [147.0]}
{'elevation': [147.0]}
{'elevation': [430.0]}
{'elevation': [109.0]}


In [None]:
df_elev = pd.concat([df_elevation, df_temp], ignore_index=True)

In [None]:
with open('pickles/df_elev.pkl', 'wb') as f:
    pickle.dump(df_elev, f)

### SoilGrid API

We'll call this API to gather information about the soil conditions at each coordinate. This API call will return a classification of the soil as clay, sand, or silt for a depth of 5-15cm, as well as additional data pertaining to the density and nutrient load of the soil.

Soil conditions are a valuable component in our model as different soil types retain moisture more effectively than others and may support different types of vegetation. These variances in soil types may provide insight into the fuel density load that could sustain a fire at the given coordinates.

In [None]:
url = 'https://rest.isric.org/soilgrids/v2.0/properties/query'
soil_data = []

properties = ['clay', 'sand', 'silt', 'cec', 'cfvo', 'nitrogen', 'soc', 'bdod','phh2o']
depths = ['5-15cm']
values = ['mean']

# Set batch size
batch_size = 6000

# Iterate over batches of coordinates
for i in range(0, len(df_description), batch_size):
    # Get batch of coordinates
    df_batch = df_description.iloc[i:i+batch_size]
    
    # Iterate over coordinates in batch
    for index, row in df_batch.iterrows():
        lat = row['LATITUDE']
        lon = row['LONGITUDE']

        # Make API request
        params = {
            'lon': lon,
            'lat': lat,
            'property': properties,
            'depth': depths,
            'value': values
        }

        headers = {'accept': 'application/json'}
        response = requests.get(url, params=params, headers=headers)
        print(response)
        data = response.json()

        # Extract soil data and add to list of dictionaries
        soil_dict = {'LATITUDE': lat, 'LONGITUDE': lon}
        for layer in data['properties']['layers']:
            name = layer['name']
            if name in ['clay','sand', 'silt','cec','cfvo','nitrogen','soc','bdod','phh2o']:
                for depth in layer['depths']:
                    value = depth['values']['mean']
                    soil_dict[name] = value
        soil_data.append(soil_dict)



In [None]:
df_soil = pd.DataFrame.from_records(soil_data)
df_soil

In [None]:
with open('pickles/df_soil.pkl', 'wb') as f:
    pickle.dump(df_soil, f)

### Merging Dataframes

In [53]:
df_elev = df_elev.dropna()
df_elev['elevation'] = df_elev['elevation'].astype(str).apply(lambda x: [int(float(i)) for i in x.strip('[]').split(',')])

In [54]:
df_elev['elevation'] = [i[0] if isinstance(i, list) else i for i in df_elev['elevation']]

In [55]:
df_elev.dtypes

LATITUDE     float64
LONGITUDE    float64
elevation      int64
dtype: object

In [56]:
df_soil = df_soil.drop_duplicates()
df_elev = df_elev.drop_duplicates()
df_model = df_model.drop_duplicates()

In [57]:
df_merged = pd.merge(df_elev, df_soil, on = ['LATITUDE', 'LONGITUDE'], how='inner')
df_merged.dropna(axis = 0)
df = pd.merge(df_model,df_merged, on = ['LATITUDE', 'LONGITUDE'], how='inner')

In [58]:
df.shape

(76472, 21)

In [59]:
df.dtypes

DISCOVERY_DOY                   int64
STAT_CAUSE_DESCR               object
STATE                          object
SOURCE_SYSTEM_TYPE             object
duration                      float64
FIRE_SIZE_CLASS                object
LATITUDE                      float64
LONGITUDE                     float64
SOURCE_REPORTING_UNIT_NAME     object
FIRE_YEAR                       int64
FIRE_SIZE                     float64
elevation                       int64
bdod                          float64
cec                           float64
cfvo                          float64
clay                          float64
nitrogen                      float64
phh2o                         float64
sand                          float64
silt                          float64
soc                           float64
dtype: object

In [60]:
duplicates = df.duplicated(subset =['LATITUDE', 'LONGITUDE','bdod','sand'])

In [61]:
duplicates.sum()

11598

In [62]:
df = df.drop_duplicates(ignore_index= True)

In [63]:
df = df.dropna()

### Combining Target Variable Bins

Now that we have our complete dataset, we'll further condense our target feature by removing the Miscellaneous feature. Miscellaneous as a cause description doesn't provide enough information about the cause of the fire to accuractely model the result.

In [64]:
df=df[df["STAT_CAUSE_DESCR"]!='Miscellaneous']

we'll then sort the children subclass of the STAT_CAUSE_DESCRIPTION feature to a new subclass called 'negligence' 

In [65]:
df['STAT_CAUSE_DESCR'] = np.where(df['STAT_CAUSE_DESCR'] == 'Children', 'Negligence',df['STAT_CAUSE_DESCR'])

Finally, we'll sort the equipment use subclass into the a new infrastructure subclass. This infrastructure subclass is used to describe all instances of the built environment causing a fire.

In [66]:
df['STAT_CAUSE_DESCR'] = np.where(df['STAT_CAUSE_DESCR'] == 'Equipment Use','Infrastructure', df['STAT_CAUSE_DESCR'])

### Saving dfs as variables using Pickle

In [67]:
with open('pickles/df.pkl', 'wb') as f:
    pickle.dump(df, f)