# Imports

In [1]:
import pandas as pd
import os
from dotenv import load_dotenv
import numpy as np
import json

load_dotenv()

True

# Cleaning Data Logic

In [95]:
def clean_data():
    """Load the raw California fire-incident CSV, clean it, and save the result.

    Steps performed:
      1. Read ``Data/California_Fire_Incidents.csv`` (relative to the current
         working directory) and keep only ``AcresBurned``, ``Counties`` and
         ``Started``.
      2. Drop rows with a missing value in any of those columns.
      3. Drop every county that has fewer than 4 remaining rows, and always
         drop "Nevada".
      4. Split ``Started`` into ``Started_Date`` (first 10 characters) and
         ``Started_Hour`` (characters 11-12, kept as a string).
      5. Append ", CA" to each county name.
      6. Write the frame to
         ``Cleaned Data/Cleaned_California_Fire_Incidents.csv`` (the directory
         is created if it does not exist).

    Returns:
        pandas.DataFrame: the cleaned frame with columns ``AcresBurned``,
        ``Counties``, ``Started``, ``Started_Date`` and ``Started_Hour``.
        The original row index is preserved (it is not reset).
    """
    cwd = os.getcwd()
    # os.path.join keeps the path valid on every OS, not only Windows.
    wildfire_data_url = os.path.join(cwd, "Data", "California_Fire_Incidents.csv")

    columns = ['AcresBurned', 'Counties', 'Started']

    wildfire_data = pd.read_csv(wildfire_data_url)
    wildfire_data = wildfire_data[columns]

    # Drop NaN values (should only be 3, in AcresBurned)
    wildfire_data = wildfire_data.dropna()

    # Drop counties with fewer than 4 data points
    county_counts = wildfire_data["Counties"].value_counts()
    counties_toDrop = county_counts[county_counts < 4].index.tolist()
    # NOTE(review): "Nevada" is excluded unconditionally; the reason is not
    # recorded here (possibly the location string is ambiguous with the
    # state) - confirm.
    counties_toDrop.append("Nevada")
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the unfiltered one.
    wildfire_data = wildfire_data[~wildfire_data["Counties"].isin(counties_toDrop)].copy()

    # Split the timestamp string into date and hour parts
    wildfire_data["Started_Date"] = wildfire_data["Started"].str[:10]
    wildfire_data["Started_Hour"] = wildfire_data["Started"].str[11:13]

    # Add ", CA" to each county name
    wildfire_data["Counties"] = wildfire_data["Counties"] + ", CA"

    # Save cleaned data
    cleaned_dir = os.path.join(cwd, "Cleaned Data")
    os.makedirs(cleaned_dir, exist_ok=True)
    cleaned_wildfire_data_url = os.path.join(cleaned_dir, "Cleaned_California_Fire_Incidents.csv")
    wildfire_data.to_csv(cleaned_wildfire_data_url, index=False)

    return wildfire_data

# Testing a Single API Call

In [12]:
# Smoke test: fetch one day of weather for a single county.
# NOTE: get_weather_data is defined in the "API Call Wrapper Function" section
# further down; that cell must have been run before this one.
result = get_weather_data(request_args = ("2013-08-17", "2013-08-17", "Ventura, CA"))

In [48]:
# Response-level checks first: the object itself, its status code, and success.
print(result)
print(result.status_code)
print(result.status_code == 200)

# The body's "data" field is itself a JSON string; decode it once and reuse it.
payload_rows = json.loads(result.json()["data"])["data"]
print(payload_rows[11][4:])   # entry 11 with its first four fields skipped
print(len(payload_rows))
print(type(payload_rows))

<Response [200]>
200
True
[15.16, 0.84, 0.85, 100.45, 1.37, 0.43]
24
<class 'list'>


# Testing Adding API Data

In [90]:
def addWeatherData(row):
    """Fetch the weather values for one fire record.

    Intended for use with ``DataFrame.apply(..., axis=1)``.

    Args:
        row: a record providing ``Started_Date``, ``Started_Hour`` and
            ``Counties``; its ``name`` is printed as a progress marker.

    Returns:
        pandas.Series: the values of the payload entry indexed by
        ``int(Started_Hour)`` with its first four fields skipped, or six NaNs
        when the API does not answer with status 200.
    """
    print(row.name)

    # Request a single day: start and end are both the fire's start date.
    day = row["Started_Date"]
    response = get_weather_data(request_args=(day, day, row["Counties"]))

    if response.status_code == 200:
        # The body's "data" field is a JSON string that has to be decoded again.
        entries = json.loads(response.json()["data"])["data"]
        # presumably one entry per hour of the day (the sample call returned
        # 24) - TODO confirm against the API docs
        started_hour = int(row["Started_Hour"])
        return pd.Series(entries[started_hour][4:])

    # Failed request: one NaN per weather column so the row stays aligned.
    return pd.Series([np.nan] * 6)

In [86]:
# Trial run of the weather enrichment on the first five fires only.
weather_columns = [
    'temperature', 'relative_humidity', 'wind_speed', 'wind_direction',
    'total_precipitation', 'volumetric_soil_water_layer_1',
]

test = wildfire_data.head(5).copy()
print(test)
print(test.shape)

# One API call per row; the returned values become the new weather columns.
test[weather_columns] = test.apply(addWeatherData, axis=1)
test.head()

   AcresBurned         Counties               Started Started_Date  \
0     257314.0     Tuolumne, CA  2013-08-17T15:25:00Z   2013-08-17   
1      30274.0  Los Angeles, CA  2013-05-30T15:28:00Z   2013-05-30   
2      27531.0    Riverside, CA  2013-07-15T13:43:00Z   2013-07-15   
3      27440.0       Placer, CA  2013-08-10T16:30:00Z   2013-08-10   
4      24251.0      Ventura, CA  2013-05-02T07:01:00Z   2013-05-02   

  Started_Hour  
0           15  
1           15  
2           13  
3           16  
4           07  
(5, 5)
Tuolumne, CA 2013-08-17 15
Los Angeles, CA 2013-05-30 15
Riverside, CA 2013-07-15 13
Placer, CA 2013-08-10 16
Ventura, CA 2013-05-02 07


Unnamed: 0,AcresBurned,Counties,Started,Started_Date,Started_Hour,temperature,relative_humidity,wind_speed,wind_direction,total_precipitation,volumetric_soil_water_layer_1
0,257314.0,"Tuolumne, CA",2013-08-17T15:25:00Z,2013-08-17,15,21.28,0.29,1.3,58.58,0.0,0.13
1,30274.0,"Los Angeles, CA",2013-05-30T15:28:00Z,2013-05-30,15,19.81,0.56,1.34,174.3,0.0,0.07
2,27531.0,"Riverside, CA",2013-07-15T13:43:00Z,2013-07-15,13,22.62,0.62,2.77,256.01,0.0,0.1
3,27440.0,"Placer, CA",2013-08-10T16:30:00Z,2013-08-10,16,19.13,0.3,1.26,204.91,0.0,0.22
4,24251.0,"Ventura, CA",2013-05-02T07:01:00Z,2013-05-02,7,16.4,0.94,0.19,81.05,2.41,0.45


In [91]:
# Same trial as above, limited to the first two fires.
weather_columns = [
    'temperature', 'relative_humidity', 'wind_speed', 'wind_direction',
    'total_precipitation', 'volumetric_soil_water_layer_1',
]

test1 = wildfire_data.head(2).copy()
print(test1)
print(test1.shape)

# One API call per row; the returned values become the new weather columns.
test1[weather_columns] = test1.apply(addWeatherData, axis=1)
test1.head()

   AcresBurned         Counties               Started Started_Date  \
0     257314.0     Tuolumne, CA  2013-08-17T15:25:00Z   2013-08-17   
1      30274.0  Los Angeles, CA  2013-05-30T15:28:00Z   2013-05-30   

  Started_Hour  
0           15  
1           15  
(2, 5)
0
Tuolumne, CA 2013-08-17 15
1
Los Angeles, CA 2013-05-30 15


Unnamed: 0,AcresBurned,Counties,Started,Started_Date,Started_Hour,temperature,relative_humidity,wind_speed,wind_direction,total_precipitation,volumetric_soil_water_layer_1
0,257314.0,"Tuolumne, CA",2013-08-17T15:25:00Z,2013-08-17,15,21.28,0.29,1.3,58.58,0.0,0.13
1,30274.0,"Los Angeles, CA",2013-05-30T15:28:00Z,2013-05-30,15,19.81,0.56,1.34,174.3,0.0,0.07


# Testing If Counties Valid
Counties that appear before Fresno were tested manually; the cell below checks Fresno and the remaining counties.

In [33]:
# Check that the weather API accepts each "<county>, CA" location string.
counties_temp = ['Fresno',
 'Siskiyou', 'Humboldt', 'Tehama', 'Shasta', 'San Diego', 'Kern', 'Sonoma',
 'Contra Costa', 'Butte', 'Tulare', 'Santa Barbara', 'Mariposa', 'Monterey',
 'El Dorado', 'San Bernardino', 'Plumas', 'Modoc', 'San Luis Obispo', 'Madera',
 'Inyo', 'Napa', 'San Benito', 'San Joaquin', 'Lake', 'Alameda', 'Glenn', 'Yolo',
 'Sacramento', 'Stanislaus', 'Solano', 'Merced', 'Mendocino', 'Lassen', 'Amador',
 'Yuba', 'Santa Clara', 'Calaveras', 'Orange', 'Colusa', 'Trinity', 'Del Norte',
 'Mono', 'Kings', 'Santa Cruz', 'Marin']
for county in counties_temp:
    location = f"{county}, CA"
    print(location, end=" ")
    # Query the county under test. The request previously hard-coded
    # "Ventura, CA", so every iteration validated Ventura instead of `county`.
    # The date is a fixed single day used only to get a response.
    request_result = get_weather_data(request_args = ("2013-08-17", "2013-08-17", location))
    print("OK" if request_result.status_code == 200 else "NOT OK!!")

Fresno, CA OK
Siskiyou, CA OK
Humboldt, CA OK
Tehama, CA OK
Shasta, CA OK
San Diego, CA OK
Kern, CA OK
Sonoma, CA OK
Contra Costa, CA OK
Butte, CA OK
Tulare, CA OK
Santa Barbara, CA OK
Mariposa, CA OK
Monterey, CA OK
El Dorado, CA OK
San Bernardino, CA OK
Plumas, CA OK
Modoc, CA OK
San Luis Obispo, CA OK
Madera, CA OK
Inyo, CA OK
Napa, CA OK
San Benito, CA OK
San Joaquin, CA OK
Lake, CA OK
Alameda, CA OK
Glenn, CA OK
Yolo, CA OK
Sacramento, CA OK
Stanislaus, CA OK
Solano, CA OK
Merced, CA OK
Mendocino, CA OK
Lassen, CA OK
Amador, CA OK
Yuba, CA OK
Santa Clara, CA OK
Calaveras, CA OK
Orange, CA OK
Colusa, CA OK
Trinity, CA OK
Del Norte, CA OK
Mono, CA OK
Kings, CA OK
Santa Cruz, CA OK
Marin, CA OK


# API Call Wrapper Function

In [92]:
import requests

def get_weather_data(
    param_args=None,
    request_args = ("start", "end", "location"),
    timeout=60,
):
    """Send one GET request to the Oikolab weather endpoint.

    Args:
        param_args: weather parameter names to request. ``None`` (the default)
            requests temperature, relative_humidity, wind_speed,
            wind_direction, total_precipitation and
            volumetric_soil_water_layer_1.
        request_args: a ``(start, end, location)`` tuple. The default is only
            a placeholder; callers are expected to pass real values.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            raises ``requests.exceptions.Timeout`` instead of blocking forever.

    Returns:
        The ``requests`` response object, returned as-is; callers check
        ``status_code`` themselves.
    """
    # None sentinel instead of a mutable list as the default argument.
    if param_args is None:
        param_args = ['temperature', 'relative_humidity', 'wind_speed', 'wind_direction',
                      'total_precipitation', 'volumetric_soil_water_layer_1']

    api_url = "https://api.oikolab.com/weather"
    start, end, location = request_args

    request_params = {
        "param": param_args,
        "start": start,
        "end": end,
        "location": location,
        # Read from the environment; None if the variable is not set.
        "api-key": os.getenv('API_KEY_sdfds'),
    }
    result = requests.get(
        api_url,
        params=request_params,
        timeout=timeout,
    )

    return result

# Running
## Imports

In [94]:
import pandas as pd
import os
from dotenv import load_dotenv
import numpy as np
import json

load_dotenv()

True

## Clean Data

In [97]:
# Build the cleaned dataset (clean_data also writes it to disk) and preview it.
wildfire_data = clean_data()

print(wildfire_data.shape)
wildfire_data.head()

(1602, 5)


Unnamed: 0,AcresBurned,Counties,Started,Started_Date,Started_Hour
0,257314.0,"Tuolumne, CA",2013-08-17T15:25:00Z,2013-08-17,15
1,30274.0,"Los Angeles, CA",2013-05-30T15:28:00Z,2013-05-30,15
2,27531.0,"Riverside, CA",2013-07-15T13:43:00Z,2013-07-15,13
3,27440.0,"Placer, CA",2013-08-10T16:30:00Z,2013-08-10,16
4,24251.0,"Ventura, CA",2013-05-02T07:01:00Z,2013-05-02,7
