# Imports

In [1]:
import sys

import numpy as np
import pandas as pd
from scipy.spatial import KDTree, cKDTree

# Load cleaned wildfire data

In [2]:
# Read the cleaned fire records, indexed by FOD_ID. The year and day-of-year
# columns are stored as 16-bit integers to save space.
compact_ints = {'FIRE_YEAR': np.int16, 'DISCOVERY_DOY': np.int16}
fires = pd.read_csv("D:/wildfires/fires_cleaned.csv", index_col="FOD_ID", dtype=compact_ints)

# Shrink memory use further by holding label-like columns as categoricals
# instead of plain strings/values.
for column in ("STAT_CAUSE_DESCR", "STATE", "DISCOVERY_MONTH"):
    fires[column] = pd.Categorical(fires[column])

  mask |= (ar1 == a)


In [3]:
# Spot-check five randomly chosen fire records after loading
fires.sample(5)

Unnamed: 0_level_0,FIRE_NAME,STAT_CAUSE_DESCR,FIRE_YEAR,DISCOVERY_DOY,FIRE_SIZE,LATITUDE,LONGITUDE,STATE,CONTAINED,DISCOVERY_MONTH
FOD_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
201140120,,Debris Burning,2011,80,0.1,41.092564,-95.976501,NE,True,3
986206,,Debris Burning,2000,83,2.0,34.4672,-92.8864,AR,False,3
920086,,Debris Burning,2005,89,6.0,34.7947,-88.6322,MS,False,3
184758,MNL11,Arson,1992,78,3.0,46.2091,-93.7564,MN,True,3
608944,,Arson,2004,95,3.0,35.62873,-94.81113,OK,False,4


# Load city-level data

In [4]:
# Region holds numeric codes for some countries and letter codes for others
# (e.g. 12 for mx, CT for us), so read it as text instead of letting pandas
# infer a mixed-type column.
cities = pd.read_csv("D:/wildfires/worldcitiespop.csv", dtype={"Region": str})

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
# Keep only the cities whose population is recorded
cities = cities.dropna(subset=["Population"])

In [6]:
# Only cities in North America, for speed.
# Country codes in the cities file are lowercase (e.g. 'mx', 'us').
north_america = {'us', 'ca', 'mx'}
# Vectorised membership test instead of a per-row Python lambda.
cities = cities[cities.Country.isin(north_america)]

In [7]:
# Spot-check five randomly chosen cities that survived the filters
cities.sample(5)

Unnamed: 0,Country,City,AccentCity,Region,Population,Latitude,Longitude
1879808,mx,huitziltepec,Huitziltepec,12,4419.0,17.816667,-99.466667
2924960,us,coventry,Coventry,CT,12428.0,41.77,-72.305556
1861786,mx,buenos aires,Buenos Aires,16,10707.0,19.066667,-102.166667
1915250,mx,tocuila,Tocuila,30,4248.0,18.95,-97.016667
1857343,mx,acuexcomac,Acuexcomac,21,4236.0,18.841667,-98.063889


# Augment data to include population center distances

In [8]:
# (rows, columns) of the coordinate table: rows = cities remaining after filtering
cities[['Latitude', 'Longitude']].shape

(6583, 2)

In [9]:
# Make KD-trees for fast lookup
for pop in 10**np.array([6,5,4,3]):
    
    print("Querying for cities with population >= %d" % pop)
    
    cities_pop = cities.loc[cities.Population >= pop]
    
    print("%d fires and %d cities" % (fires.shape[0], cities_pop.shape[0]))
    
    kd_tree = KDTree(cities_pop[['Latitude', 'Longitude']])
    %time d_neighbors, idx_neighbors = kd_tree.query(fires[['LATITUDE', 'LONGITUDE']])
    
    # Store resulting distances
    fires["DISTANCE_CITY_%d" % pop] = d_neighbors

Querying for cities with population >= 1000000
1880465 fires and 21 cities
Wall time: 2min 50s
Querying for cities with population >= 100000
1880465 fires and 385 cities
Wall time: 4min 3s
Querying for cities with population >= 10000
1880465 fires and 4421 cities
Wall time: 5min 23s
Querying for cities with population >= 1000
1880465 fires and 6565 cities
Wall time: 5min 53s


In [10]:
# Spot-check five random fires; the four DISTANCE_CITY_* columns should now be present
fires.sample(5)

Unnamed: 0_level_0,FIRE_NAME,STAT_CAUSE_DESCR,FIRE_YEAR,DISCOVERY_DOY,FIRE_SIZE,LATITUDE,LONGITUDE,STATE,CONTAINED,DISCOVERY_MONTH,DISTANCE_CITY_1000000,DISTANCE_CITY_100000,DISTANCE_CITY_10000,DISTANCE_CITY_1000
FOD_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
201151882,WEST BAY (03),Lightning,2011,160,35.0,30.2704,-85.8983,FL,True,6,9.478344,1.626133,0.251226,0.251226
1091452,,Railroad,1993,56,15.0,28.22,-81.73,FL,False,2,13.444523,0.473338,0.154244,0.154244
217647,HANCOCK,Equipment Use,1998,227,1.0,38.832631,-120.867219,CA,True,8,5.453439,0.427358,0.124302,0.124302
1671352,,Debris Burning,2007,288,35.0,38.44403,-83.21827,KY,True,10,5.589353,1.301509,0.362425,0.362425
1643291,FY2000-POLK-133,Debris Burning,2000,63,25.1,33.9331,-85.291,GA,True,3,8.260883,0.921543,0.347594,0.125773


# Load weather-station level data

In [11]:
# Read the weather-station history table; its CTRY, LAT, LON and ELEV(M)
# columns are used by the cells below.
stations = pd.read_csv('D:/wildfires/isd-history.csv')

In [12]:
# Only stations in North America, for speed. Also exclude unknown latitude, longitude, elevation.
# Country codes in the station file are uppercase (e.g. 'US'), unlike the
# lowercase codes in the cities file. NOTE: this rebinds `north_america`, so
# re-running the earlier city filter after this cell would match nothing.
north_america = {'US', 'CA', 'MX'}
stations = stations[stations.CTRY.isin(north_america)
                & stations.LAT.notna()
                & stations.LON.notna()
                & stations["ELEV(M)"].notna()]

In [13]:
# Spot-check five randomly chosen stations that survived the filters
stations.sample(5)

Unnamed: 0,USAF,WBAN,STATION NAME,CTRY,STATE,ICAO,LAT,LON,ELEV(M),BEGIN,END
19507,724293,376,MOUNT WERNER,US,CO,K3MW,40.46,-106.76,3241.2,20050322,20140721
17127,720202,99999,TILLAMOOK AWS,US,OR,KTMK,45.417,-123.817,11.0,20040429,20180514
17119,720172,99999,MENA INTERMOUNTAIN,US,AR,KMEZ,34.545,-94.203,329.0,20041103,20051231
17471,720542,99999,LARGO VISTA / RUSTY ALLEN AIRPORT,US,TX,KRYW,30.5,-97.967,375.0,20080825,20180322
15303,702700,99999,FT. RICHARDSON/BRYANT AHP AK,US,,PAFR,61.267,-149.65,115.0,19731201,20180520


# Augment data to include nearest elevation

In [14]:
# Make KD-trees for fast lookup
    
print("Querying for elevation")

print("%d fires and %d stations" % (fires.shape[0], stations.shape[0]))

kd_tree = KDTree(stations[['LAT', 'LON']])
%time d_neighbors, idx_neighbors = kd_tree.query(fires[['LATITUDE', 'LONGITUDE']])

# Store resulting elevations
fires["APPROX_ELEVATION"] = stations["ELEV(M)"].iloc[idx_neighbors].values

Querying for elevation
1880465 fires and 9498 stations
Wall time: 6min 18s


In [15]:
# Spot-check five random fires; the APPROX_ELEVATION column should now be present
fires.sample(5)

Unnamed: 0_level_0,FIRE_NAME,STAT_CAUSE_DESCR,FIRE_YEAR,DISCOVERY_DOY,FIRE_SIZE,LATITUDE,LONGITUDE,STATE,CONTAINED,DISCOVERY_MONTH,DISTANCE_CITY_1000000,DISTANCE_CITY_100000,DISTANCE_CITY_10000,DISTANCE_CITY_1000,APPROX_ELEVATION
FOD_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1034469,,Miscellaneous,1998,138,1.0,34.6281,-81.8247,SC,False,5,8.526956,1.008681,0.338856,0.165416,212.1
19104376,,Debris Burning,1997,261,0.25,30.9844,-83.7563,GA,True,9,11.542191,0.757382,0.198068,0.198068,86.3
201779996,FRY FIRE,Debris Burning,2013,115,0.1,48.36531,-119.26689,WA,True,4,3.966366,1.549878,1.177989,0.697256,396.2
19106826,,Debris Burning,1995,348,0.19,31.3955,-82.8381,GA,True,12,11.49377,1.590253,0.113735,0.113735,78.3
300116484,WESTERN FIRE,Arson,2014,242,0.4,40.8173,-72.8169,NY,True,8,1.193952,0.503309,0.028905,0.028905,26.2


In [16]:
# Persist the augmented fire table; the FOD_ID index is written as the first column
fires.to_csv("D:/wildfires/fires_merged.csv")

# Load weather data (todo)