In [2]:
import pandas as pd
import geopandas as gp
import numpy as np
import math
from functools import partial
from geog import propagate
from shapely.geometry import Polygon

In [3]:
METERS_PER_MILE: float = 1609.34
GEOG_N_POINTS: int = 20
BUFFER_RADIUS_MILES: float = 15

In [4]:
fires_df = pd.read_csv('Culled Wildfires 2000-2015.csv').dropna(subset=['LATITUDE','LONGITUDE'])

In [5]:
weather_df = pd.read_csv('Mean_Weather_Data_Final.csv').dropna(subset=['Lat','Lon'])

In [27]:
weather_df['Year'].value_counts()

2015    2508
2013    2508
2012    2508
2011    2505
2014    2504
2010    2502
2006    2502
2005    2496
2004    2496
2000    2496
2002    2495
2001    2495
2008    2494
2003    2493
2009    2489
2007    2489
Name: Year, dtype: int64

In [6]:
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39980 entries, 0 to 39979
Data columns (total 10 columns):
Unnamed: 0    39980 non-null int64
ID            39980 non-null object
Year          39980 non-null int64
Month         39980 non-null int64
Mean_Tmax     39980 non-null float64
Mean_Tmin     39980 non-null float64
Mean_Prcp     39980 non-null float64
Name          39980 non-null object
Lat           39980 non-null float64
Lon           39980 non-null float64
dtypes: float64(5), int64(3), object(2)
memory usage: 3.4+ MB


In [7]:
fires_gdf = gp.GeoDataFrame(fires_df, 
                            #geometry=[shapely.Point(x, y) for x, y in zip(fires_df.LATITUDE, fires_df.LONGITUDE)])
                            geometry=gp.points_from_xy(fires_df.LATITUDE, fires_df.LONGITUDE))
fires_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 121092 entries, 0 to 121091
Data columns (total 12 columns):
FIRE_DISC_DATE      121092 non-null object
FIRE_CONT_DATE      121092 non-null object
FIRE_DURATION       121092 non-null int64
FIRE_SIZE_CLASS     121092 non-null object
FIRE_SIZE           121092 non-null float64
STAT_CAUSE_DESCR    121092 non-null object
STATE               121092 non-null object
LATITUDE            121092 non-null float64
LONGITUDE           121092 non-null float64
FIRE_YEAR_DISC      121092 non-null int64
FIRE_MONTH_DISC     121092 non-null int64
geometry            121092 non-null geometry
dtypes: float64(3), geometry(1), int64(3), object(5)
memory usage: 12.0+ MB


In [8]:
# source - this is from one of siri's old project https://github.com/sirisurab/transpred/blob/master/src/geo_merger.py
distance: float = BUFFER_RADIUS_MILES * METERS_PER_MILE
angles = np.linspace(0, 360, GEOG_N_POINTS)
fires_gdf['buffer'] = fires_gdf.geometry.apply(propagate, angle=angles, d=distance)
fires_gdf['buffer'] = fires_gdf['buffer'].apply(Polygon)

In [9]:
fires_gdf.head()

Unnamed: 0,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,FIRE_MONTH_DISC,geometry,buffer
0,2000-01-01,2000-01-20,19,B,5.0,Debris Burning,CA,36.44891,-118.73811,2000,1,POINT (36.449 -118.738),POLYGON ((35.99739044878418 -61.26113994531997...
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,1,POINT (34.710 -94.866),"POLYGON ((32.1526907810114 -85.12906013930856,..."
2,2000-01-01,2000-01-02,1,F,1200.0,Miscellaneous,TX,35.7042,-101.5456,2000,1,POINT (35.704 -101.546),POLYGON ((34.61964007622178 -78.45238678949779...
3,2000-01-01,2000-01-02,1,G,7320.0,Fireworks,OK,36.3765,-96.2922,2000,1,POINT (36.377 -96.292),POLYGON ((34.39644773453859 -83.70407096763326...
4,2000-01-03,2000-01-04,1,A,0.1,Powerline,WV,38.367662,-82.216708,2000,1,POINT (38.368 -82.217),"POLYGON ((39.97031253504689 -82.2136991145406,..."


In [10]:
fires_gdf = fires_gdf.set_geometry('buffer').rename(columns={'geometry':'point'})
fires_gdf.head()

Unnamed: 0,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,FIRE_MONTH_DISC,point,buffer
0,2000-01-01,2000-01-20,19,B,5.0,Debris Burning,CA,36.44891,-118.73811,2000,1,POINT (36.449 -118.738),"POLYGON ((35.99739 -61.26114, 36.02089 -61.331..."
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,1,POINT (34.710 -94.866),"POLYGON ((32.15269 -85.12906, 32.25567 -85.200..."
2,2000-01-01,2000-01-02,1,F,1200.0,Miscellaneous,TX,35.7042,-101.5456,2000,1,POINT (35.704 -101.546),"POLYGON ((34.61964 -78.45239, 34.67218 -78.523..."
3,2000-01-01,2000-01-02,1,G,7320.0,Fireworks,OK,36.3765,-96.2922,2000,1,POINT (36.377 -96.292),"POLYGON ((34.39645 -83.70407, 34.48254 -83.774..."
4,2000-01-03,2000-01-04,1,A,0.1,Powerline,WV,38.367662,-82.216708,2000,1,POINT (38.368 -82.217),"POLYGON ((39.97031 -82.21370, 39.87000 -82.143..."


In [11]:
weather_gdf = gp.GeoDataFrame(weather_df, 
                            #geometry=[shapely.Point(x, y) for x, y in zip(fires_df.LATITUDE, fires_df.LONGITUDE)])
                            geometry=gp.points_from_xy(weather_df.Lat, weather_df.Lon))
weather_gdf.head()

Unnamed: 0.1,Unnamed: 0,ID,Year,Month,Mean_Tmax,Mean_Tmin,Mean_Prcp,Name,Lat,Lon,geometry
0,0,USC00042863,2000,1,71.551724,44.482759,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
1,1,USC00042863,2000,2,70.136364,45.954545,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
2,2,USC00042863,2000,3,70.0,45.806452,0.032258,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
3,3,USC00042863,2000,4,76.482759,51.241379,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
4,4,USC00042863,2000,5,80.928571,54.714286,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)


In [28]:
joined_gdf = gp.sjoin(fires_gdf, weather_gdf, how="left", op='intersects')
joined_gdf

Unnamed: 0.1,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,...,Unnamed: 0,ID,Year,Month,Mean_Tmax,Mean_Tmin,Mean_Prcp,Name,Lat,Lon
0,2000-01-01,2000-01-20,19,B,5.0,Debris Burning,CA,36.448910,-118.738110,2000,...,,,,,,,,,,
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,...,10295.0,USW00013882,2000.0,1.0,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,...,10296.0,USW00013882,2000.0,2.0,61.862069,35.758621,0.000000,Chattanooga,35.0311,-85.2014
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,...,10297.0,USW00013882,2000.0,3.0,68.967742,43.419355,0.032258,Chattanooga,35.0311,-85.2014
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,...,10298.0,USW00013882,2000.0,4.0,70.833333,47.500000,0.233333,Chattanooga,35.0311,-85.2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121089,2015-12-26,2015-12-27,1,E,300.0,Powerline,TX,35.827317,-101.419350,2015,...,7412.0,USW00013722,2015.0,10.0,71.000000,49.838710,0.096774,Raleigh,35.8922,-78.7819
121089,2015-12-26,2015-12-27,1,E,300.0,Powerline,TX,35.827317,-101.419350,2015,...,7413.0,USW00013722,2015.0,11.0,65.466667,45.400000,0.100000,Raleigh,35.8922,-78.7819
121089,2015-12-26,2015-12-27,1,E,300.0,Powerline,TX,35.827317,-101.419350,2015,...,7414.0,USW00013722,2015.0,12.0,65.064516,47.064516,0.064516,Raleigh,35.8922,-78.7819
121090,2015-12-28,2015-12-29,1,B,1.0,Children,FL,27.697300,-82.307200,2015,...,,,,,,,,,,


In [13]:
#converting gdf to df
joined_df = pd.DataFrame(joined_gdf)

In [14]:
joined_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2509930 entries, 1 to 102271
Data columns (total 24 columns):
FIRE_DISC_DATE      object
FIRE_CONT_DATE      object
FIRE_DURATION       int64
FIRE_SIZE_CLASS     object
FIRE_SIZE           float64
STAT_CAUSE_DESCR    object
STATE               object
LATITUDE            float64
LONGITUDE           float64
FIRE_YEAR_DISC      int64
FIRE_MONTH_DISC     int64
point               geometry
buffer              geometry
index_right         int64
Unnamed: 0          int64
ID                  object
Year                int64
Month               int64
Mean_Tmax           float64
Mean_Tmin           float64
Mean_Prcp           float64
Name                object
Lat                 float64
Lon                 float64
dtypes: float64(8), geometry(2), int64(7), object(7)
memory usage: 478.7+ MB


In [15]:
joined_df = joined_df.reset_index()


In [18]:
joined_df.drop_duplicates(subset='index',inplace=True)

In [23]:
joined_df['FIRE_DISC_DATE'] = pd.to_datetime(joined_df['FIRE_DISC_DATE'],format="%Y-%m-%d")

In [24]:
finaljoin_df = joined_df.loc[(joined_df['FIRE_DISC_DATE'].dt.year == joined_df['Year']) & (joined_df['FIRE_DISC_DATE'].dt.month == joined_df['Month'])]

In [25]:
finaljoin_df

Unnamed: 0.1,index,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,...,Unnamed: 0,ID,Year,Month,Mean_Tmax,Mean_Tmin,Mean_Prcp,Name,Lat,Lon
0,1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
64512,13,2000-01-07,2000-07-07,182,B,0.43,Debris Burning,GA,31.4642,-83.5345,...,2039,USW00003813,2000,1,56.387097,35.064516,0.096774,Macon,32.6847,-83.6528
75072,14,2000-01-07,2000-01-09,2,C,60.0,Miscellaneous,MS,31.366667,-91.15,...,9143,USW00013865,2000,1,59.193548,36.645161,0.0,Meridian,32.3347,-88.7442
143040,15,2000-01-08,2000-01-09,1,A,0.1,Miscellaneous,NY,43.16803,-77.572036,...,16823,USW00014768,2000,1,32.516129,13.806452,0.0,RochesterNY,43.1167,-77.6767
253824,19,2000-01-11,2000-01-12,1,B,3.1,Miscellaneous,KY,36.9675,-84.468611,...,33332,USW00093812,2000,1,37.709677,19.548387,0.096774,Cincinnati,39.1033,-84.4189
278016,20,2000-01-12,2000-01-13,1,A,0.1,Miscellaneous,NY,40.929085,-73.872211,...,36908,USW00094728,2000,1,37.870968,24.677419,0.0,NewYork,40.7789,-73.9692
435648,21,2000-01-12,2000-01-13,1,C,10.0,Campfire,WV,37.570092,-81.013241,...,2615,USW00003859,2000,1,40.8,23.633333,0.0,Bluefield,37.2958,-81.2078
470784,24,2000-01-14,2000-01-15,1,D,160.0,Miscellaneous,TX,30.45,-95.05,...,34796,USW00093842,2000,1,57.225806,38.129032,0.032258,ColumbusGA,32.5161,-84.9422
510144,27,2000-01-15,2000-01-16,1,C,65.0,Miscellaneous,MS,31.498333,-90.875,...,8759,USW00013833,2000,1,62.0,39.608696,0.086957,Hattiesburg,31.2819,-89.2531
782208,39,2000-01-23,2000-01-24,1,C,15.0,Campfire,CO,40.445278,-105.316389,...,17207,USW00014792,2000,1,38.741935,22.419355,0.0,Trenton,40.2769,-74.8158


In [15]:
finaljoin_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13103 entries, 1 to 102271
Data columns (total 24 columns):
FIRE_DISC_DATE      13103 non-null object
FIRE_CONT_DATE      13103 non-null object
FIRE_DURATION       13103 non-null int64
FIRE_SIZE_CLASS     13103 non-null object
FIRE_SIZE           13103 non-null float64
STAT_CAUSE_DESCR    13103 non-null object
STATE               13103 non-null object
LATITUDE            13103 non-null float64
LONGITUDE           13103 non-null float64
FIRE_YEAR_DISC      13103 non-null int64
FIRE_MONTH_DISC     13103 non-null int64
point               13103 non-null geometry
buffer              13103 non-null geometry
index_right         13103 non-null int64
Unnamed: 0          13103 non-null int64
ID                  13103 non-null object
Year                13103 non-null int64
Month               13103 non-null int64
Mean_Tmax           13103 non-null float64
Mean_Tmin           13103 non-null float64
Mean_Prcp           13103 non-null float64
Name 

In [23]:
graphdata_df = finaljoin_df.drop(columns=['FIRE_DISC_DATE','FIRE_CONT_DATE','point','buffer','index_right','Unnamed: 0','ID','Year','Month','Lat','Lon'])
graphdata_df = graphdata_df[['FIRE_YEAR_DISC',
                            'FIRE_MONTH_DISC',
                            'FIRE_DURATION',
                            'Name',
                            'STATE',
                            'FIRE_SIZE',
                            'FIRE_SIZE_CLASS',
                            'STAT_CAUSE_DESCR',
                            'Mean_Tmax',
                            'Mean_Tmin',
                            'Mean_Prcp',
                            'LATITUDE',
                            'LONGITUDE']]
graphdata_df = graphdata_df.sort_values(by=['FIRE_YEAR_DISC','FIRE_MONTH_DISC','FIRE_SIZE']).reset_index(drop=True)
graphdata_df

Unnamed: 0,FIRE_YEAR_DISC,FIRE_MONTH_DISC,FIRE_DURATION,Name,STATE,FIRE_SIZE,FIRE_SIZE_CLASS,STAT_CAUSE_DESCR,Mean_Tmax,Mean_Tmin,Mean_Prcp,LATITUDE,LONGITUDE
0,2000,1,1,RochesterNY,NY,0.10,A,Miscellaneous,32.516129,13.806452,0.000000,43.168030,-77.572036
1,2000,1,1,NewYork,NY,0.10,A,Miscellaneous,37.870968,24.677419,0.000000,40.929085,-73.872211
2,2000,1,182,Macon,GA,0.43,B,Debris Burning,56.387097,35.064516,0.096774,31.464200,-83.534500
3,2000,1,1,Cincinnati,KY,3.10,B,Miscellaneous,37.709677,19.548387,0.096774,36.967500,-84.468611
4,2000,1,1,Lexington,KY,3.10,B,Miscellaneous,40.032258,23.677419,0.064516,36.967500,-84.468611
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13098,2015,12,1,GreenvilleSpartanburg,TN,40.00,C,Miscellaneous,63.870968,46.483871,0.161290,36.221111,-82.258056
13099,2015,12,3,Athens,OK,41.90,C,Debris Burning,65.645161,47.129032,0.290323,34.879400,-96.735600
13100,2015,12,1,Raleigh,TX,249.60,D,Powerline,65.064516,47.064516,0.064516,35.596500,-101.419000
13101,2015,12,1,Raleigh,TX,300.00,E,Powerline,65.064516,47.064516,0.064516,35.827317,-101.419350


In [25]:
graphdata_df.to_csv("Merged Fire Weather Data.csv",index=False)