In [1]:
import pandas as pd
import geopandas as gp
import numpy as np
import math
from functools import partial
from geog import propagate
from shapely.geometry import Polygon

In [2]:
METERS_PER_MILE: float = 1609.34
GEOG_N_POINTS: int = 20
BUFFER_RADIUS_MILES: float = 15

In [3]:
fires_df = pd.read_csv('Culled Wildfires 2000-2015.csv').dropna(subset=['LATITUDE','LONGITUDE'])

In [4]:
weather_df = pd.read_csv('Mean_Weather_Data_Final.csv').dropna(subset=['Lat','Lon'])

In [5]:
weather_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39980 entries, 0 to 39979
Data columns (total 10 columns):
Unnamed: 0    39980 non-null int64
ID            39980 non-null object
Year          39980 non-null int64
Month         39980 non-null int64
Mean_Tmax     39980 non-null float64
Mean_Tmin     39980 non-null float64
Mean_Prcp     39980 non-null float64
Name          39980 non-null object
Lat           39980 non-null float64
Lon           39980 non-null float64
dtypes: float64(5), int64(3), object(2)
memory usage: 3.4+ MB


In [6]:
fires_gdf = gp.GeoDataFrame(fires_df, 
                            #geometry=[shapely.Point(x, y) for x, y in zip(fires_df.LATITUDE, fires_df.LONGITUDE)])
                            geometry=gp.points_from_xy(fires_df.LATITUDE, fires_df.LONGITUDE))
fires_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 121092 entries, 0 to 121091
Data columns (total 12 columns):
FIRE_DISC_DATE      121092 non-null object
FIRE_CONT_DATE      121092 non-null object
FIRE_DURATION       121092 non-null int64
FIRE_SIZE_CLASS     121092 non-null object
FIRE_SIZE           121092 non-null float64
STAT_CAUSE_DESCR    121092 non-null object
STATE               121092 non-null object
LATITUDE            121092 non-null float64
LONGITUDE           121092 non-null float64
FIRE_YEAR_DISC      121092 non-null int64
FIRE_MONTH_DISC     121092 non-null int64
geometry            121092 non-null geometry
dtypes: float64(3), geometry(1), int64(3), object(5)
memory usage: 12.0+ MB


In [7]:
# source - this is from one of siri's old project https://github.com/sirisurab/transpred/blob/master/src/geo_merger.py
distance: float = BUFFER_RADIUS_MILES * METERS_PER_MILE
angles = np.linspace(0, 360, GEOG_N_POINTS)
fires_gdf['buffer'] = fires_gdf.geometry.apply(propagate, angle=angles, d=distance)
fires_gdf['buffer'] = fires_gdf['buffer'].apply(Polygon)

In [8]:
fires_gdf.head()

Unnamed: 0,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,FIRE_MONTH_DISC,geometry,buffer
0,2000-01-01,2000-01-20,19,B,5.0,Debris Burning,CA,36.44891,-118.73811,2000,1,POINT (36.449 -118.738),POLYGON ((35.99739044878418 -61.26113994531997...
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,1,POINT (34.710 -94.866),"POLYGON ((32.1526907810114 -85.12906013930856,..."
2,2000-01-01,2000-01-02,1,F,1200.0,Miscellaneous,TX,35.7042,-101.5456,2000,1,POINT (35.704 -101.546),POLYGON ((34.61964007622178 -78.45238678949779...
3,2000-01-01,2000-01-02,1,G,7320.0,Fireworks,OK,36.3765,-96.2922,2000,1,POINT (36.377 -96.292),POLYGON ((34.39644773453859 -83.70407096763326...
4,2000-01-03,2000-01-04,1,A,0.1,Powerline,WV,38.367662,-82.216708,2000,1,POINT (38.368 -82.217),"POLYGON ((39.97031253504689 -82.2136991145406,..."


In [9]:
fires_gdf = fires_gdf.set_geometry('buffer').rename(columns={'geometry':'point'})
fires_gdf.head()

Unnamed: 0,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,FIRE_MONTH_DISC,point,buffer
0,2000-01-01,2000-01-20,19,B,5.0,Debris Burning,CA,36.44891,-118.73811,2000,1,POINT (36.449 -118.738),"POLYGON ((35.99739 -61.26114, 36.02089 -61.331..."
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,1,POINT (34.710 -94.866),"POLYGON ((32.15269 -85.12906, 32.25567 -85.200..."
2,2000-01-01,2000-01-02,1,F,1200.0,Miscellaneous,TX,35.7042,-101.5456,2000,1,POINT (35.704 -101.546),"POLYGON ((34.61964 -78.45239, 34.67218 -78.523..."
3,2000-01-01,2000-01-02,1,G,7320.0,Fireworks,OK,36.3765,-96.2922,2000,1,POINT (36.377 -96.292),"POLYGON ((34.39645 -83.70407, 34.48254 -83.774..."
4,2000-01-03,2000-01-04,1,A,0.1,Powerline,WV,38.367662,-82.216708,2000,1,POINT (38.368 -82.217),"POLYGON ((39.97031 -82.21370, 39.87000 -82.143..."


In [10]:
weather_gdf = gp.GeoDataFrame(weather_df, 
                            #geometry=[shapely.Point(x, y) for x, y in zip(fires_df.LATITUDE, fires_df.LONGITUDE)])
                            geometry=gp.points_from_xy(weather_df.Lat, weather_df.Lon))
weather_gdf.head()

Unnamed: 0.1,Unnamed: 0,ID,Year,Month,Mean_Tmax,Mean_Tmin,Mean_Prcp,Name,Lat,Lon,geometry
0,0,USC00042863,2000,1,71.551724,44.482759,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
1,1,USC00042863,2000,2,70.136364,45.954545,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
2,2,USC00042863,2000,3,70.0,45.806452,0.032258,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
3,3,USC00042863,2000,4,76.482759,51.241379,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)
4,4,USC00042863,2000,5,80.928571,54.714286,0.0,Escondido,33.1211,-117.09,POINT (33.121 -117.090)


In [11]:
joined_gdf = gp.sjoin(fires_gdf, weather_gdf, how="inner", op='intersects')
joined_gdf

Unnamed: 0.1,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,...,Unnamed: 0,ID,Year,Month,Mean_Tmax,Mean_Tmin,Mean_Prcp,Name,Lat,Lon
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
212,2000-03-28,2000-03-29,1,C,10.0,Debris Burning,GA,34.683333,-85.133333,2000,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
2265,2000-07-15,2000-07-17,2,B,5.0,Lightning,GA,34.516667,-85.066667,2000,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
6522,2000-09-05,2000-09-06,1,C,40.0,Debris Burning,OK,34.399444,-94.636667,2000,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
6609,2000-09-09,2000-09-14,5,E,500.0,Missing/Undefined,OK,34.550000,-94.766667,2000,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102257,2013-11-18,2013-11-21,3,B,0.5,Miscellaneous,NJ,39.525800,-75.394500,2013,...,8757,USW00013781,2015,11,60.433333,41.166667,0.033333,Wilmington,39.6728,-75.6008
102271,2013-11-19,2013-11-20,1,B,1.4,Campfire,MD,39.705246,-75.807279,2013,...,8757,USW00013781,2015,11,60.433333,41.166667,0.033333,Wilmington,39.6728,-75.6008
84539,2012-04-09,2012-04-13,4,E,741.0,Missing/Undefined,PA,40.213300,-75.761400,2012,...,8758,USW00013781,2015,12,57.129032,41.580645,0.064516,Wilmington,39.6728,-75.6008
102257,2013-11-18,2013-11-21,3,B,0.5,Miscellaneous,NJ,39.525800,-75.394500,2013,...,8758,USW00013781,2015,12,57.129032,41.580645,0.064516,Wilmington,39.6728,-75.6008


In [12]:
#converting gdf to df
joined_df = pd.DataFrame(joined_gdf)

In [13]:
joined_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2509930 entries, 1 to 102271
Data columns (total 24 columns):
FIRE_DISC_DATE      object
FIRE_CONT_DATE      object
FIRE_DURATION       int64
FIRE_SIZE_CLASS     object
FIRE_SIZE           float64
STAT_CAUSE_DESCR    object
STATE               object
LATITUDE            float64
LONGITUDE           float64
FIRE_YEAR_DISC      int64
FIRE_MONTH_DISC     int64
point               geometry
buffer              geometry
index_right         int64
Unnamed: 0          int64
ID                  object
Year                int64
Month               int64
Mean_Tmax           float64
Mean_Tmin           float64
Mean_Prcp           float64
Name                object
Lat                 float64
Lon                 float64
dtypes: float64(8), geometry(2), int64(7), object(7)
memory usage: 478.7+ MB


In [14]:
finaljoin_df = joined_df[joined_df['FIRE_YEAR_DISC'] == joined_df['Year']]
finaljoin_df = finaljoin_df[finaljoin_df['FIRE_MONTH_DISC'] == finaljoin_df['Month']]

finaljoin_df

Unnamed: 0.1,FIRE_DISC_DATE,FIRE_CONT_DATE,FIRE_DURATION,FIRE_SIZE_CLASS,FIRE_SIZE,STAT_CAUSE_DESCR,STATE,LATITUDE,LONGITUDE,FIRE_YEAR_DISC,...,Unnamed: 0,ID,Year,Month,Mean_Tmax,Mean_Tmin,Mean_Prcp,Name,Lat,Lon
1,2000-01-01,2000-01-05,4,D,250.0,Debris Burning,OK,34.710278,-94.866111,2000,...,10295,USW00013882,2000,1,50.741935,31.741935,0.064516,Chattanooga,35.0311,-85.2014
212,2000-03-28,2000-03-29,1,C,10.0,Debris Burning,GA,34.683333,-85.133333,2000,...,10297,USW00013882,2000,3,68.967742,43.419355,0.032258,Chattanooga,35.0311,-85.2014
2265,2000-07-15,2000-07-17,2,B,5.0,Lightning,GA,34.516667,-85.066667,2000,...,10301,USW00013882,2000,7,91.741935,69.193548,0.000000,Chattanooga,35.0311,-85.2014
6522,2000-09-05,2000-09-06,1,C,40.0,Debris Burning,OK,34.399444,-94.636667,2000,...,10303,USW00013882,2000,9,80.933333,62.433333,0.033333,Chattanooga,35.0311,-85.2014
6609,2000-09-09,2000-09-14,5,E,500.0,Missing/Undefined,OK,34.550000,-94.766667,2000,...,10303,USW00013882,2000,9,80.933333,62.433333,0.033333,Chattanooga,35.0311,-85.2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93209,2013-01-06,2013-01-07,1,A,0.2,Miscellaneous,MD,39.466000,-76.758000,2013,...,15251,USW00014711,2013,1,39.709677,25.645161,0.032258,MiddletownHarrisburg,40.1936,-76.7633
113713,2015-04-18,2015-04-19,1,C,12.5,Miscellaneous,PA,40.764720,-76.629170,2015,...,15278,USW00014711,2015,4,64.666667,41.900000,0.033333,MiddletownHarrisburg,40.1936,-76.7633
84539,2012-04-09,2012-04-13,4,E,741.0,Missing/Undefined,PA,40.213300,-75.761400,2012,...,8714,USW00013781,2012,4,64.800000,41.966667,0.033333,Wilmington,39.6728,-75.6008
102257,2013-11-18,2013-11-21,3,B,0.5,Miscellaneous,NJ,39.525800,-75.394500,2013,...,8733,USW00013781,2013,11,53.700000,34.133333,0.033333,Wilmington,39.6728,-75.6008


In [15]:
finaljoin_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13103 entries, 1 to 102271
Data columns (total 24 columns):
FIRE_DISC_DATE      13103 non-null object
FIRE_CONT_DATE      13103 non-null object
FIRE_DURATION       13103 non-null int64
FIRE_SIZE_CLASS     13103 non-null object
FIRE_SIZE           13103 non-null float64
STAT_CAUSE_DESCR    13103 non-null object
STATE               13103 non-null object
LATITUDE            13103 non-null float64
LONGITUDE           13103 non-null float64
FIRE_YEAR_DISC      13103 non-null int64
FIRE_MONTH_DISC     13103 non-null int64
point               13103 non-null geometry
buffer              13103 non-null geometry
index_right         13103 non-null int64
Unnamed: 0          13103 non-null int64
ID                  13103 non-null object
Year                13103 non-null int64
Month               13103 non-null int64
Mean_Tmax           13103 non-null float64
Mean_Tmin           13103 non-null float64
Mean_Prcp           13103 non-null float64
Name 

In [23]:
graphdata_df = finaljoin_df.drop(columns=['FIRE_DISC_DATE','FIRE_CONT_DATE','point','buffer','index_right','Unnamed: 0','ID','Year','Month','Lat','Lon'])
graphdata_df = graphdata_df[['FIRE_YEAR_DISC',
                            'FIRE_MONTH_DISC',
                            'FIRE_DURATION',
                            'Name',
                            'STATE',
                            'FIRE_SIZE',
                            'FIRE_SIZE_CLASS',
                            'STAT_CAUSE_DESCR',
                            'Mean_Tmax',
                            'Mean_Tmin',
                            'Mean_Prcp',
                            'LATITUDE',
                            'LONGITUDE']]
graphdata_df = graphdata_df.sort_values(by=['FIRE_YEAR_DISC','FIRE_MONTH_DISC','FIRE_SIZE']).reset_index(drop=True)
graphdata_df

Unnamed: 0,FIRE_YEAR_DISC,FIRE_MONTH_DISC,FIRE_DURATION,Name,STATE,FIRE_SIZE,FIRE_SIZE_CLASS,STAT_CAUSE_DESCR,Mean_Tmax,Mean_Tmin,Mean_Prcp,LATITUDE,LONGITUDE
0,2000,1,1,RochesterNY,NY,0.10,A,Miscellaneous,32.516129,13.806452,0.000000,43.168030,-77.572036
1,2000,1,1,NewYork,NY,0.10,A,Miscellaneous,37.870968,24.677419,0.000000,40.929085,-73.872211
2,2000,1,182,Macon,GA,0.43,B,Debris Burning,56.387097,35.064516,0.096774,31.464200,-83.534500
3,2000,1,1,Cincinnati,KY,3.10,B,Miscellaneous,37.709677,19.548387,0.096774,36.967500,-84.468611
4,2000,1,1,Lexington,KY,3.10,B,Miscellaneous,40.032258,23.677419,0.064516,36.967500,-84.468611
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13098,2015,12,1,GreenvilleSpartanburg,TN,40.00,C,Miscellaneous,63.870968,46.483871,0.161290,36.221111,-82.258056
13099,2015,12,3,Athens,OK,41.90,C,Debris Burning,65.645161,47.129032,0.290323,34.879400,-96.735600
13100,2015,12,1,Raleigh,TX,249.60,D,Powerline,65.064516,47.064516,0.064516,35.596500,-101.419000
13101,2015,12,1,Raleigh,TX,300.00,E,Powerline,65.064516,47.064516,0.064516,35.827317,-101.419350


In [25]:
graphdata_df.to_csv("Merged Fire Weather Data.csv",index=False)