In [2]:
import numpy as np
import matplotlib as plt
import pandas as pd
import os
from datetime import datetime

# Cleaning Drought Data

From Drought Monitor: https://droughtmonitor.unl.edu/Data.aspx 

Alternatively, try to find the data we want from here: https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx

> Please note, shapefiles prior to 2004 have been digitized from the image archive. Preliminary error
> analysis indicates that the digitized datasets have a horizontal error of approximately 4 miles when
> compared to authored datasets. For comparison, the drought impacts line in a georeferenced image is
> approximately 12 miles wide

# Cleaning Earthquake Data

From the US Geological Survey.
Alternatively, try to find the data we want from here: https://earthquake.usgs.gov/earthquakes/search/

Explanation of fields: https://earthquake.usgs.gov/data/comcat/data-eventterms.php 

# Cleaning Tornado Data
From the Storm Prediction Center: https://www.spc.noaa.gov/gis/svrgis/ 

Explanation of fields:
https://www.spc.noaa.gov/wcm/data/SPC_severe_database_description.pdf 

In [3]:
def cost_by_year(data, year):
    # lower_date = datetime(year=year)
    # upper_date = datetime(year=year+1)
    filtered = data.loc[data["yr"]==year]
    costs = [loss for loss in filtered["loss"]]
    print(costs)
    return sum(costs)


In [4]:

cwd = os.getcwd()
date_format = "%Y-%m-%d"
TORNADO_PATH = os.path.join(cwd, "../..", "data/tornadoes/US_tornadoes_2000_2022.csv")

data = pd.read_csv(TORNADO_PATH)
filtered_by_cost = data.loc[data["loss"]>=1]

dates = [datetime.strptime(time,date_format) for time in filtered_by_cost["date"]]
lats = np.array([[slat for slat in filtered_by_cost["slat"]], [elat for elat in filtered_by_cost["elat"]]]).T
lons = np.array([[slon for slon in filtered_by_cost["slon"]], [elon for elon in filtered_by_cost["elon"]]]).T
cost = [loss for loss in filtered_by_cost["loss"]]
# print(lats)
# print(lons)
# print(cost)
print(cost_by_year(data,2002))



[0.0, 0.0, 0.75, 0.01, 0.0, 3.0, 0.02, 0.0, 0.001, 0.05, 0.5, 0.02, 0.0, 75.0, 10.0, 0.0, 0.18, 0.06, 0.1, 0.31, 0.21, 5.0, 0.03, 0.75, 0.01, 0.09, 0.03, 0.03, 0.01, 0.03, 0.5, 0.03, 1.0, 0.03, 0.03, 0.01, 0.03, 0.01, 0.03, 0.01, 0.03, 0.003, 0.003, 0.01, 0.003, 0.001, 0.01, 0.01, 0.0, 0.01, 0.01, 0.02, 0.01, 0.15, 0.07, 0.1, 0.0, 1.5, 3.0, 0.0, 0.1, 0.0, 0.07, 0.5, 0.25, 0.004, 0.001, 0.01, 0.12, 30.0, 0.53, 0.05, 0.85, 12.1, 0.01, 0.0, 12.8, 0.33, 1.3, 0.5, 13.5, 1.3, 0.2, 2.35, 0.01, 0.001, 0.85, 0.02, 0.11, 0.01, 0.0, 0.15, 0.13, 0.05, 17.0, 0.25, 3.2, 1.0, 60.2, 0.0, 0.1, 0.14, 0.33, 0.14, 0.0, 0.97, 0.02, 0.0, 0.21, 0.07, 4.3, 0.2, 0.1, 0.01, 0.0, 0.04, 0.4, 0.5, 0.8, 0.4, 0.04, 1.2, 1.5, 7.6, 0.07, 0.04, 0.002, 0.25, 0.0, 0.0, 0.0, 0.0, 0.02, 0.51, 0.1, 0.03, 0.01, 0.0, 0.01, 0.01, 0.01, 0.01, 0.01, 0.15, 3.0, 0.2, 0.01, 0.5, 6.63, 0.75, 0.5, 0.03, 0.11, 0.4, 0.0, 0.05, 0.0, 0.5, 0.0, 0.0, 0.05, 0.14, 0.002, 0.54, 0.0, 0.07, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.03, 0.0, 0.0, 0.0, 0.0

In [6]:
import geopandas as gpd

tornados_gpd = gpd.read_file(TORNADO_PATH)
tornados_gpd.describe()

Unnamed: 0,OID_,om,yr,mo,dy,date,time,tz,st,stf,...,slon,elat,elon,len,wid,fc,Month_Calc,Date_Calc,Shape__Length,geometry
count,28186,28186.0,28186.0,28186.0,28186.0,28186,28186,28186.0,28186,28186.0,...,28186.0,28186.0,28186.0,28186.0,28186.0,28186.0,28186,28186,28186.0,0.0
unique,28186,16007.0,23.0,12.0,31.0,4001,1435,1.0,53,53.0,...,16204.0,16138.0,17423.0,2081.0,332.0,1.0,12,4001,19715.0,0.0
top,1,506.0,2004.0,5.0,24.0,2011-04-27,17:30:00,3.0,TX,48.0,...,-89.67,37.3,-97.5,0.1,50.0,0.0,4,4/27/2011 4:00:00,,
freq,1,12.0,1817.0,6253.0,1360.0,207,152,28186.0,2936,2935.0,...,21.0,24.0,22.0,2771.0,5456.0,28186.0,6253,207,6948.0,
