In [1]:
import requests
#from bs4 import BeautifulSoup as bs
import pandas as pd
from io import StringIO
#import geopandas as gpd

# Part 2 Data Retrieval
Use the `requests` library and the USGS's API (https://earthquake.usgs.gov/fdsnws/event/1/) to retrieve information about all recorded earthquakes that occurred in Tennessee since 1900.
Answer the following questions:

In [2]:
#a list of all the catalogs available, if needed
cat_url = 'https://earthquake.usgs.gov/fdsnws/event/1/catalogs'
#The data dictionary for the dataset
#https://earthquake.usgs.gov/data/comcat/data-eventterms.php#magNst

In [3]:
#two catalogs seem useful
baseurl = 'https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv&starttime=1900-01-01&endtime=2020-12-31&catalog='
catalog = ['ushis'
           , 'se'
          ]
tn_eq = pd.DataFrame()
for cat in catalog:
    url = baseurl + cat
    request = requests.get(url)
    temp_df = pd.read_csv(StringIO(request.text))
    tn_eq = pd.concat([temp_df, tn_eq]).reset_index()
    print(url)

https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv&starttime=1900-01-01&endtime=2020-12-31&catalog=ushis
https://earthquake.usgs.gov/fdsnws/event/1/query?format=csv&starttime=1900-01-01&endtime=2020-12-31&catalog=se


In [4]:
#check that we have both catalogs
tn_eq.locationSource.unique()

array(['se', 'ushis'], dtype=object)

In [5]:
#check
tn_eq.head()

Unnamed: 0,level_0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,index
0,0,2020-10-21T15:38:34.380Z,36.867167,-83.242833,9.37,2.25,md,16.0,98.0,0.375,...,"4 km W of Evarts, Kentucky",earthquake,0.59,1.91,0.049,11.0,reviewed,se,se,
1,1,2020-10-21T09:44:44.720Z,36.4865,-81.110667,0.74,-0.67,md,4.0,183.0,0.01516,...,"2 km SSE of Sparta, North Carolina",earthquake,0.49,1.78,0.007,4.0,reviewed,se,se,
2,2,2020-10-21T02:52:28.740Z,36.484167,-81.100167,7.18,0.19,md,6.0,123.0,0.0071,...,"3 km SE of Sparta, North Carolina",earthquake,0.76,0.62,0.396,6.0,reviewed,se,se,
3,3,2020-10-19T20:19:38.490Z,36.224,-82.817167,17.27,2.31,md,8.0,70.0,0.1751,...,"6 km N of Greeneville, Tennessee",earthquake,0.91,1.69,0.099,6.0,reviewed,se,se,
4,4,2020-10-18T16:54:12.390Z,36.485833,-81.082,0.71,-0.61,md,4.0,147.0,0.01236,...,"4 km ESE of Sparta, North Carolina",earthquake,0.34,1.02,0.105,4.0,reviewed,se,se,


In [6]:
#check
tn_eq.shape

(6228, 24)

In [7]:
#find any records that don't have a valid place
tn_eq[tn_eq['place'].isnull()]

Unnamed: 0,level_0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,index
4100,1656,1961-09-11T02:46:51.700Z,51.4,180.0,60.0,5.9,ms,,,,...,,earthquake,,,,,reviewed,ushis,rot,1656.0
5045,2601,1946-12-25T11:13:10.000Z,51.5,180.0,90.0,6.5,mb,,,,...,,earthquake,,,,,reviewed,ushis,gr,2601.0
6091,3647,1911-09-17T03:26:00.000Z,51.0,180.0,,7.1,ms,,,,...,,earthquake,,,,,reviewed,ushis,abe,3647.0


In [8]:
#and drop
tn_eq = tn_eq.dropna(subset=['place'])
tn_eq.shape

(6225, 24)

In [9]:
#find any records that don't have a valid mag
tn_eq[tn_eq['mag'].isnull()]

Unnamed: 0,level_0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,index
4048,1604,1962-08-31T17:56:10.000Z,51.27,179.86,55.0,,,,,,...,"Rat Islands, Aleutian Islands, Alaska",earthquake,,,,,reviewed,ushis,ushis,1604.0
4080,1636,1962-01-23T15:59:28.600Z,52.80,-169.00,65.0,,,,,,...,"Fox Islands, Aleutian Islands, Alaska",earthquake,,,,,reviewed,ushis,ushis,1636.0
4081,1637,1962-01-03T17:53:10.000Z,52.31,177.48,77.0,,,,,,...,"Rat Islands, Aleutian Islands, Alaska",earthquake,,,,,reviewed,ushis,ushis,1637.0
4082,1638,1962-01-01T02:41:12.400Z,52.20,177.70,48.0,,,,,,...,"Rat Islands, Aleutian Islands, Alaska",earthquake,,,,,reviewed,ushis,ushis,1638.0
4104,1660,1961-08-25T06:59:28.400Z,53.52,-161.26,,,,,,,...,south of Alaska,earthquake,,,,,reviewed,ushis,ushis,1660.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6217,3773,1901-12-30T00:00:00.000Z,59.40,-153.50,,,,,,,...,Southern Alaska,earthquake,,,,,reviewed,ushis,ushis,3773.0
6218,3774,1901-11-14T04:32:00.000Z,38.70,-112.10,,,,,,,...,Utah,earthquake,,,,,reviewed,ushis,ushis,3774.0
6222,3778,1900-12-27T10:15:00.000Z,58.00,-150.00,,,,,,,...,Gulf of Alaska,earthquake,,,,,reviewed,ushis,ushis,3778.0
6224,3780,1900-08-01T07:45:00.000Z,40.00,-112.10,,,,,,,...,Utah,earthquake,,,,,reviewed,ushis,ushis,3780.0


In [10]:
#where are they?
tn_eq[tn_eq['mag'].isnull()]['place'].unique()

array(['Rat Islands, Aleutian Islands, Alaska',
       'Fox Islands, Aleutian Islands, Alaska', 'south of Alaska',
       'New Mexico', 'Andreanof Islands, Aleutian Islands, Alaska',
       'Utah', 'Alaska Peninsula', 'southern Idaho',
       'Kenai Peninsula, Alaska', 'western Montana',
       'Near Islands, Aleutian Islands, Alaska', 'Southern Alaska',
       'Nevada', 'northern Idaho', 'Washington', 'Hawaii region, Hawaii',
       'Colorado', 'Central Alaska', 'Pennsylvania', 'northern Alaska',
       'Kodiak Island region, Alaska', 'Tennessee', 'Maui region, Hawaii',
       'Vermont', 'Unimak Island region, Alaska',
       'Puget Sound region, Washington', 'Arizona',
       'Wasatch Front Urban Corridor, Utah',
       'Portland urban area, Oregon', 'Southern California',
       'Northern California', 'Wyoming',
       'Seattle-Tacoma urban area, Washington',
       'Albuquerque urban area, New Mexico', 'Oregon',
       'south of the Aleutian Islands', 'New Jersey', 'North Carolina'

In [11]:
#How many in Tennessee?
tn_eq[(tn_eq['mag'].isnull()) & (tn_eq['place'] == 'Tennessee')]

Unnamed: 0,level_0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,...,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource,index
4828,2384,1952-07-16T23:48:10.000Z,36.2,-89.6,,,,,,,...,Tennessee,earthquake,,,,,reviewed,ushis,ushis,2384.0


In [12]:
#and drop
tn_eq = tn_eq.dropna(subset=['mag'])
tn_eq.shape

(5797, 24)

In [13]:
#filter for earthquake 'type'
#tn_eq.type.unique()
tn_eq = tn_eq[tn_eq['type'] == 'earthquake']
tn_eq.shape

(5795, 24)

In [14]:
#filter for tennessee 'place'
tn_eq = tn_eq[(tn_eq['place'].str.contains('Tenn'))
             | (tn_eq['place'].str.contains('TN'))
             ]
tn_eq.shape

(1075, 24)

In [15]:
tn_eq.columns

Index(['level_0', 'time', 'latitude', 'longitude', 'depth', 'mag', 'magType',
       'nst', 'gap', 'dmin', 'rms', 'net', 'id', 'updated', 'place', 'type',
       'horizontalError', 'depthError', 'magError', 'magNst', 'status',
       'locationSource', 'magSource', 'index'],
      dtype='object')

In [16]:
for col in tn_eq.columns:
    print(tn_eq[col].name)
    print(tn_eq[col].unique())

level_0
[   3    5   21 ... 3529 3625 3748]
time
['2020-10-19T20:19:38.490Z' '2020-10-17T07:12:44.060Z'
 '2020-10-09T20:24:02.590Z' ... '1918-10-16T02:15:00.000Z'
 '1913-03-28T21:50:00.000Z' '1903-11-04T18:18:00.000Z']
latitude
[36.224     35.3668333 36.1018333 35.7063333 36.266     35.7001667
 35.5391667 35.313     36.2363333 35.1968333 36.0293333 35.9183333
 35.9191667 36.1701667 35.2451667 35.5681667 35.6755    35.5373333
 35.5028333 35.4858333 35.3251667 35.5445    36.0703333 35.7243333
 36.3035    36.041     36.0578333 35.6746667 35.4123333 35.624
 35.7846667 35.7793333 35.054     35.142     35.1405    35.228
 36.3036667 35.5728333 35.6648333 35.8775    35.644     36.168
 35.1616667 35.9293333 36.0991667 35.086     36.4875    35.1911667
 36.4295    35.5938333 35.675     35.677     35.6808333 35.6615
 35.9398333 35.5941667 36.4343333 36.4305    36.4308333 35.3226667
 35.4188333 35.534     35.5345    35.7528333 35.62      35.7533333
 35.8296667 35.6916667 35.8781667 35.4333333 35.38

In [17]:
tn_eq.to_csv('../Data/tenn_eq.csv')