In [36]:
# pandas is imported here as well so this display-config cell runs on a fresh
# kernel; the notebook's main import cell comes after this one.
import pandas as pd

# Lift the display truncation limits: show every column and every row
# whenever a DataFrame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [37]:
import pandas as pd
import numpy as np
import requests
from io import StringIO
from bs4 import BeautifulSoup as BS

## Using API to grab data from USGS

Data Dictionary: https://earthquake.usgs.gov/data/comcat/data-eventterms.php

In [38]:
# Send get request
# All events recorded since 1900 inside a latitude/longitude box drawn around
# Tennessee. The box corners are cities in neighbouring states, so the result
# also contains events outside Tennessee.
url = 'https://earthquake.usgs.gov/fdsnws/event/1/query'
params = {'format': 'csv',
          'starttime': '1900-01-01',
          'minlatitude': '34.730',    # Huntsville, AL
          'maxlatitude': '37.002',    # Bowling Green, KY
          'minlongitude': '-90.710',  # Jonesboro, AR
          'maxlongitude': '-80.843',  # Charlotte, NC
          # A single value: a list here is encoded as two separate `limit`
          # query parameters. The API documents 1-20000 as the allowed range.
          'limit': 20000}
r = requests.get(url=url, params=params, timeout=60)
# Fail here on an HTTP error rather than parsing an error page as CSV below.
r.raise_for_status()

# Convert to Pandas DF
earthquake_tn = pd.read_csv(StringIO(r.text))
earthquake_tn.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2020-10-21T21:49:45.720Z,36.0705,-89.802333,6.84,2.14,md,46.0,40.0,0.0643,0.1,nm,nm60312762,2020-10-22T12:49:53.160Z,"2 km NNE of Cooter, Missouri",earthquake,0.17,0.49,0.092,31.0,reviewed,nm,nm
1,2020-10-21T20:28:42.930Z,36.525,-89.569167,6.85,1.69,md,19.0,50.0,0.02596,0.03,nm,nm60312752,2020-10-22T14:45:56.660Z,"3 km E of Marston, Missouri",earthquake,0.34,0.63,0.167,12.0,reviewed,nm,nm
2,2020-10-21T15:38:34.380Z,36.867167,-83.242833,9.37,2.25,md,16.0,98.0,0.375,0.32,se,se60312717,2020-10-21T18:59:51.529Z,"4 km W of Evarts, Kentucky",earthquake,0.59,1.91,0.049,11.0,reviewed,se,se
3,2020-10-21T09:44:44.720Z,36.4865,-81.110667,0.74,-0.67,md,4.0,183.0,0.01516,0.01,se,se60312707,2020-10-21T12:31:57.890Z,"2 km SSE of Sparta, North Carolina",earthquake,0.49,1.78,0.007,4.0,reviewed,se,se
4,2020-10-21T04:55:51.350Z,36.545667,-89.649,8.01,1.61,md,24.0,44.0,0.02245,0.04,nm,nm60312697,2020-10-21T13:04:57.100Z,"4 km NW of Marston, Missouri",earthquake,0.44,0.4,0.147,21.0,reviewed,nm,nm


In [39]:
# (rows, columns) of the frame as downloaded.
earthquake_tn.shape

(10063, 22)

In [40]:
# Per-column dtype and non-null count, to see which fields have missing values.
earthquake_tn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10063 entries, 0 to 10062
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   time             10063 non-null  object 
 1   latitude         10063 non-null  float64
 2   longitude        10063 non-null  float64
 3   depth            10045 non-null  float64
 4   mag              10059 non-null  float64
 5   magType          10049 non-null  object 
 6   nst              6790 non-null   float64
 7   gap              6786 non-null   float64
 8   dmin             6565 non-null   float64
 9   rms              6808 non-null   float64
 10  net              10063 non-null  object 
 11  id               10063 non-null  object 
 12  updated          10063 non-null  object 
 13  place            10063 non-null  object 
 14  type             10063 non-null  object 
 15  horizontalError  6849 non-null   float64
 16  depthError       6848 non-null   float64
 17  magError    

In [41]:
# Export: write the frame, as downloaded, to CSV without the index column
earthquake_tn.to_csv(
    path_or_buf='../data/earthquake_tn_raw.csv',
    index=False,
)

## Rename/Drop columns based on Data Dictionary

In [44]:
# time: set correct type, rename for match to "date"
# The raw values are ISO-8601 strings ending in "Z" (UTC). pd.to_datetime replaces
# astype("datetime64"), whose unit-less dtype is rejected by newer pandas. Dropping
# the timezone afterwards keeps the column as naive UTC datetimes.
earthquake_tn["time"] = (
    pd.to_datetime(earthquake_tn["time"], utc=True).dt.tz_localize(None)
)
earthquake_tn = earthquake_tn.rename(columns = {"time": "date"})
earthquake_tn.head()

Unnamed: 0,date,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
0,2020-10-21 21:49:45.720,36.0705,-89.802333,6.84,2.14,md,46.0,40.0,0.0643,0.1,nm,nm60312762,2020-10-22T12:49:53.160Z,"2 km NNE of Cooter, Missouri",earthquake,0.17,0.49,0.092,31.0,reviewed,nm,nm
1,2020-10-21 20:28:42.930,36.525,-89.569167,6.85,1.69,md,19.0,50.0,0.02596,0.03,nm,nm60312752,2020-10-22T14:45:56.660Z,"3 km E of Marston, Missouri",earthquake,0.34,0.63,0.167,12.0,reviewed,nm,nm
2,2020-10-21 15:38:34.380,36.867167,-83.242833,9.37,2.25,md,16.0,98.0,0.375,0.32,se,se60312717,2020-10-21T18:59:51.529Z,"4 km W of Evarts, Kentucky",earthquake,0.59,1.91,0.049,11.0,reviewed,se,se
3,2020-10-21 09:44:44.720,36.4865,-81.110667,0.74,-0.67,md,4.0,183.0,0.01516,0.01,se,se60312707,2020-10-21T12:31:57.890Z,"2 km SSE of Sparta, North Carolina",earthquake,0.49,1.78,0.007,4.0,reviewed,se,se
4,2020-10-21 04:55:51.350,36.545667,-89.649,8.01,1.61,md,24.0,44.0,0.02245,0.04,nm,nm60312697,2020-10-21T13:04:57.100Z,"4 km NW of Marston, Missouri",earthquake,0.44,0.4,0.147,21.0,reviewed,nm,nm


In [45]:
# Remove the fields that are not carried forward into the cleaned table
earthquake_tn = earthquake_tn.drop(
    labels = [
        "magType", "nst", "gap", "dmin", "rms",
        "id", "magNst", "locationSource", "magSource",
    ],
    axis = "columns",
)
earthquake_tn.head()

Unnamed: 0,date,latitude,longitude,depth,mag,net,updated,place,type,horizontalError,depthError,magError,status
0,2020-10-21 21:49:45.720,36.0705,-89.802333,6.84,2.14,nm,2020-10-22T12:49:53.160Z,"2 km NNE of Cooter, Missouri",earthquake,0.17,0.49,0.092,reviewed
1,2020-10-21 20:28:42.930,36.525,-89.569167,6.85,1.69,nm,2020-10-22T14:45:56.660Z,"3 km E of Marston, Missouri",earthquake,0.34,0.63,0.167,reviewed
2,2020-10-21 15:38:34.380,36.867167,-83.242833,9.37,2.25,se,2020-10-21T18:59:51.529Z,"4 km W of Evarts, Kentucky",earthquake,0.59,1.91,0.049,reviewed
3,2020-10-21 09:44:44.720,36.4865,-81.110667,0.74,-0.67,se,2020-10-21T12:31:57.890Z,"2 km SSE of Sparta, North Carolina",earthquake,0.49,1.78,0.007,reviewed
4,2020-10-21 04:55:51.350,36.545667,-89.649,8.01,1.61,nm,2020-10-21T13:04:57.100Z,"4 km NW of Marston, Missouri",earthquake,0.44,0.4,0.147,reviewed


In [46]:
# (rows, columns) after dropping the unused fields.
earthquake_tn.shape

(10063, 13)

In [47]:
# Rename columns
# An explicit old -> new mapping is used instead of assigning a positional list to
# `.columns`: it does not depend on column order, and it still works if the cell is
# re-run after more columns have been added. Columns not listed keep their names.
earthquake_tn = earthquake_tn.rename(columns = {
    "mag": "magnitude",
    "net": "news_source",
    "updated": "latest_updated",
    "type": "seismic_type",
    "horizontalError": "location_error",
    "depthError": "depth_error",
    "magError": "magnitude_error",
    "status": "review_status",
})
earthquake_tn.head()

Unnamed: 0,date,latitude,longitude,depth,magnitude,news_source,latest_updated,place,seismic_type,location_error,depth_error,magnitude_error,review_status
0,2020-10-21 21:49:45.720,36.0705,-89.802333,6.84,2.14,nm,2020-10-22T12:49:53.160Z,"2 km NNE of Cooter, Missouri",earthquake,0.17,0.49,0.092,reviewed
1,2020-10-21 20:28:42.930,36.525,-89.569167,6.85,1.69,nm,2020-10-22T14:45:56.660Z,"3 km E of Marston, Missouri",earthquake,0.34,0.63,0.167,reviewed
2,2020-10-21 15:38:34.380,36.867167,-83.242833,9.37,2.25,se,2020-10-21T18:59:51.529Z,"4 km W of Evarts, Kentucky",earthquake,0.59,1.91,0.049,reviewed
3,2020-10-21 09:44:44.720,36.4865,-81.110667,0.74,-0.67,se,2020-10-21T12:31:57.890Z,"2 km SSE of Sparta, North Carolina",earthquake,0.49,1.78,0.007,reviewed
4,2020-10-21 04:55:51.350,36.545667,-89.649,8.01,1.61,nm,2020-10-21T13:04:57.100Z,"4 km NW of Marston, Missouri",earthquake,0.44,0.4,0.147,reviewed


- time => date --> Break down to year, month, day, time
- latitude
- longitude
- depth
- mag => magnitude
- magType => drop
- nst => drop
- gap => drop
- dmin => drop
- rms => drop
- net => news_source (ID of the contributing seismic network, per the data dictionary)
- id => drop
- updated => latest_updated
- place => Clean up into closest_city and state
- type => seismic_type
- horizontalError => location_error
- depthError => depth_error
- magError => magnitude_error
- magNst => drop
- status => review_status
- locationSource => drop
- magSource => drop

## Format Date

Now, let's extract each part of `date`: `year`, `month`, `day`, and `time`.

In [48]:
# Split the parsed `date` column into calendar fields plus the time of day.
earthquake_tn = earthquake_tn.assign(
    year=lambda frame: frame['date'].dt.year,
    month=lambda frame: frame['date'].dt.month,
    day=lambda frame: frame['date'].dt.day,
    time=lambda frame: frame['date'].dt.time,
)

earthquake_tn.head()

Unnamed: 0,date,latitude,longitude,depth,magnitude,news_source,latest_updated,place,seismic_type,location_error,depth_error,magnitude_error,review_status,year,month,day,time
0,2020-10-21 21:49:45.720,36.0705,-89.802333,6.84,2.14,nm,2020-10-22T12:49:53.160Z,"2 km NNE of Cooter, Missouri",earthquake,0.17,0.49,0.092,reviewed,2020,10,21,21:49:45.720000
1,2020-10-21 20:28:42.930,36.525,-89.569167,6.85,1.69,nm,2020-10-22T14:45:56.660Z,"3 km E of Marston, Missouri",earthquake,0.34,0.63,0.167,reviewed,2020,10,21,20:28:42.930000
2,2020-10-21 15:38:34.380,36.867167,-83.242833,9.37,2.25,se,2020-10-21T18:59:51.529Z,"4 km W of Evarts, Kentucky",earthquake,0.59,1.91,0.049,reviewed,2020,10,21,15:38:34.380000
3,2020-10-21 09:44:44.720,36.4865,-81.110667,0.74,-0.67,se,2020-10-21T12:31:57.890Z,"2 km SSE of Sparta, North Carolina",earthquake,0.49,1.78,0.007,reviewed,2020,10,21,09:44:44.720000
4,2020-10-21 04:55:51.350,36.545667,-89.649,8.01,1.61,nm,2020-10-21T13:04:57.100Z,"4 km NW of Marston, Missouri",earthquake,0.44,0.4,0.147,reviewed,2020,10,21,04:55:51.350000


## Cleanup `place`

In [None]:
# `place` looks like "2 km NNE of Cooter, Missouri". Strip the leading
# "<distance> km <direction> of " offset, then keep the text before the last
# comma, e.g. "Cooter". A value with no offset and no comma is kept as-is.
# The previous str.split("") raised "ValueError: empty separator".
earthquake_tn["closest_city"] = (
    earthquake_tn["place"]
    .str.replace(r"^\s*\d+(?:\.\d+)?\s*km\s+[NSEW]{1,3}\s+of\s+", "", regex=True)
    .str.rsplit(",", n=1)
    .str[0]
    .str.strip()
)