In [10]:
%matplotlib notebook

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import reverse_geocoder as rg
import pandas as pd
import numpy as np
import copy

In [2]:
#load the combined data for years 1980 to 2020
#This data does not have the country information
file_path="output_data/earthquake_data_1980-2020.csv"
quake_data_complete=pd.read_csv(file_path)
len(quake_data_complete)

67587

In [3]:
#This data contains all magnitudes from the range of year 1984 to 1990
quakes_8489 = quake_data_complete.loc[(quake_data_complete['Year']>=1984) &
                                   (quake_data_complete['Year']<=1989)
                                      ]



In [None]:
#Add the country info from module reverse_geocoder 
#Warning *******( Takes several hours to run)*********
quakes_all_8489= copy.deepcopy(quakes_8489)
quakes_all_8489.loc[:,'Country'] = quakes_all_8489.apply(lambda x: rg.search((x['Lat'], x['Lng']))[0]['cc'], axis=1)  


In [None]:
#quakes_all_8489.to_csv("output_data/quakes_all_8489.csv", index=False)

In [4]:
#Load the quakes category files
lfile_path="output_data/large_quakes_all.csv"
sfile_path="output_data/strong_quakes_all.csv"
mfile_path="output_data/medium_quakes_all.csv"
quakes_major=pd.read_csv(lfile_path)
quakes_strong=pd.read_csv(sfile_path)
quakes_medium=pd.read_csv(mfile_path)
quakes_major.set_index('Year')
quakes_strong.set_index('Year')
quakes_medium.set_index('Year')


Unnamed: 0_level_0,Unnamed: 0,Lat,Lng,Depth,Mag,Sig,Place,Time,Month,Day,Country
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1980,3,38.8150,-27.7800,10.00,6.7,691,"Azores Islands, Portugal",1980-01-01 16:42:40.000,1,1,PT
1980,8,5.9840,126.1880,63.00,6.0,554,"Mindanao, Philippines",1980-01-02 20:58:44.200,1,2,PH
1980,52,41.6790,143.5890,33.00,6.0,554,"Hokkaido, Japan region",1980-01-12 15:57:01.900,1,12,JP
1980,61,-22.3980,-175.0350,33.00,6.0,554,Tonga region,1980-01-14 12:19:40.200,1,14,TO
1980,137,-17.6490,-171.1840,33.00,6.4,630,Tonga region,1980-02-03 11:58:39.800,2,3,NU
...,...,...,...,...,...,...,...,...,...,...,...
2020,67529,-20.3662,-69.0663,105.00,6.1,639,"114 km E of Iquique, Chile",2020-12-06 16:47:42.722,12,6,CL
2020,67544,24.7632,122.0098,73.17,6.1,706,"25 km E of Yilan, Taiwan",2020-12-10 13:19:58.910,12,10,TW
2020,67559,-21.8063,-68.7079,114.00,6.0,571,Chile-Bolivia border region,2020-12-14 15:20:49.542,12,14,CL
2020,67564,5.2071,125.4393,26.87,6.1,580,"Mindanao, Philippines",2020-12-15 23:21:59.435,12,15,PH


In [5]:
#Pick columns required to plot
columns = ['Year', 'Mag']
#From the  dataframes by magnitude isolate US and reduce columns
usa_quakes_major = quakes_major.loc[quakes_major["Country"] == "US", columns]
usa_quakes_strong = quakes_strong.loc[quakes_strong["Country"] == "US", columns]
usa_quakes_medium = quakes_medium.loc[(quakes_medium["Country"] == "US", columns) ]
#groupby year and get average
usa_quakes_major=usa_quakes_major.groupby('Year').mean()
usa_quakes_medium=usa_quakes_medium.groupby('Year').mean()
usa_quakes_strong=usa_quakes_strong.groupby('Year').mean()


In [12]:
fig,(axs1, axs2, axs3) = plt.subplots(3)
#fig,(axs1, axs2) = plt.subplots(2)
fig.suptitle('US Earthquakes over years (1980-2020)')
fig.set_size_inches(7,5)
major, = axs1.plot(usa_quakes_major.index, usa_quakes_major.values, color="red", label="Major quakes")
strong, =axs2.plot(usa_quakes_strong.index, usa_quakes_strong.values, color="blue", label="Strong quakes")
moderate, =axs3.plot(usa_quakes_medium.index, usa_quakes_medium.values, color="green", label="Moderate quakes")

<IPython.core.display.Javascript object>

In [13]:
# Add labels to X and Y axes :: Add title
plt.xlabel("Year")
plt.ylabel("Magnitude")
axs1.legend(handles=[major], loc=1)
axs2.legend(handles=[strong], loc=1)
axs3.legend(handles=[moderate], loc=1)
axs1.grid()
axs2.grid()
axs3.grid()
plt.show()

In [15]:
quakes_all_8489=pd.read_csv("output_data/quakes_all_8489.csv")

In [16]:
quakes_all_8489['Date']=pd.to_datetime(quakes_all_8489['Time']).dt.date
len(quakes_all_8489)

9970

In [17]:
columns = ['Date', 'Mag']
#From entire dataframe isolate US and reduce columns
quakes_all_8489.loc[quakes_all_8489["Country"] == "US",
                    columns].set_index('Date').plot(marker='.',
                                                    ms = 7, mfc = 'r',
                                                    linewidth=0.7, 
                                                    color='purple')


<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='Date'>

In [18]:
plt.title("Earthquakes over Time")
plt.xlabel("Year")
plt.ylabel("Magnitude")

Text(53.402777777777786, 0.5, 'Magnitude')

In [19]:
#Top quakes of  magnitude > 6
us_df=quakes_all_8489[['Lat','Lng', 'Depth', 'Mag', 'Sig', 'Time', 'Place', 'Country']]
quakes_8489=us_df.groupby(['Country']).filter(lambda x: (x['Country']=='US').any())
print(f'{len(quakes_8489)} quakes hit US over the duration of 1984 to 1989')
quakes_8489=quakes_8489.groupby(['Place']).filter(lambda x: (x['Mag']>6).any())
print(f'Out of those {len(quakes_8489)} quakes are of magnitude > 6')



499 quakes hit US over the duration of 1984 to 1989
Out of those 424 quakes are of magnitude > 6


In [20]:
quakes_8489.groupby('Place').count()

Unnamed: 0_level_0,Lat,Lng,Depth,Mag,Sig,Time,Country
Place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"164 km SE of Chignik, Alaska",1,1,1,1,1,1,1
"17km WNW of Westmorland, CA",1,1,1,1,1,1,1
"22km W of Westmorland, CA",1,1,1,1,1,1,1
"75 km WNW of Ninilchik, Alaska",1,1,1,1,1,1,1
Alaska Peninsula,18,18,18,18,18,18,18
"Andreanof Islands, Aleutian Islands, Alaska",237,237,237,237,237,237,237
Central California,17,17,17,17,17,17,17
"Fox Islands, Aleutian Islands, Alaska",66,66,66,66,66,66,66
Gulf of Alaska,18,18,18,18,18,18,18
"Hawaii region, Hawaii",5,5,5,5,5,5,5
