In [1]:
import pandas as pd
from datetime import datetime
import math
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [5]:
from postgresql.connection import create_connection, close_connection
from config import settings

conn = create_connection(
            settings.database.database_name,
            settings.database.username,
            settings.database.password,
            settings.database.host,
            settings.database.port,
        )


events_df=pd.read_sql('Select * from events',conn)
nearby_cities=pd.read_sql('Select * from nearby_cities',conn)

## drop NaNs
events_df = events_df.dropna().reset_index(drop=True)

Connection to PostgreSQL DB successful


### Q6. Biggest earthquake in 2017

In [6]:
events_with_max_magnitude=events_df[events_df['property.mag']==events_df['property.mag'].max()]
events_with_max_magnitude

Unnamed: 0,id,type,property.mag,property.place,property.detail,geometry.type,geometry.coordinates,generated_at,created_at,updated_at
39195,us2000ahv0,Feature,8.0,"near the coast of Chiapas, Mexico",https://earthquake.usgs.gov/fdsnws/event/1/que...,Point,"[-94, 15, 47]",1668030409000,2017-09-06 00:37:25+00:00,2017-12-07 20:00:15+00:00


In [7]:
def get_magnitude_bucket(x: float):
    """
    Given a magnitude, this method decides magnitude bucket
    following the rule
    """
    if x < 0: 
        return '0-1'
    elif x > 6: 
        return '>6'
    else: 
        return str(math.floor(x))+'-'+str(math.floor(x)+1)

#### Identify magnitude_bucket for each earthquake event

In [8]:
events_df['hour'] = events_df['created_at'].apply(lambda x: x.hour)
events_df['magnitude_bucket'] = events_df['property.mag'].apply (lambda x: get_magnitude_bucket(x))
events_df.head()

Unnamed: 0,id,type,property.mag,property.place,property.detail,geometry.type,geometry.coordinates,generated_at,created_at,updated_at,hour,magnitude_bucket
0,ci37842271,Feature,1.0,"8km NE of Aguanga, CA",https://earthquake.usgs.gov/fdsnws/event/1/que...,Point,"[-117, 33, 4]",1668030350000,2017-10-31 11:51:02+00:00,2020-07-06 20:14:01+00:00,11,1-2
1,us1000byd4,Feature,4.0,"15 km N of Weston, Colorado",https://earthquake.usgs.gov/fdsnws/event/1/que...,Point,"[-105, 37, 4]",1668030350000,2017-10-31 11:51:02+00:00,2020-07-06 20:14:01+00:00,11,4-5
2,ak017gqdjyv3,Feature,1.0,"79 km NNW of Alatna, Alaska",https://earthquake.usgs.gov/fdsnws/event/1/que...,Point,"[-153, 67, 11]",1668030350000,2017-10-31 11:51:02+00:00,2020-07-06 20:14:01+00:00,11,1-2
3,ci37842255,Feature,1.0,"6km N of Big Bear City, CA",https://earthquake.usgs.gov/fdsnws/event/1/que...,Point,"[-117, 34, 1]",1668030350000,2017-10-31 11:51:02+00:00,2020-07-06 20:14:01+00:00,11,1-2
4,us1000byd0,Feature,4.0,"61 km E of Kyzyl-Eshme, Kyrgyzstan",https://earthquake.usgs.gov/fdsnws/event/1/que...,Point,"[73, 39, 22]",1668030350000,2017-10-31 11:51:02+00:00,2020-07-06 20:14:01+00:00,11,4-5


#### The following cell depicts the earthquake frequency per magnitude bucket and per hour of the day

In [9]:
earthquake_freq_per_bucket_hr=events_df.groupby(['magnitude_bucket', 'hour']).size().to_frame('earthquake_freq').reset_index()
earthquake_freq_per_bucket_hr.head()

Unnamed: 0,magnitude_bucket,hour,earthquake_freq
0,0-1,0,12072
1,0-1,10,14639
2,0-1,11,7370
3,0-1,13,8228
4,0-1,17,6036


### Q7.  The highest probable hour of the day for earthquakes bucketed by magnitudes of earthquake

In [16]:
indices_to_fetch = earthquake_freq_per_bucket_hr.groupby(['magnitude_bucket'])['earthquake_freq'].transform(max) == earthquake_freq_per_bucket_hr['earthquake_freq']
earthquake_freq_per_bucket_hr[indices_to_fetch]

Unnamed: 0,magnitude_bucket,hour,earthquake_freq
1,0-1,10,14639
6,1-2,10,15787
11,2-3,10,4677
16,3-4,10,1109
21,4-5,10,3337
26,5-6,10,415
31,6-7,10,27
35,>6,0,4
