In [1]:
import pandas as pd

# Read only the seven columns listed here from the earthquake CSV.
wanted_columns = ['time', 'title', 'place', 'magType', 'mag', 'alert', 'tsunami']
df = pd.read_csv('data/earthquakes.csv', usecols=wanted_columns)

In [2]:
# Capture the text after the first ", " in each `place` value, then list the
# distinct captures in sorted order.
after_comma = df['place'].str.extract(r', (.*$)')[0]
after_comma.sort_values().unique()

array(['Afghanistan', 'Alaska', 'Argentina', 'Arizona', 'Arkansas',
       'Australia', 'Azerbaijan', 'B.C., MX', 'Barbuda', 'Bolivia',
       'Bonaire, Saint Eustatius and Saba ', 'British Virgin Islands',
       'Burma', 'CA', 'California', 'Canada', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Colorado', 'Costa Rica',
       'Dominican Republic', 'East Timor', 'Ecuador', 'Ecuador region',
       'El Salvador', 'Fiji', 'Greece', 'Greenland', 'Guam', 'Guatemala',
       'Haiti', 'Hawaii', 'Honduras', 'Idaho', 'Illinois', 'India',
       'Indonesia', 'Iran', 'Iraq', 'Italy', 'Jamaica', 'Japan', 'Kansas',
       'Kentucky', 'Kyrgyzstan', 'Martinique', 'Mauritius', 'Mayotte',
       'Mexico', 'Missouri', 'Montana', 'NV', 'Nevada', 'New Caledonia',
       'New Hampshire', 'New Mexico', 'New Zealand', 'Nicaragua',
       'North Carolina', 'Northern Mariana Islands', 'Oklahoma', 'Oregon',
       'Pakistan', 'Papua New Guinea', 'Peru', 'Philippines',
       'Puerto Rico', 'Roman

In [4]:
# Build `parsed_place` from `place` by applying the substitutions below in
# order and then trimming surrounding whitespace.
# Each entry is (pattern, replacement, is_regex).
place_fixes = [
    (r'.* of ', '', True),           # drop everything through the last " of "
    ('the ', '', False),             # remove "the "
    (r'CA$', 'California', True),    # trailing "CA" -> "California"
    (r'NV$', 'Nevada', True),        # trailing "NV" -> "Nevada"
    (r'MX$', 'Mexico', True),        # trailing "MX" -> "Mexico"
    (r' region$', '', True),         # chop off a trailing " region"
    ('northern ', '', False),        # remove "northern "
    ('Fiji Islands', 'Fiji', False), # line up the Fiji places
    (r'^.*, ', '', True),            # keep only what follows the last ", "
]
parsed_place = df['place']
for pattern, replacement, is_regex in place_fixes:
    parsed_place = parsed_place.str.replace(pattern, replacement, regex=is_regex)
df['parsed_place'] = parsed_place.str.strip()

In [7]:
# Index the frame by `parsed_place` and show the first rows whose index label
# contains "Japan".
by_place = df.set_index('parsed_place')
by_place.filter(like='Japan', axis='index').head()

Unnamed: 0_level_0,alert,mag,magType,place,time,title,tsunami
parsed_place,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Japan,,4.6,mb,"160km NNW of Nago, Japan",1539448501800,"M 4.6 - 160km NNW of Nago, Japan",0
Japan,,5.2,mww,"7km ESE of Asahi, Japan",1539317747370,"M 5.2 - 7km ESE of Asahi, Japan",0
Japan,,4.5,mwr,"14km E of Tomakomai, Japan",1539303265990,"M 4.5 - 14km E of Tomakomai, Japan",0
Japan,,4.7,mb,"139km WSW of Naze, Japan",1539238726290,"M 4.7 - 139km WSW of Naze, Japan",0
Japan,,4.6,mb,"53km ESE of Kamaishi, Japan",1539115120470,"M 4.6 - 53km ESE of Kamaishi, Japan",0


In [22]:
# 95th percentile of magnitude for rows parsed as Japan with magType "mb".
is_japan_mb = (df['parsed_place'] == 'Japan') & (df['magType'] == 'mb')
df.loc[is_japan_mb, 'mag'].quantile(0.95)

4.9

In [85]:
# Share of rows parsed as Indonesia whose `tsunami` value equals 1,
# rendered as a percentage with two decimals.
indonesia = df[df['parsed_place'] == 'Indonesia']
indonesia_with_tsunami = indonesia[indonesia['tsunami'] == 1]
per = len(indonesia_with_tsunami) / len(indonesia)
f"{per:.2%}"

'23.13%'

In [89]:
# Summary statistics over every column for the rows parsed as Nevada.
nevada = df.loc[df['parsed_place'] == 'Nevada']
nevada.describe(include='all')

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,parsed_place,ring_of_fire
count,0.0,681.0,681,681,681.0,681,681.0,681,681
unique,0.0,,2,419,,630,,1,1
top,,,ml,"55km ENE of Beatty, Nevada",,"M -0.3 - 55km ENE of Beatty, Nevada",,Nevada,False
freq,,,680,14,,4,,681,681
mean,,0.500073,,,1538314000000.0,,0.0,,
std,,0.69671,,,596563700.0,,0.0,,
min,,-0.5,,,1537247000000.0,,0.0,,
25%,,-0.1,,,1537854000000.0,,0.0,,
50%,,0.4,,,1538280000000.0,,0.0,,
75%,,0.9,,,1538821000000.0,,0.0,,


In [77]:
# Place-name patterns for Ring of Fire locations. The entries are joined with
# "|" and used as a single regular expression elsewhere in this notebook.
# The Mexico entry is a raw string so that `\s` reaches the regex engine
# intact (in a plain literal it is an invalid escape sequence and triggers a
# warning); its negative lookbehind keeps "New Mexico" from matching.
ring_of_fire = [
    'Alaska', 'Antarctic', 'Bolivia', 'California', 'Canada', 'Chile',
    'Costa Rica', 'Ecuador', 'Fiji', 'Guatemala', 'Indonesia', 'Japan',
    'Kermadec Islands', r'(?<!New\s)Mexico', 'New Zealand', 'Peru',
    'Philippines', 'Russia', 'Taiwan', 'Tonga', 'Washington',
]

In [78]:
# Flag each row whose `parsed_place` matches any of the `ring_of_fire`
# patterns, combined into one alternation regex.
ring_of_fire_pattern = r'|'.join(ring_of_fire)
df['ring_of_fire'] = df['parsed_place'].str.contains(ring_of_fire_pattern, regex=True)

In [79]:
# Report how many rows are flagged as Ring of Fire and how many are not.
inside_count = df['ring_of_fire'].sum()
outside_count = len(df) - inside_count
print(f"inside of RoF: {inside_count}")
print(f"outside of RoF: {outside_count}")

inside of RoF: 7188
outside of RoF: 2144


In [80]:
# Tally of True/False values in the `ring_of_fire` column.
df['ring_of_fire'].value_counts()

True     7188
False    2144
Name: ring_of_fire, dtype: int64

In [81]:
# Show the rows parsed as New Mexico, including their `ring_of_fire` flag.
is_new_mexico = df['parsed_place'] == 'New Mexico'
df.loc[is_new_mexico]

Unnamed: 0,alert,mag,magType,place,time,title,tsunami,parsed_place,ring_of_fire
9016,,2.5,mb_lg,"35km W of Raton, New Mexico",1537304402660,"M 2.5 - 35km W of Raton, New Mexico",0,New Mexico,False


In [82]:
# Number of rows selected by combining the `ring_of_fire` flag with the
# `tsunami` column.
rof_and_tsunami = df['ring_of_fire'] & df['tsunami']
len(df[rof_and_tsunami])

45