In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import statsmodels.api as sm
import re
import plotly.express as px
import requests
from bs4 import BeautifulSoup as BS
from io import StringIO
%matplotlib inline

In [2]:
# Wikipedia page that the earthquake tables are scraped from.
url = (
    "https://en.wikipedia.org/wiki/"
    "List_of_deadly_earthquakes_since_1900"
)

In [3]:
# Parse every HTML <table> on the page; pd.read_html returns a list of DataFrames.
wiki = pd.read_html(io=url)

In [4]:
# Keep only the table at position 4 of the list returned by pd.read_html.
# NOTE(review): the index is positional, so it depends on the current page
# layout — verify it still points at the earthquake table if the page changes.
# The isinstance guard makes the cell safe to re-run: once `wiki` is already a
# DataFrame, `wiki[4]` would be a column lookup and raise KeyError.
if isinstance(wiki, list):
    wiki = wiki[4]

In [5]:
# Rename the scraped headers to short snake_case column names.
wiki = wiki.rename(
    columns={
        "Origin (UTC)": "origin",
        "Present-day country and link to Wikipedia article": "country",
        "Lat": "lat",
        "Long": "long",
        "Depth (km)": "depth_km",
        "Magnitude": "magnitude",
        "Secondary Effects": "secondary_effects",
        "PDE Shaking Deaths": "pde_shaking_deaths",
        "PDE Total Deaths": "pde_total_deaths",
        "Utsu Total Deaths": "utsu_total_deaths",
        "EM-DAT Total Deaths": "emdat_total_deaths",
        "Other Source Deaths": "other_source_deaths",
    }
)

In [6]:
# Display the table with its renamed columns.
wiki

Unnamed: 0,origin,country,lat,long,depth_km,magnitude,secondary_effects,pde_shaking_deaths,pde_total_deaths,utsu_total_deaths,emdat_total_deaths,other_source_deaths
0,1900-05-11 17:23,Japan,38.700,141.100,5,7.0 MJMA,,,,,,
1,1900-07-12 06:25,Turkey,40.300,43.100,,5.9 Muk,,,,140.0,,
2,1900-10-29 09:11,Venezuela,11.000,-66.000,0,7.7 Mw,,,,,,
3,1901-02-15 00:00,China,26.000,100.100,0,6.5 Ms,,,,,,
4,1901-03-31 07:11,Bulgaria,43.400,28.700,,6.4 Muk,,,,4.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1335,2011-03-24 20:25,Burma (see 2011 Burma earthquake),,,,6.8 Mw,,,,,,150+
1336,2011-04-07 14:32,Japan (see April 2011 Miyagi earthquake),38.2,140.0,66.0,7.1 Mw,,,,,,
1337,2011-09-18 12:40,India (see 2011 Sikkim earthquake),27.723,88.064,19.7,6.9 Mw,L,,,,,111+
1338,2011-09-23 10:41,Turkey (see 2011 Van earthquake),38.6,43.5,7.2,7.1 Mw,,,,,,"601 (as of October 30, 2011)"


In [7]:
# Drop the free-text "other source" death column; its values are strings such
# as "150+" rather than plain numbers.
# errors='ignore' keeps the cell re-runnable: `del wiki[...]` raises KeyError
# once the column is already gone.
wiki = wiki.drop(columns=['other_source_deaths'], errors='ignore')

In [8]:
# List the column labels currently on the table.
wiki.columns

Index(['origin', 'country', 'lat', 'long', 'depth_km', 'magnitude',
       'secondary_effects', 'pde_shaking_deaths', 'pde_total_deaths',
       'utsu_total_deaths', 'emdat_total_deaths'],
      dtype='object')

In [9]:
# Add a `deaths` column: the row-wise maximum across the four death-count
# source columns (max skips NaN by default, so a single reported figure is kept).
wiki['deaths'] = wiki.loc[
    :,
    [
        'pde_shaking_deaths',
        'pde_total_deaths',
        'utsu_total_deaths',
        'emdat_total_deaths',
    ],
].max(axis='columns')

In [10]:
# Split values such as "7.0 MJMA" on single spaces: the first piece stays in
# `magnitude`, the second becomes `measure`, and the third goes into a
# throwaway `del` column (empty for the rows shown in the output).
magnitude_parts = wiki['magnitude'].str.split(pat=' ', expand=True)
wiki[['magnitude', 'measure', 'del']] = magnitude_parts
wiki

Unnamed: 0,origin,country,lat,long,depth_km,magnitude,secondary_effects,pde_shaking_deaths,pde_total_deaths,utsu_total_deaths,emdat_total_deaths,deaths,measure,del
0,1900-05-11 17:23,Japan,38.700,141.100,5,7.0,,,,,,,MJMA,
1,1900-07-12 06:25,Turkey,40.300,43.100,,5.9,,,,140.0,,140.0,Muk,
2,1900-10-29 09:11,Venezuela,11.000,-66.000,0,7.7,,,,,,,Mw,
3,1901-02-15 00:00,China,26.000,100.100,0,6.5,,,,,,,Ms,
4,1901-03-31 07:11,Bulgaria,43.400,28.700,,6.4,,,,4.0,,4.0,Muk,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335,2011-03-24 20:25,Burma (see 2011 Burma earthquake),,,,6.8,,,,,,,Mw,
1336,2011-04-07 14:32,Japan (see April 2011 Miyagi earthquake),38.2,140.0,66.0,7.1,,,,,,,Mw,
1337,2011-09-18 12:40,India (see 2011 Sikkim earthquake),27.723,88.064,19.7,6.9,L,,,,,,Mw,
1338,2011-09-23 10:41,Turkey (see 2011 Van earthquake),38.6,43.5,7.2,7.1,,,,,,,Mw,


In [11]:
# The magnitude split produced three columns; discard the leftover `del` one.
# errors='ignore' keeps the cell re-runnable: `del wiki['del']` raises KeyError
# once the column has already been removed.
wiki = wiki.drop(columns=['del'], errors='ignore')

In [12]:
# Normalise the `country` column.
# The patterns are regular expressions, so they are written as raw strings
# (avoids invalid "\(" escapes) and passed with regex=True explicitly:
# pandas >= 2.0 defaults to regex=False, which would treat them as literal
# text and leave the column unchanged.
wiki['country'] = (
    wiki['country']
    # Remove a trailing " (...)" note, e.g. " (see 2011 Van earthquake)".
    .str.replace(r'( \(.*\)?)', '', regex=True)
    # A bare "(see 1997 Cariaco earthquake)" with no leading space is not
    # caught above; map it to its country.
    .str.replace(r'(\(see 1997 Cariaco earthquake\))', 'Venezuela', regex=True)
    # "Iran, 2005 Qeshm earthquake" has no parentheses in the data; the ones
    # in the pattern are only a regex group.
    .str.replace(r'(Iran, 2005 Qeshm earthquake)', 'Iran', regex=True)
)

In [14]:
# Normalise the `country` column: strip link notes, then map historical or
# territorial names to the names used in the rest of the analysis.
# The first three patterns are regular expressions, so they are raw strings
# (avoids invalid "\(" escapes) with regex=True stated explicitly:
# pandas >= 2.0 defaults to regex=False, which would treat them as literal
# text and silently match nothing. The remaining replacements are plain
# substring swaps and are marked regex=False.
wiki['country'] = (
    wiki['country']
    # Remove a trailing " (...)" note, e.g. " (see 2011 Van earthquake)".
    .str.replace(r'( \(.*\)?)', '', regex=True)
    # A bare "(see 1997 Cariaco earthquake)" with no leading space is not
    # caught above; map it to its country.
    .str.replace(r'(\(see 1997 Cariaco earthquake\))', 'Venezuela', regex=True)
    # The parentheses here are only a regex group, not literal characters.
    .str.replace(r'(Iran, 2005 Qeshm earthquake)', 'Iran', regex=True)
    .str.replace('US Territory of Alaska', 'United States', regex=False)
    .str.replace('Mandatory Palestine and Transjordan', 'Palestine', regex=False)
    .str.replace('Burma', 'Myanmar', regex=False)
    .str.replace('Tajik Soviet Socialist Republic', 'Tajikistan', regex=False)
    .str.replace('Kingdom of Yugoslavia', 'Yugoslavia', regex=False)
)

In [15]:
# Display the table after the country names have been cleaned up.
wiki

Unnamed: 0,origin,country,lat,long,depth_km,magnitude,secondary_effects,pde_shaking_deaths,pde_total_deaths,utsu_total_deaths,emdat_total_deaths,deaths,measure
0,1900-05-11 17:23,Japan,38.700,141.100,5,7.0,,,,,,,MJMA
1,1900-07-12 06:25,Turkey,40.300,43.100,,5.9,,,,140.0,,140.0,Muk
2,1900-10-29 09:11,Venezuela,11.000,-66.000,0,7.7,,,,,,,Mw
3,1901-02-15 00:00,China,26.000,100.100,0,6.5,,,,,,,Ms
4,1901-03-31 07:11,Bulgaria,43.400,28.700,,6.4,,,,4.0,,4.0,Muk
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335,2011-03-24 20:25,Myanmar,,,,6.8,,,,,,,Mw
1336,2011-04-07 14:32,Japan,38.2,140.0,66.0,7.1,,,,,,,Mw
1337,2011-09-18 12:40,India,27.723,88.064,19.7,6.9,L,,,,,,Mw
1338,2011-09-23 10:41,Turkey,38.6,43.5,7.2,7.1,,,,,,,Mw
