In [1]:
from shapely.geometry import Point
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster, HeatMap, HeatMapWithTime
import ipywidgets as widgets
from IPython.display import HTML, display
from collections import defaultdict
import re

In [2]:
eq_table = pd.read_csv('../Data/04_deadly_eq_mag_cleaned.csv', index_col=False)

In [3]:
eq_table.columns

Index(['origin (utc)', 'present-day country and link to wikipedia article',
       'lat', 'long', 'depth (km)', 'magnitude', 'secondary effects',
       'pde shaking deaths', 'pde total deaths', 'utsu total deaths',
       'em-dat total deaths', 'other source deaths', 'deaths',
       'original_magnitude', 'original_magnitude_scale'],
      dtype='object')

In [4]:
eq_table['origin (utc)'] = pd.to_datetime(eq_table['origin (utc)'])
eq_table['origin_utc_string'] = eq_table['origin (utc)'].dt.strftime("%Y-%m-%dT%H:%M:%S")

In [5]:
eq_table['lat'] = pd.to_numeric(eq_table['lat'], errors='coerce')

In [6]:
eq_table['long'] = pd.to_numeric(eq_table['long'], errors='coerce')

In [7]:
def lat_long_to_point(row):
    return Point(row['long'], row['lat'])

In [8]:
eq_table['position'] = eq_table.apply(lat_long_to_point, axis=1)

In [9]:
eq_table_geo = gpd.GeoDataFrame(eq_table, crs='epsg:4326', geometry=eq_table['position'])

In [10]:
eq_table_geo.columns

Index(['origin (utc)', 'present-day country and link to wikipedia article',
       'lat', 'long', 'depth (km)', 'magnitude', 'secondary effects',
       'pde shaking deaths', 'pde total deaths', 'utsu total deaths',
       'em-dat total deaths', 'other source deaths', 'deaths',
       'original_magnitude', 'original_magnitude_scale', 'origin_utc_string',
       'position', 'geometry'],
      dtype='object')

In [11]:
eq_table_geo['original_magnitude'] = eq_table_geo['original_magnitude'].fillna(0.0)

In [12]:
eq_table_geo['present-day country and link to wikipedia article'].nunique()

397

In [13]:
eq_table_geo['present-day country and link to wikipedia article'].describe()

count      1340
unique      397
top       China
freq        132
Name: present-day country and link to wikipedia article, dtype: object

In [14]:
eq_table_geo = eq_table_geo.rename(columns={'present-day country and link to wikipedia article':'country'})

In [15]:
eq_table_geo.value_counts('country')

country
China                                             132
Indonesia                                          81
Iran                                               78
Japan                                              62
Turkey                                             60
                                                 ... 
New Zealand (see 2007 Gisborne earthquake)          1
New Zealand (see 2011 Christchurch earthquake)      1
Nicaragua (see 1931 Nicaragua earthquake)           1
Nicaragua (see 1972 Nicaragua earthquake)           1
(see 1997 Cariaco earthquake)                       1
Length: 397, dtype: int64

In [16]:
eq_table_geo.iloc[1033]

origin (utc)                          1997-07-09 19:24:00
country                     (see 1997 Cariaco earthquake)
lat                                                 10.45
long                                              -63.532
depth (km)                                             10
magnitude                                          6.9 Mw
secondary effects                                       L
pde shaking deaths                                     81
pde total deaths                                       81
utsu total deaths                                      81
em-dat total deaths                                    80
other source deaths                                   NaN
deaths                                                 81
original_magnitude                                    6.9
original_magnitude_scale                               Mw
origin_utc_string                     1997-07-09T19:24:00
position                            POINT (-63.532 10.45)
geometry      

In [17]:
def fix_country(row):
    try:
        value = re.sub("[\(\[].*?[\)\]]", "",str(row))
        value = re.sub('\s\(.*',"",str(row))
        return value
    except:
        return None

In [18]:
eq_table_geo['country'] = [fix_country(row) for row in eq_table_geo['country']]

In [19]:
eq_table_geo['country'][1033] = 'Venezuela'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eq_table_geo['country'][1033] = 'Venezuela'


In [20]:
eq_table_geo.iloc[1033]

origin (utc)                  1997-07-09 19:24:00
country                                 Venezuela
lat                                         10.45
long                                      -63.532
depth (km)                                     10
magnitude                                  6.9 Mw
secondary effects                               L
pde shaking deaths                             81
pde total deaths                               81
utsu total deaths                              81
em-dat total deaths                            80
other source deaths                           NaN
deaths                                         81
original_magnitude                            6.9
original_magnitude_scale                       Mw
origin_utc_string             1997-07-09T19:24:00
position                    POINT (-63.532 10.45)
geometry                    POINT (-63.532 10.45)
Name: 1033, dtype: object

In [21]:
eqs_by_country = eq_table_geo.groupby(by='country')['origin (utc)'].count()

In [22]:
eqs_by_country = eqs_by_country.reset_index()

In [23]:
eqs_by_country

Unnamed: 0,country,origin (utc)
0,Afghanistan,30
1,Albania,7
2,Algeria,21
3,Argentina,11
4,Armenia,2
...,...,...
104,Venezuela,12
105,Vietnam,1
106,Wallis and Futuna,1
107,Yemen,3


In [24]:
eqs_by_country = eqs_by_country.rename(columns={'origin (utc)':'total earthquakes'})

In [25]:
eqs_by_country

Unnamed: 0,country,total earthquakes
0,Afghanistan,30
1,Albania,7
2,Algeria,21
3,Argentina,11
4,Armenia,2
...,...,...
104,Venezuela,12
105,Vietnam,1
106,Wallis and Futuna,1
107,Yemen,3


In [26]:
eqs_by_country['total earthquakes'].median()

3.0

In [27]:
eqs_by_country.describe()

Unnamed: 0,total earthquakes
count,109.0
mean,12.293578
std,24.657019
min,1.0
25%,1.0
50%,3.0
75%,9.0
max,166.0


In [28]:
eqs_by_country_z1 = eqs_by_country[eqs_by_country['total earthquakes'] > 24 ]

In [29]:
countries_z1 = eqs_by_country_z1['country'].to_list()

In [30]:
countries_z1

['Afghanistan',
 'Chile',
 'China',
 'Greece',
 'India',
 'Indonesia',
 'Iran',
 'Italy',
 'Japan',
 'Mexico',
 'Pakistan',
 'Peru',
 'Philippines',
 'Taiwan',
 'Turkey',
 'United States']

In [31]:
eq_table_geo_z1 = eq_table_geo[eq_table_geo['country'].isin(countries_z1)]

In [32]:
eq_table_geo_z1

Unnamed: 0,origin (utc),country,lat,long,depth (km),magnitude,secondary effects,pde shaking deaths,pde total deaths,utsu total deaths,em-dat total deaths,other source deaths,deaths,original_magnitude,original_magnitude_scale,origin_utc_string,position,geometry
0,1900-05-11 17:23:00,Japan,38.700,141.100,5,7.0 MJMA,,,,,,,,7.0,MJMA,1900-05-11T17:23:00,POINT (141.1 38.7),POINT (141.10000 38.70000)
1,1900-07-12 06:25:00,Turkey,40.300,43.100,,5.9 Muk,,,,140.0,,,140.0,5.9,Muk,1900-07-12T06:25:00,POINT (43.1 40.3),POINT (43.10000 40.30000)
3,1901-02-15 00:00:00,China,26.000,100.100,0,6.5 Ms,,,,,,,,6.5,Ms,1901-02-15T00:00:00,POINT (100.1 26),POINT (100.10000 26.00000)
5,1901-08-09 09:23:00,Japan,40.500,142.500,35,7.2 Mw,T,,,,,,,7.2,Mw,1901-08-09T09:23:00,POINT (142.5 40.5),POINT (142.50000 40.50000)
7,1902-01-30 14:01:00,Japan,40.500,141.300,35,6.9 Ms,,,,1.0,,,1.0,6.9,Ms,1902-01-30T14:01:00,POINT (141.3 40.5),POINT (141.30000 40.50000)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1334,2011-03-11 14:46:00,Japan,38.322,142.369,24.4,9.0 Mw,T,,,,,15894.0,15894.0,9.0,Mw,2011-03-11T14:46:00,POINT (142.369 38.322),POINT (142.36900 38.32200)
1336,2011-04-07 14:32:00,Japan,38.200,140.000,66.0,7.1 Mw,,,,,,,,7.1,Mw,2011-04-07T14:32:00,POINT (140 38.2),POINT (140.00000 38.20000)
1337,2011-09-18 12:40:00,India,27.723,88.064,19.7,6.9 Mw,L,,,,,111.0,111.0,6.9,Mw,2011-09-18T12:40:00,POINT (88.06399999999999 27.723),POINT (88.06400 27.72300)
1338,2011-09-23 10:41:00,Turkey,38.600,43.500,7.2,7.1 Mw,,,,,,601.0,601.0,7.1,Mw,2011-09-23T10:41:00,POINT (43.5 38.6),POINT (43.50000 38.60000)


In [33]:
world_map = folium.Map(tiles='Stamen Terrain')

In [34]:
heatmapdata = [
        (row['geometry'].y, row['geometry'].x, row['original_magnitude']) for idx, row 
            in eq_table_geo_z1.iterrows() 
                if row['geometry'] and not pd.isna(row['geometry'].x) and not pd.isna(row['geometry'].y and not pd.isna(row['original_magnitude']))
    ]

In [35]:
heatmapdata

[(38.7, 141.1, 7.0),
 (40.3, 43.1, 5.9),
 (26.0, 100.1, 6.5),
 (40.5, 142.5, 7.2),
 (40.5, 141.3, 6.9),
 (40.7, 33.6, 5.5),
 (43.2, 129.6, 6.7),
 (40.0, 77.0, 7.7),
 (39.14, 42.65, 7.0),
 (40.9, 42.7, 5.8),
 (36.36, 22.97, 8.3),
 (35.2, 58.2, 6.5),
 (23.5, 120.5, 6.0),
 (37.75, 27.0, 6.2),
 (30.0, 101.0, 6.8),
 (23.5, 120.3, 6.3),
 (33.0, 76.0, 7.8),
 (34.1, 132.5, 7.0),
 (39.0, 16.0, 6.8),
 (40.0, 24.0, 6.8),
 (23.6, 120.4, 6.8),
 (23.6, 120.4, 7.1),
 (38.0, -123.0, 7.8),
 (-33.0, -72.0, 8.2),
 (43.5, 85.0, 7.2),
 (2.0, 94.5, 7.5),
 (17.0, -100.0, 7.9),
 (14.0, 123.0, 7.1),
 (23.0, 121.1, 6.8),
 (38.0, 15.5, 7.2),
 (38.6, 26.9, 5.8),
 (33.0, 50.0, 7.3),
 (40.0, 38.0, 6.8),
 (25.0, 122.5, 7.1),
 (24.4, 103.0, 6.5),
 (-2.0, 101.0, 7.2),
 (35.4, 136.3, 6.7),
 (30.0, 68.0, 7.0),
 (32.3, 131.1, 7.5),
 (35.0, 122.0, 6.8),
 (36.0, 24.5, 6.9),
 (25.0, 123.0, 7.6),
 (32.0, 56.0, 6.7),
 (17.5, -102.5, 7.6),
 (28.0, 130.0, 8.1),
 (38.0, 20.5, 6.8),
 (40.5, 27.0, 7.6),
 (19.83, -99.92, 6.7),
 (4.

In [36]:
hm = HeatMap(data=heatmapdata)

In [37]:
hm.add_to(world_map)

<folium.plugins.heat_map.HeatMap at 0x24f757a1430>

In [38]:
world_map

In [39]:
data_points = []
max_mag = 0
for idx, row in eq_table_geo_z1.sort_values('origin (utc)').iterrows():
    if row['geometry'] and not pd.isna(row['geometry'].x) and not pd.isna(row['geometry'].y):
        data_points.append((row['origin (utc)'].to_pydatetime(), row['geometry'].y, row['geometry'].x, row['original_magnitude']))
        if row['original_magnitude'] > max_mag:
            max_mag = row['original_magnitude']

by_year=defaultdict(list)
for record in data_points:
    by_year[f'{record[0].strftime("%Y")}'].append([record[1],record[2], 1])
    
over_time = []
time_index = []
for key, values in by_year.items():
    over_time.append(values)
    time_index.append(key)

In [40]:
over_time

[[[38.7, 141.1, 1], [40.3, 43.1, 1]],
 [[26.0, 100.1, 1], [40.5, 142.5, 1]],
 [[40.5, 141.3, 1], [40.7, 33.6, 1], [43.2, 129.6, 1], [40.0, 77.0, 1]],
 [[39.14, 42.65, 1], [40.9, 42.7, 1], [36.36, 22.97, 1], [35.2, 58.2, 1]],
 [[23.5, 120.5, 1], [37.75, 27.0, 1], [30.0, 101.0, 1], [23.5, 120.3, 1]],
 [[33.0, 76.0, 1], [34.1, 132.5, 1], [39.0, 16.0, 1], [40.0, 24.0, 1]],
 [[23.6, 120.4, 1],
  [23.6, 120.4, 1],
  [38.0, -123.0, 1],
  [-33.0, -72.0, 1],
  [43.5, 85.0, 1]],
 [[2.0, 94.5, 1], [17.0, -100.0, 1], [14.0, 123.0, 1]],
 [[23.0, 121.1, 1], [38.0, 15.5, 1]],
 [[38.6, 26.9, 1],
  [33.0, 50.0, 1],
  [40.0, 38.0, 1],
  [25.0, 122.5, 1],
  [24.4, 103.0, 1],
  [-2.0, 101.0, 1],
  [35.4, 136.3, 1],
  [30.0, 68.0, 1],
  [32.3, 131.1, 1]],
 [[35.0, 122.0, 1], [36.0, 24.5, 1], [25.0, 123.0, 1]],
 [[32.0, 56.0, 1], [17.5, -102.5, 1], [28.0, 130.0, 1]],
 [[38.0, 20.5, 1], [40.5, 27.0, 1], [19.83, -99.92, 1]],
 [[4.5, 126.5, 1], [-17.0, -74.0, 1], [-14.2, -72.9, 1], [24.5, 102.0, 1]],
 [[31.6, 

In [41]:
mapheattime = folium.Map(tiles='Stamen Terrain')
hm = HeatMapWithTime(data=over_time,
    index=time_index,
    auto_play=False,
    max_opacity=0.3,
    )
hm.add_to(mapheattime)

<folium.plugins.heat_map_withtime.HeatMapWithTime at 0x24f75694160>

In [42]:
mapheattime