In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Install Facebook's Prophet library
!pip install fbprophet

In [None]:
# Directory holding the input dataset; file names are appended to it directly,
# so the trailing slash is required.
ROOT = "../input/earthquake-database/"

In [None]:
# Read file: load database.csv from ROOT, then show its (rows, columns)
# shape and the first five rows.
df = pd.read_csv(ROOT + 'database.csv')
print(df.shape)
df.head()

In [None]:
import datetime  # kept: cells further down may rely on this import

# Extract year for filtering purpose.
# 'Date' is parsed as MM/DD/YYYY; values that are missing or in any other
# format become NaT and are mapped to the sentinel year -1 (instead of a bare
# `except:` around a row-by-row loop, which would also hide unrelated errors).
parsed_dates = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce')
year = parsed_dates.dt.year.fillna(-1).astype(int).tolist()
print(year[:5])

In [None]:
# Insert new column 'Year' at position 1.
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run on the same kernel.
if 'Year' not in df.columns:
    df.insert(loc=1, column='Year', value=year)
df.head(5)

In [None]:
# Drop unused columns: `sample` keeps only the four columns named in `keep`,
# in the order they appear in `df`.
keep = np.array(['Date', 'Year', 'Latitude', 'Longitude'])
cols = np.array(df.columns)
drop = [name for name in cols if name not in set(keep)]
sample = df.drop(columns=drop)
sample.head()

In [None]:
# Plotting the number of earthquakes per year, 1965 through 2016.
# The comprehension variable is local, so the `year` list built earlier in the
# notebook is not overwritten by this cell.
d = {yr: int((sample['Year'] == yr).sum()) for yr in range(1965, 2017)}
fig = plt.figure(figsize=(10, 4))
plt.plot(list(d.keys()), list(d.values()), marker='o')
plt.xlabel('Year')
plt.ylabel('Number of earthquakes')
plt.grid()
plt.show()

In [None]:
# Longitude bounds of each 15-degree-wide band, keyed by signed hour offset c.
# Positive keys hold (low, high); negative keys hold the mirrored pair, i.e.
# (high, low). For c == 0 both assignments hit key 0 and the second one,
# (-7.5, 7.5), is the value that remains.
gmt_range = {}
for c in range(12):
    low = 15 * c - 7.5
    gmt_range[-c] = (-low, -low - 15)
    gmt_range[c] = (low, low + 15)
# The +12 band wraps around the antimeridian, so its bounds are not ordered.
gmt_range[12] = (172.5, -172.5)
gmt_range

In [None]:
def _yearly_counts(in_band):
    """Count the rows of `sample` selected by `in_band` for each year 1965-2016.

    Args:
        in_band: boolean Series aligned with `sample`, True for rows that
            belong to the longitude band being counted.

    Returns:
        dict mapping each year (int) to the number of selected rows (int).
    """
    band_years = sample.loc[in_band, 'Year']
    return {yr: int((band_years == yr).sum()) for yr in range(1965, 2017)}


# mainFrame maps a band label ('GMT+3', 'GMT-5', ...) to its {year: count} dict.
# Insertion order (GMT+0..GMT+11, GMT-1..GMT-11, GMT+12) fixes maindf's columns.
longitude = sample['Longitude']
mainFrame = {}

# Bands GMT+0 .. GMT+11: gmt_range stores (low, high); interval is (low, high].
for timezone in range(12):
    lon_low, lon_high = gmt_range[timezone]
    mainFrame['GMT+' + str(timezone)] = _yearly_counts(
        (longitude > lon_low) & (longitude <= lon_high)
    )

# Bands GMT-1 .. GMT-11: gmt_range stores (high, low) for negative offsets.
for timezone in range(1, 12):
    lon_high, lon_low = gmt_range[-timezone]
    mainFrame['GMT-' + str(timezone)] = _yearly_counts(
        (longitude <= lon_high) & (longitude > lon_low)
    )

# GMT+12 wraps around +/-180 degrees, hence the OR of the two half-open ends.
lon_low, lon_high = gmt_range[12]
mainFrame['GMT+12'] = _yearly_counts((longitude > lon_low) | (longitude <= lon_high))

maindf = pd.DataFrame(mainFrame)
maindf.plot.line(figsize=(20, 10))

In [None]:
import seaborn as sn

# Pairwise correlation between the columns of maindf, drawn as an annotated
# heatmap on a 20x20 figure.
fig, ax = plt.subplots(figsize=(20, 20))
corr = maindf.corr()
sn.heatmap(data=corr, annot=True, ax=ax)

In [None]:
# One line chart per column of maindf, arranged on a 12 x 2 grid.
maindf.plot.line(subplots=True, layout=(12, 2), figsize=(15, 30))

In [None]:
# Preview the last ten (year, count) pairs of the GMT+12 band, using the
# 'ds' / 'y' column names.
pd.DataFrame(mainFrame['GMT+12'].items(), columns=['ds', 'y']).tail(10)

In [None]:
from fbprophet import Prophet
from fbprophet.plot import plot

In [None]:
class ProphetGMT:
    """Yearly earthquake-count forecaster for one GMT longitude band.

    Reads the band's {year: count} mapping from the module-level ``mainFrame``,
    holds out the most recent years, fits a Prophet model on the remaining
    ones and forecasts exactly the held-out span.

    Attributes set by ``__init__``:
        stz: band label used as key into ``mainFrame`` ('GMT+3', 'GMT-5', ...).
        train, test: DataFrames with columns ``ds`` (31 December of each year)
            and ``y`` (count); ``test`` holds the last ``test_years`` rows.
        model: the fitted Prophet instance.
        future: history plus forecast dates, from ``make_future_dataframe``.
        forecast: the model's prediction for every row of ``future``.
    """

    def __init__(self, tz, holidays=None, test_years=16):
        """Fit the model for band ``tz`` and forecast the held-out years.

        Args:
            tz: signed integer hour offset of the band (e.g. -11 .. 12).
            holidays: optional holidays DataFrame passed straight to Prophet.
            test_years: number of most recent years held out from training
                and then forecast. Hold-out size and forecast horizon are the
                same number, so the last ``test_years`` rows of ``forecast``
                line up with ``test`` (previously 10 years were held out but
                16 were forecast, so the forecast tail ran past the data).

        Raises:
            ValueError: if ``test_years`` is smaller than 1.
            KeyError: if ``mainFrame`` has no entry for the band label.
        """
        if test_years < 1:
            raise ValueError('test_years must be at least 1')

        self.stz = 'GMT+' + str(tz) if tz >= 0 else 'GMT' + str(tz)

        prophet_data = pd.DataFrame(list(mainFrame[self.stz].items()), columns=['ds', 'y'])
        # Stamp every yearly count on 31 December so the history dates match
        # the 'A' frequency used for the forecast dates below.
        prophet_data['ds'] = pd.to_datetime(
            prophet_data['ds'].astype(str) + '-12-31', format='%Y-%m-%d'
        )

        self.train = prophet_data[:-test_years].copy()
        self.test = prophet_data[-test_years:].copy()

        self.model = Prophet(holidays=holidays)
        self.model.fit(self.train)
        self.future = self.model.make_future_dataframe(
            periods=test_years, freq='A', include_history=True
        )
        self.forecast = self.model.predict(self.future)

    def plot_forecast(self):
        """Plot the stored forecast with the fitted model and return the result of ``model.plot``."""
        # self.stz already starts with 'GMT', so it is not prefixed again.
        return self.model.plot(fcst=self.forecast, ylabel='y ({})'.format(self.stz))

In [None]:
# Fit a single model for the GMT+12 band as a quick check.
m = ProphetGMT(tz=12)

In [None]:
# Inspect the last rows of the GMT+12 forecast (a bare `m.` is a SyntaxError
# and stops Restart & Run All).
m.forecast.tail()

In [None]:
# Example: fit one model per band, offsets -11 through 12, without holidays.
gmt_models = {offset: ProphetGMT(offset) for offset in range(-11, 13)}
gmt_models

In [None]:
# Draw the forecast figure of every band, offsets -11 through 12.
for offset in range(-11, 13):
    band_model = gmt_models[offset]
    band_model.plot_forecast()

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
from math import sqrt

In [None]:
# R^2 of each band's forecast on its held-out years (models without holidays).
r2_gmt = {}
for i in range(-11, 13):
    gmt_model = gmt_models[i]
    # Pair predictions with actuals by date. Taking the last rows of the
    # forecast and a hard-coded year range pairs them by position only, which
    # is wrong whenever the forecast dates and the held-out years differ.
    scored = gmt_model.test.merge(gmt_model.forecast[['ds', 'yhat']], on='ds', how='inner')
    r2_gmt[i] = r2_score(y_true=scored['y'], y_pred=scored['yhat'])
plt.figure(figsize=(10,6))
plt.grid()
plt.bar(range(len(r2_gmt)), list(r2_gmt.values()), align='center')
plt.xlabel('GMT offset')
plt.ylabel('R^2')
plt.xticks(range(len(r2_gmt)), list(r2_gmt.keys()))

In [None]:
# Error of each band's forecast on its held-out years (models without
# holidays). Despite the `mse_` name the stored value is the square root of
# the mean squared error, i.e. the RMSE.
mse_gmt = {}
for i in range(-11, 13):
    gmt_model = gmt_models[i]
    # Pair predictions with actuals by date. Taking the last rows of the
    # forecast and a hard-coded year range pairs them by position only, which
    # is wrong whenever the forecast dates and the held-out years differ.
    scored = gmt_model.test.merge(gmt_model.forecast[['ds', 'yhat']], on='ds', how='inner')
    mse_gmt[i] = sqrt(mean_squared_error(y_true=scored['y'], y_pred=scored['yhat']))
plt.figure(figsize=(10,6))
plt.grid()
plt.bar(range(len(mse_gmt)), list(mse_gmt.values()), align='center')
plt.xlabel('GMT offset')
plt.ylabel('RMSE')
plt.xticks(range(len(mse_gmt)), list(mse_gmt.keys()))

In [None]:
# manual annotation: hand-picked year-end dates, all labelled 'peak', stored
# as a two-column frame ('holiday', 'ds') that is later passed as `holidays=`.
# NOTE(review): several dates are recent years - confirm they are meant to be
# known to the model when it forecasts those same years.
peak = pd.DataFrame({
    'holiday': 'peak',
    'ds': ['1976-12-31', '1990-12-31', '1995-12-31', '2000-12-31', '2004-12-31', '2007-12-31', '2011-12-31'],
})
peak

In [None]:
# Refit one model per band (offsets -11 through 12), this time passing the
# `peak` frame as holidays.
gmt_models_holidays = {
    offset: ProphetGMT(offset, holidays=peak) for offset in range(-11, 13)
}
gmt_models_holidays

In [None]:
# Draw the forecast figure of every holiday-aware model, offsets -11 through 12.
for offset in range(-11, 13):
    band_model = gmt_models_holidays[offset]
    band_model.plot_forecast()

In [None]:
# R^2 of each band's forecast on its held-out years (models with holidays).
r2_gmt_holidays = {}
for i in range(-11, 13):
    gmt_model = gmt_models_holidays[i]
    # Pair predictions with actuals by date. Taking the last rows of the
    # forecast and a hard-coded year range pairs them by position only, which
    # is wrong whenever the forecast dates and the held-out years differ.
    scored = gmt_model.test.merge(gmt_model.forecast[['ds', 'yhat']], on='ds', how='inner')
    r2_gmt_holidays[i] = r2_score(y_true=scored['y'], y_pred=scored['yhat'])
plt.figure(figsize=(10,6))
plt.grid()
plt.bar(range(len(r2_gmt_holidays)), list(r2_gmt_holidays.values()), align='center')
plt.xlabel('GMT offset')
plt.ylabel('R^2')
plt.xticks(range(len(r2_gmt_holidays)), list(r2_gmt_holidays.keys()))

In [None]:
# Error of each band's forecast on its held-out years (models with holidays).
# Despite the `mse_` name the stored value is the square root of the mean
# squared error, i.e. the RMSE.
mse_gmt_holidays = {}
for i in range(-11, 13):
    gmt_model = gmt_models_holidays[i]
    # Pair predictions with actuals by date. Taking the last rows of the
    # forecast and a hard-coded year range pairs them by position only, which
    # is wrong whenever the forecast dates and the held-out years differ.
    scored = gmt_model.test.merge(gmt_model.forecast[['ds', 'yhat']], on='ds', how='inner')
    mse_gmt_holidays[i] = sqrt(mean_squared_error(y_true=scored['y'], y_pred=scored['yhat']))
plt.figure(figsize=(10,6))
plt.grid()
plt.bar(range(len(mse_gmt_holidays)), list(mse_gmt_holidays.values()), align='center')
plt.xlabel('GMT offset')
plt.ylabel('RMSE')
plt.xticks(range(len(mse_gmt_holidays)), list(mse_gmt_holidays.keys()))

In [None]:
# Overlay the per-band error of the two model families: first the models
# fitted without the `peak` holidays, then the ones fitted with them.
plt.figure(figsize=(10,6))
for scores in (mse_gmt, mse_gmt_holidays):
    plt.plot(list(scores.keys()), list(scores.values()))
plt.legend(['before seasonality', 'after seasonality'], loc='upper left')
plt.grid()

**Predicted vs. actual number of earthquakes per year for each GMT band**

In [None]:
# One panel per band (offsets -11 through 12, filled row by row on an 8 x 3
# grid): actual vs. predicted yearly counts for the held-out years of the
# holiday-aware models.
fig, ax = plt.subplots(8, 3, figsize=(25,25))
for panel, i in zip(ax.flat, range(-11, 13)):
    gmt_model = gmt_models_holidays[i]
    # Pair predictions with actuals by date rather than by row position.
    scored = gmt_model.test.merge(gmt_model.forecast[['ds', 'yhat']], on='ds', how='inner')
    years = list(scored['ds'].dt.year)

    # Plot against the real years. Calling set_xticklabels alone would paint
    # the labels onto whatever tick positions matplotlib picked, so the tick
    # positions are fixed first.
    panel.plot(years, scored['y'], label='True')
    panel.plot(years, scored['yhat'], label='Prediction')

    panel.legend(loc="upper left")
    panel.set_xticks(years)
    panel.set_xticklabels(years, rotation=45)
    panel.set_title(gmt_model.stz)
fig.tight_layout()