In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and echo the full path of each file it contains.
for folder, _, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# We will import the libraries needed to work with the dataset.

* **warnings** -> to eliminate unnecessary warnings
* **matplotlib** -> for visualization
* **prophet** -> forecasting model

In [None]:
import warnings

import matplotlib.pyplot as plt

# The forecasting package was renamed from `fbprophet` to `prophet` in its 1.0
# release. Try the current name first and fall back to the legacy one so the
# notebook runs on both newer and older environments.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Silence all warnings for the rest of the session.
# NOTE(review): this blanket filter also hides pandas/Prophet warnings that may
# point at real problems; consider narrowing it to specific categories.
warnings.simplefilter('ignore')

In [None]:
%config Completer.use_jedi = False

# Turns off jedi in IPython's completer; the author notes this is for autocomplete
# (presumably to work around slow or unresponsive Tab completion - confirm).

With the help of pandas we can import the dataset

In [None]:
# Read the earthquake CSV from the Kaggle input directory and preview the first rows.
csv_path = '/kaggle/input/indian-earthquakes-dataset2018-onwards/Indian_earthquake_data.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
df.info()

As we can see, the dataset has 6 columns, and the date column is stored as `object`. We first strip the time of day from the date column and then convert it to a datetime type.

In [None]:
# Keep only the leading characters of 'Origin Time' (the date part), dropping the
# time of day. The vectorised `.str` accessor replaces the per-row lambda, and a
# plain `df['time']` key is the standard way to add one new column (a list key
# for a column that does not exist yet is not accepted by every pandas version).
# NOTE(review): 11 characters are kept, as in the original; this presumably covers
# 'YYYY-MM-DD' plus a separator - confirm against the raw data.
df['time'] = df['Origin Time'].astype(str).str[:11]
df.head()

In [None]:
# Keep only the columns used downstream. `.copy()` makes this an independent
# frame, so the later assignment to df['time'] modifies `df` itself rather than
# a slice of the original frame (which triggers pandas' SettingWithCopyWarning,
# currently hidden by the global warnings filter).
df = df[['time', 'Depth', 'Magnitude']].copy()
df.head()

In [None]:
# Parse the date strings in 'time' into datetime values, then re-check the dtypes.
parsed_dates = pd.to_datetime(df['time'])
df['time'] = parsed_dates
df.info()

Now we have a subset from the original dataset, let us visualize the data and gather the insights

In [None]:
# Magnitude of every recorded event over time. A title and axis labels are set
# so the figure can be understood on its own when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['time'], df['Magnitude'])
ax.set(title='Earthquake magnitude over time', xlabel='Date', ylabel='Magnitude')
plt.show()

In [None]:
# Average the remaining columns over all events that share the same 'time' value.
events_by_date = df.groupby('time')
grouped_df = events_by_date.mean()
grouped_df

In [None]:
# Mean magnitude per date, with a title and axis labels. The trailing `;`
# keeps the Axes repr out of the cell output.
ax = grouped_df['Magnitude'].plot(title='Mean magnitude per day')
ax.set(xlabel='Date', ylabel='Mean magnitude');

To proceed with the modelling, we resample the data to a daily frequency (aggregating with a sum) and then pass the resampled data to the model's `fit` method.

In [None]:
# Only the magnitude is modelled below, so discard the 'Depth' column.
model_df = grouped_df.drop('Depth', axis=1)
model_df

In [None]:
# Resample onto a daily ('D') frequency, summing the values that fall in each day.
# NOTE(review): dates with no recorded events presumably end up as 0 after the
# sum - confirm that a zero magnitude is the intended value for quiet days.
model_df = model_df.resample('D').sum()
model_df

In [None]:
# Move the date index back into an ordinary column (rebinding the name rather
# than mutating in place).
model_df = model_df.reset_index()

In [None]:
# Rename the two columns positionally to 'ds' (date) and 'y' (target), the names
# used by the Prophet cells below.
# NOTE(review): this relies on the frame having exactly two columns in
# date, value order at this point.
model_df.columns = ['ds','y']


In [None]:
# Display the frame that will be fed to the model.
model_df

For future predictions the dataset is divided into two; train and test. This will help us to see the test data predictions.

In [None]:
# Hold out the final 30 rows as the test set; everything before them is training data.
test_days = 30
train_df = model_df.iloc[:-test_days]
test_df = model_df.iloc[-test_days:]

In [None]:
# Display the training split.
train_df

In [None]:
# Display the held-out test split.
test_df

In [None]:
# Configure the model for the daily series. The data has one observation per
# calendar day (the time of day was stripped when the timestamps were truncated),
# so an intra-day "daily" seasonality cannot be identified from it and is
# switched off. Weekly and yearly seasonality stay enabled.
m = Prophet(daily_seasonality=False, weekly_seasonality=True, yearly_seasonality=True)
# Fit on the training split only; the held-out rows are used for evaluation later.
model = m.fit(train_df)

In [None]:
# Extend the training dates by 50 daily steps and predict over the history plus
# that horizon, then preview the first rows of the result.
horizon_days = 50
future = m.make_future_dataframe(periods=horizon_days, freq='D')
forecast = m.predict(future)
forecast.head()

We now use the fitted model to predict on the held-out test data and inspect the predictions.

In [None]:
# Predict for the dates of the held-out test rows and preview the result.
forecast_test = m.predict(df=test_df)
forecast_test.head()

In [None]:
# Plot the fitted history together with the forecast horizon.
future_eq = m.plot(fcst=forecast)

In [None]:
# Break the forecast down into its trend and seasonal components.
m.plot_components(fcst=forecast);

# Inference

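* From the weekly component, the model suggests a higher chance of earthquakes on Fridays compared with the other days of the week. No conclusion about the time of day can be drawn: the timestamps were truncated to dates before modelling, so any intra-day pattern (such as a peak at midnight) is an artefact of that truncation rather than evidence about when earthquakes occur.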

* In addition, the yearly component suggests a higher likelihood of earthquakes in April and August, and a lower likelihood in January and September.

In [None]:
# Narrow the test forecast to the date, the point estimate and its uncertainty bounds.
keep_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast_test = forecast_test[keep_cols]
forecast_test

In [None]:
# Plot the forecast for the test dates.
m.plot(fcst=forecast_test);

In [None]:
# Compare the predictions with the held-out observations on one labelled figure.
# `plt.show()` ends the cell so the Legend repr is not echoed.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(forecast_test['ds'], forecast_test['yhat'], c='b', label='Forecast')
ax.plot(test_df['ds'], test_df['y'], c='r', label='Actual data')
ax.set(title='Forecast vs. actual (test period)', xlabel='Date', ylabel='y')
ax.legend()
plt.show()

# **Monthly forecasting**

In [None]:
# Work on an independent copy so the daily frame itself stays untouched.
resampled_data = model_df.copy(deep=True)
resampled_data

In [None]:
# Make 'ds' the index so the frame can be resampled by date (rebinding the name
# rather than mutating in place).
resampled_data = resampled_data.set_index('ds')

In [None]:
# Aggregate the daily series to a monthly ('M') frequency by averaging each month's values.
# NOTE(review): despite its name, `monthly_forecast` holds observed (resampled)
# data at this point, not model predictions.
monthly_forecast = resampled_data.resample('M').mean()
monthly_forecast

In [None]:
# Turn the date index back into a regular 'ds' column (rebinding the name rather
# than mutating in place).
monthly_forecast = monthly_forecast.reset_index()

In [None]:
# Display the monthly frame that the second model is fitted on.
monthly_forecast

In [None]:
# Monthly mean of the daily series, with a title and axis labels. The trailing
# `;` keeps the matplotlib repr out of the cell output.
fig, ax = plt.subplots()
ax.plot(monthly_forecast['ds'], monthly_forecast['y'])
ax.set(title='Monthly mean of the daily series', xlabel='Month', ylabel='y');

In [None]:
# Fit a second Prophet model, using multiplicative seasonality, on the monthly series.
# NOTE(review): `model` is rebound here, replacing the value returned by the
# daily model's fit; the monthly series is fitted in full, with no hold-out split.
m_month = Prophet(seasonality_mode='multiplicative')
model = m_month.fit(monthly_forecast)

In [None]:
# Forecast 5 additional monthly steps beyond the observed months and show the
# last rows of the result.
months_ahead = 5
future_month = m_month.make_future_dataframe(periods=months_ahead, freq='M')
forecast_month = m_month.predict(future_month)
forecast_month.tail()

In [None]:
# Plot the monthly history together with the 5-step forecast.
m_month.plot(fcst=forecast_month);

# Inference

Since we have only a small amount of data, we get an overall aggregated range of predictions. We can infer that most of the points lie between the upper and lower limits (the blue shaded area).