# Title: Icelandic Fishery Production ML Time-Series-Analysis

# 1. Import libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import datetime as dt
import matplotlib as plt 
from matplotlib import pyplot
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import statsmodels
from statsmodels.tsa.ar_model import AutoReg
import plotly.express as px
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf, month_plot, quarter_plot
# NOTE: `plt` in this notebook is the top-level `matplotlib` package (not
# pyplot); its `style` and `rcParams` attributes are used for global settings.
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and newer releases no longer accept the bare 'seaborn' name, so use whichever
# name this installation actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Default size (inches) for every figure drawn below.
plt.rcParams["figure.figsize"] = (16, 8)
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 2. Create the dataframe and filter it to the rows for Iceland

In [None]:
# Map each raw CSV header to the short column name used in the rest of the
# notebook. Renaming by name (rather than assigning `df.columns` positionally)
# keeps the labels correct even if the CSV stores the columns in another order,
# because `usecols` selects columns but does not reorder them.
column_names = {
    "Entity": "Country",
    "Code": "Code",
    "Year": "Year",
    "Capture fisheries production (metric tons)": "Fishery Production",
}
df = (
    pd.read_csv(
        "../input/fish-and-overfishing/capture-fishery-production.csv",
        usecols=list(column_names),
    )
    .rename(columns=column_names)
    # Fix the column order explicitly; later cells slice columns by position.
    [list(column_names.values())]
)
df.head()

In [None]:
# Keep only the rows whose Country value is exactly 'Iceland'.
is_iceland = df['Country'].eq('Iceland')
df = df[is_iceland]

In [None]:
# Renumber the remaining rows from 0 so later positional slices line up.
# Reassigning (instead of inplace=True) yields a fresh frame rather than
# mutating the filtered selection, so the cell is safe to re-run.
df = df.reset_index(drop=True)
df.head()

# 3. Check out the historical data

In [None]:
# Line chart of production per year; approach adapted from
# https://towardsdatascience.com/5-types-of-plots-that-will-help-you-with-time-series-analysis-b63747818705
production_ax = sns.lineplot(data=df, x='Year', y='Fishery Production')
production_ax.set_title('Iceland Fishery Production');

In [None]:
# Trend / seasonal / residual decomposition; approach adapted from
# https://towardsdatascience.com/5-types-of-plots-that-will-help-you-with-time-series-analysis-b63747818705
# NOTE(review): the rows appear to be one observation per year, so period=12
# treats every 12 years as one "seasonal" cycle -- confirm this is intended.
production = df['Fishery Production']
decomposition = seasonal_decompose(production, model='multiplicative', period=12)
decomposition.plot();

# 4. Set the time variable as the index and isolate the fishery production column

In [None]:
# Parse the Year column as a timestamp (1 January of that year) and make it
# the index.
# - errors='ignore' is dropped: it is deprecated in recent pandas and silently
#   returned the unparsed input on failure, which would hide bad data.
# - The result is reassigned instead of mutated in place, so 'Year' is never
#   half-converted if the parse raises.
df = df.assign(Year=pd.to_datetime(df['Year'], format='%Y')).set_index('Year')

In [None]:
# Single-column frame holding only the production values (indexed like `df`).
# Selecting by column name instead of position 2 keeps this correct even if the
# column order of `df` ever changes.
tsa_df = df[['Fishery Production']]
tsa_df.head()

## 5. Train a statsmodels AutoReg model, predict the held-out last 7 years of data, and validate the predictions with RMSE

In [None]:
# Walk-forward validation of an autoregressive model; adapted from
# https://machinelearningmastery.com/autoregression-models-time-series-forecasting-python/

# split dataset
# Flatten the single-column frame to a 1-D float array so that every lag,
# prediction and observation below is a plain scalar (the 2-D shape made each
# prediction a 1-element array, which '%f' formatting only accepts with a
# NumPy deprecation warning).
X = tsa_df.values.ravel().astype(float)
n_test = 7  # number of most recent observations held out for validation
# Train on everything before the hold-out window. The slice used to start at
# index 1, which silently discarded the first observation.
train, test = X[:len(X)-n_test], X[len(X)-n_test:]

# train autoregression
window = 9  # number of lagged observations the model regresses on
model = AutoReg(train, lags=window)
model_fit = model.fit()
coef = model_fit.params  # used below as [intercept, lag-1 coef, ..., lag-`window` coef]

# walk forward over time steps in test
# `history` holds the most recent observations, oldest first; it is seeded with
# the tail of the training data and grows by one true observation per step.
history = list(train[len(train)-window:])
predictions = list()
for t in range(len(test)):
    lag = history[-window:]
    # coef[1] multiplies the most recent value, so reverse the lags
    # (newest first) before taking the dot product.
    yhat = coef[0] + np.dot(coef[1:window+1], lag[::-1])
    obs = test[t]
    predictions.append(yhat)
    # One-step-ahead evaluation: feed the real observation, not the prediction.
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)

# plot
pyplot.plot(test, label='expected')
pyplot.plot(predictions, color='red', label='predicted')
pyplot.legend()
pyplot.show()

## The plot above compares the predicted values (red) with the expected values for the held-out years

# 6. Slice the data to show a total of 11 years of Fishery Production in Iceland, from 2015 to 2025. The 2019-2025 values are predicted Fishery Production (metric tons)!

In [None]:
# Observed production for rows 55-58 of the Iceland frame (the notebook labels
# these as 2015-2018), selected by column name rather than position.
olddf = df.iloc[55:59][['Fishery Production']]

# Calendar years the forecast covers, parsed to timestamps for the index.
yr = ['2019','2020','2021','2022','2023','2024','2025']
yrn = pd.to_datetime(yr, format='%Y')

# The walk-forward `predictions` from the validation cell are one-step-ahead
# estimates for the last 7 *observed* years, so they cannot be relabelled as
# 2019-2025. Refit the same 9-lag autoregression on the complete series and
# forecast one value per future year instead.
# NOTE(review): assumes the observed series ends in 2018 -- confirm.
full_series = df['Fishery Production'].to_numpy(dtype=float)
future_fit = AutoReg(full_series, lags=9).fit()
future_values = np.asarray(future_fit.forecast(steps=len(yr)), dtype=float)

ndf = pd.DataFrame({"Fishery Production": future_values}, index=yrn)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
fin = pd.concat([olddf, ndf])
fin.head()

# 7. Plot the 11 years of data!

In [None]:
# Line chart of the combined frame, using its index as the x axis.
forecast_ax = sns.lineplot(data=fin, x=fin.index, y='Fishery Production')
forecast_ax.set_title('Iceland Fishery Production from 2015 - 2025');

In [None]:
# Multiplicative decomposition of the combined series with a 5-step period.
combined_series = fin['Fishery Production']
decomposition = seasonal_decompose(combined_series, period=5, model='multiplicative')
decomposition.plot();