In [None]:
import numpy as np
import pandas as pd
import math
import random
import itertools

# visualization
import seaborn as sns
sns.set_style('darkgrid')

# date libraries
import datetime
import holidays

# matplotlib libraries
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, DateFormatter
import matplotlib.dates as mdates

# scipy library
from scipy import stats
from scipy.stats import norm

# sklearn library
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# prophet library
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly, plot_cross_validation_metric
from prophet.diagnostics import performance_metrics, cross_validation


## defining useful functions

### Missing data summary

In [None]:
def missing_data(input_data):
    '''
    Summarize missing values and data types for every column of a dataframe.

    input: pandas df
    output: pandas df indexed by the input's column labels, with the columns
            'Total'   - number of null values in the column
            'Percent' - nulls as a percentage of the number of rows
            'Types'   - the column dtype rendered as a string

    '''

    # Build the null mask once and derive both statistics from it.
    null_counts = input_data.isnull().sum()
    percent = null_counts / len(input_data) * 100

    # Read dtypes positionally (not via input_data[col]) so that duplicate
    # column labels do not break the lookup.
    types = [str(dtype) for dtype in input_data.dtypes]

    # Assemble positionally against the column index; this keeps the
    # 'Total', 'Percent', 'Types' column order and works for any index.
    return pd.DataFrame(
        {
            'Total': null_counts.to_numpy(),
            'Percent': percent.to_numpy(),
            'Types': types,
        },
        index=null_counts.index,
    )

### Mean absolute percentage error (MAPE)

In [None]:
# mean absolute percentage error function
def mape(actual, pred):
    '''
    Mean Absolute Percentage Error (MAPE) Function, expressed in percent.

    Observations whose actual value is exactly 0 are excluded: the percentage
    error is undefined there, and dividing by zero would otherwise turn the
    whole result into inf or nan.

    input: list/series/array for actual values and predicted values
           (same shape, or broadcastable against each other)
    output: mape value; nan when no observation has a non-zero actual value
            (this includes empty input)
    raises: ValueError if the two inputs cannot be broadcast together
    '''

    actual, pred = np.broadcast_arrays(
        np.asarray(actual, dtype=float), np.asarray(pred, dtype=float)
    )

    # Only observations with a non-zero actual value have a defined percentage error.
    nonzero = actual != 0
    if not nonzero.any():
        return np.nan

    relative_error = (actual[nonzero] - pred[nonzero]) / actual[nonzero]
    return np.mean(np.abs(relative_error)) * 100

# read data

In [None]:
# Location of the stock-market CSV (a relative path, resolved against the working directory).
datapath = 'relevant_stock_market_data.csv'

In [None]:
# Load the CSV, parsing the DATE column as datetimes and using it as the index.
tsdata = pd.read_csv(datapath, parse_dates=['DATE'], index_col='DATE')

In [None]:
# Date range covered by the data. DATE is the index of tsdata (see read_csv),
# so the range is read from the index rather than from a column.
tsdata.index.min(), tsdata.index.max()

In [None]:
# keep only the rows whose SECTOR is AGRICULTURE
filtered_stock_data = tsdata.loc[tsdata['SECTOR'].eq('AGRICULTURE')]

In [None]:
# SECTOR is constant after the filter, so drop it. Rebinding to a new frame
# (instead of deleting in place) lets this cell be re-run without a KeyError
# and never mutates the filtered slice of tsdata.
filtered_stock_data = filtered_stock_data.drop(columns='SECTOR', errors='ignore')

In [None]:
# Preview the first rows of the agriculture-only frame.
filtered_stock_data.head()

In [None]:
# Reshape from long to wide: one column per SYMBOL holding its CLOSEPRICE,
# keeping the existing DATE index as the row index.
pivoted_tsdata = filtered_stock_data.pivot(columns='SYMBOL', values='CLOSEPRICE')

# The symbols are already the desired column names, so no renaming is needed;
# an explicit copy keeps final_tsdata independent of pivoted_tsdata.
final_tsdata = pivoted_tsdata.copy()


In [None]:
# Upsample to a daily index and fill the newly created days by linear
# interpolation between the observed prices of each symbol.
# NOTE(review): values before a symbol's first observation presumably stay NaN
# after this step (they are what the later fillna handles) — confirm.
agriculture_tsdata = final_tsdata.resample('D').interpolate(method='linear')

In [None]:
# Replace any values that are still missing with 0, rebinding the name
# instead of mutating the frame in place.
# NOTE(review): a price of 0 is not a real observation — confirm this is intended.
agriculture_tsdata = agriculture_tsdata.fillna(0)

# visualization

In [None]:
# Preview the first rows of the daily price table (one column per symbol).
agriculture_tsdata.head()

In [None]:
# Overview plot: all agriculture stocks on one set of axes.
# DataFrame.plot returns the Axes it drew on, so labels are set on it directly.
ax = agriculture_tsdata.plot(figsize=(15, 6))
ax.set_xlabel('Year')
ax.set_ylabel('Stock Price')
ax.grid(True)
ax.legend(title='agriculture stocks', loc='upper right')
plt.show()

In [None]:
# One figure per symbol, titled with the symbol name.
for column, prices in agriculture_tsdata.items():
    fig, ax = plt.subplots()
    ax.plot(prices)
    ax.set_title(column)
    plt.show()

In [None]:
# Null counts, null percentages and dtypes per symbol column
# (head() shows the first five rows of that summary).
missing_data(agriculture_tsdata).head()