In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from matplotlib.pyplot import figure
import seaborn as sns
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.dates as mdates
from sklearn import linear_model
from sklearn.model_selection import TimeSeriesSplit
from sklearn.svm import SVR

In [None]:
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and model-fitting
# warnings raised by later cells — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Read the source CSV from the Kaggle input directory into the working frame.
DATA_PATH = '/kaggle/input/gold-price-prediction/goldstock.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Column dtypes as inferred by read_csv.
df.dtypes

In [None]:
# Convert the Date column to datetime64; entries that cannot be parsed become NaT
# (errors='coerce') instead of raising.
parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
df['Date'] = parsed_dates

In [None]:
# Re-inspect the dtypes after converting the Date column.
df.dtypes

In [None]:
# Remove the 'Unnamed: 0' column (presumably a row counter saved into the CSV —
# confirm against the file). Reassigning instead of inplace=True, together with
# errors='ignore', keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

# Summary statistics of the remaining columns.
df.describe()

In [None]:
# Mean, standard deviation and kurtosis of the gold closing price.
# Note: these are computed on the price level in `Close`, not on daily returns.
mean = df['Close'].mean()
std = df['Close'].std()
kurt = df['Close'].kurtosis()
print('Mean=',mean)
print('Standard Deviation=',std)
print('Kurtosis=',kurt)

# Histogram of closing prices with the mean and the one-standard-deviation band.
df['Close'].hist(bins=20)

# Black keeps the mean marker visible against a light figure background.
plt.axvline(mean, color='k', linestyle='dashed', linewidth=2, label='Mean')
# The band must be centred on the mean: vertical lines at x=+std / x=-std alone
# sit at absolute positions unrelated to the distribution (and -std stretches
# the x-axis into negative prices).
plt.axvline(mean + std, color='r', linestyle='dashed', linewidth=2, label='Mean ± 1 std')
plt.axvline(mean - std, color='r', linestyle='dashed', linewidth=2)
plt.title("Plotting of Mean, Standard deviation and Kurtosis of Gold Prices")
plt.legend()
plt.show()

In [None]:
# Pairwise correlation of the numeric columns only. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default, so the
# datetime `Date` column is excluded explicitly; select_dtypes works on both
# older and newer pandas versions.
correlation_matrix = df.select_dtypes(include='number').corr()

# Annotated heatmap of the correlation matrix.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.3f', linewidths=.5)
plt.title('Correlation Matrix')
plt.show()

In [None]:

# Distribution of closing prices: density-scaled histogram with a KDE overlay.
# sns.distplot is deprecated; histplot(kde=True, stat='density') is the
# replacement suggested by seaborn for the same picture.
sns.histplot(df['Close'], kde=True, stat='density', color='green');

In [None]:
# Target: the closing price. Features: every other column except the date.
Y = df['Close']
X = df.drop(columns=['Date', 'Close'])

# Train Test Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for evaluation; random_state fixes the shuffle.
# NOTE(review): train_test_split shuffles rows by default, so training and test
# observations are interleaved in time — confirm this is intended for a price series.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

# RandomForest Regressor

In [None]:
# Random forest with 100 trees. random_state is fixed (same seed as the split)
# so the fitted model and its reported score are reproducible across kernel
# restarts; without it every run bootstraps different trees.
regressor = RandomForestRegressor(n_estimators=100, random_state=2)


In [None]:

# Train the random forest on the training split.
regressor.fit(X_train,Y_train)

In [None]:
# Predict the target for the held-out feature rows.
test_data_prediction = regressor.predict(X_test)

In [None]:

# Coefficient of determination (R^2) of the predictions on the held-out split.
error_score = r2_score(y_true=Y_test, y_pred=test_data_prediction)
print("R squared error : ", error_score)

In [None]:
# Convert the test target to a plain list so that plotting it uses positions
# 0..n-1 on the x-axis rather than the original index labels.
Y_test = list(Y_test)


In [None]:
# Overlay actual and predicted values of the test split, in test-set order.
fig, ax = plt.subplots()
ax.plot(Y_test, color='red', label='Actual Value')
ax.plot(test_data_prediction, color='black', label='Predicted Value')
ax.set_title('Actual Price vs Predicted Price')
ax.set_xlabel('Number of values')
ax.set_ylabel('GLD Price')
ax.legend()
plt.show()

## LSTM

In [None]:
import plotly.express as px
from plotly import figure_factory as figfac
from termcolor import colored

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout,LSTM
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import math
from sklearn.metrics import mean_squared_error
!pip install keras-tuner --upgrade
from kerastuner.engine.hyperparameters import HyperParameters
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

In [None]:
# Render the first ten rows as a Plotly table figure.
preview_rows = df.head(10)
fig = figfac.create_table(preview_rows)
fig.show()

In [None]:
# Row count and the range of the closing price.
closing_prices = df["Close"]
print("Length of the dataset: ", len(df))
print("Maximum closing price of gold during last twenty two years: ", closing_prices.max())
print("Minimum closing price of gold during last twenty two years: ", closing_prices.min())

In [None]:
# Correlation of the numeric columns only; the datetime `Date` column is left
# out explicitly because DataFrame.corr() no longer drops non-numeric columns
# by default since pandas 2.0.
corr = df.select_dtypes(include='number').corr()
# Display the matrix as a colour-graded table.
corr.style.background_gradient(cmap = "copper")

In [None]:
# Annotated heatmap of the numeric-column correlations. Numeric columns are
# selected explicitly because DataFrame.corr() no longer drops the datetime
# `Date` column by default since pandas 2.0.
plt.figure(figsize = [15, 7], clear = True, facecolor = '#EAEAE6')
sns.heatmap(df.select_dtypes(include='number').corr(), annot = True, square = False,
            linewidths = 5, linecolor = "white", cmap = "Oranges");

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import seaborn as sns
# Plot defaults for the cells that follow: 14x8 default figure size and
# seaborn's 'darkgrid' style.
rcParams['figure.figsize'] = (14,8)
sns.set_style('darkgrid')


# ARIMA Model

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

In [None]:
# Index the frame by date and regularise it to business-day frequency.
# The guard makes the cell re-runnable: once `Date` has become the index it is
# no longer a column, and the original unconditional access raised on re-run.
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
    df = df.set_index('Date')

# Drop rows without a usable date, keep the first row per date, and sort
# chronologically so the forward fill below always copies from the previous day.
df = df[df.index.notna()]
df = df[~df.index.duplicated()].sort_index()

# 'B' is the canonical business-day alias; missing business days are forward-filled.
df = df.asfreq('B', method='ffill')


In [None]:
# Keep only observations strictly after the start of 2021: the partial date
# string '2021' is compared against the index as 2021-01-01 00:00.
after_cutoff = df.index > '2021'
data = df[after_cutoff]


In [None]:


# 2x2 grid: the closing price as-is, then its monthly, quarterly and annual means.
fig, axes = plt.subplots(2, 2, figsize=[15, 7])

# (panel title, resample rule); None means "plot the series without resampling".
panels = [
    ("Daily", None),
    ("Monthly", 'M'),
    ('Quarterly', 'Q'),
    ('Annualy', 'A'),
]
for ax, (title, rule) in zip(axes.ravel(), panels):
    series = data.Close if rule is None else data.Close.resample(rule).mean()
    ax.plot(series)
    ax.set_title(title, size=16)

plt.tight_layout()
plt.show()


# Seasonal Decompose

In [None]:
# Decompose the closing price with statsmodels' seasonal_decompose (default
# settings) and keep the three components for later inspection.
decompose_result = seasonal_decompose(data['Close'])
trend, seasonal, residual = (
    decompose_result.trend,
    decompose_result.seasonal,
    decompose_result.resid,
)
decompose_result.plot();

In [None]:
def plot_rolling_stats(series,window):
    """Plot a series together with its rolling mean and rolling standard deviation.

    Parameters
    ----------
    series : object exposing ``.rolling(window)`` (called below with a pandas Series).
    window : rolling window specification, passed straight to ``series.rolling``.

    The figure is shown with ``plt.show(block=False)``; nothing is returned.
    """
    rolling = series.rolling(window)
    # (values, line colour, legend label) for each curve, in drawing order.
    curves = (
        (series, 'blue', 'Original'),
        (rolling.mean(), 'red', 'Rolling mean'),
        (rolling.std(), 'black', 'Rolling std'),
    )

    plt.figure(figsize=(10,5))
    for values, colour, label in curves:
        plt.plot(values, color=colour, label=label)

    plt.title('Rolling Mean/Standard Deviation',size=20)
    plt.legend(loc='best')
    plt.show(block=False)

In [None]:
def stationarity_check(series):
    """Run the augmented Dickey-Fuller test on ``series`` and print the result.

    The printed pandas Series holds the test statistic, p-value, number of lags
    used, number of observations used, and one 'Critical Value (<level>)' entry
    per level reported by ``adfuller``. Lag selection uses ``autolag='AIC'``.
    Nothing is returned.
    """
    print('Results of Dickey Fuller Test:')
    adf_result = adfuller(series, autolag='AIC')

    labels = ['Test Statistic','p-value',
              '#Lags Used','Number of Observations Used']
    summary = dict(zip(labels, adf_result[:4]))
    # The fifth element of the result maps each significance level to its critical value.
    summary.update(
        {'Critical Value (%s)'%level: threshold
         for level, threshold in adf_result[4].items()}
    )

    print(pd.Series(summary))


In [None]:
# Rolling statistics (window of 30 observations) and Dickey-Fuller test on the
# closing price itself.
plot_rolling_stats(data.Close,30)
stationarity_check(data.Close)

In [None]:
# Same diagnostics on the first difference of the closing price; the first
# element (the NaN produced by diff()) is sliced off.
close_diff = data.Close.diff()[1:]
plot_rolling_stats(close_diff,30)
stationarity_check(close_diff)

In [None]:
# Partial autocorrelation (left) and autocorrelation (right) of the closing
# price for lags 1..20; lag 0 is omitted (zero=False).
fig, (ax_1, ax_2) = plt.subplots(1, 2, figsize=(14,5))
plot_pacf(data.Close,lags=20,zero=False,ax=ax_1)
plot_acf(data.Close,lags=20,zero=False,ax=ax_2);

In [None]:
class Data:
    """Sliding-window iterator over a sliceable dataset with a ``.values`` attribute.

    Each call to ``get_data`` returns the current training window and the
    observations that immediately follow it, then advances the window start by
    ``forecast_steps`` positions.
    """

    def __init__(self,dataset,window,forecast_steps=1):
        self.dataset, self.window, self.steps = dataset, window, forecast_steps
        # Start position of the current training window.
        self.index = 0

    def get_train(self):
        """Return the values of the ``window`` observations starting at ``index``."""
        start = self.index
        stop = start + self.window
        return self.dataset[start:stop].values

    def get_test(self):
        """Return the values of up to ``steps`` observations right after the window."""
        start = self.index + self.window
        return self.dataset[start:start + self.steps].values

    def get_data(self):
        """Return ``(train, test)`` for the current position, then slide forward by ``steps``."""
        pair = (self.get_train(), self.get_test())
        self.index += self.steps
        return pair


In [None]:
# Walk-forward forecast: fit a fresh ARIMA(2,1,1) on each sliding window and
# keep its one-step-ahead forecast.
warnings.filterwarnings('ignore')
steps = 1 ## day ahead
window = 5 ## business week

# One iteration per observation that follows the first full window.
iterations = int(len(data.Close[window:])/steps)
predictions = []

dataset = Data(data.Close,window,steps)

for _ in range(iterations):
    # get_data() also advances the window by `steps`; `test` is not used below.
    train,test = dataset.get_data()
    # NOTE(review): each model is fitted on only `window` (5) observations,
    # which is very little data for an ARIMA(2,1,1) — confirm this is intended.
    model = ARIMA(train,order=(2,1,1)).fit()
    # Only the first forecast value is kept, even if `steps` is raised above 1.
    predictions.append(model.forecast(steps=steps)[0])


In [None]:
# Align every forecast with the date it predicts. The index is derived from the
# data itself instead of a hard-coded start date, so it stays correct if the
# date filter on `data`, `window` or `steps` changes: the first forecast targets
# position `window`, each later one is `steps` positions further on, and the
# slice is trimmed to the number of forecasts actually produced.
date_index = data.index[window::steps][:len(predictions)]
df_pred = pd.DataFrame({'predictions':predictions},index=date_index)


In [None]:
# Forecasts (red) against the observed closing price (blue) over the same dates.
fig, ax = plt.subplots()
ax.plot(df_pred, color='red', label='predictions')
ax.plot(data.Close[window:], color='blue', label='original')
ax.set_title('Original vs Predictions', size=20)
ax.legend(loc='best');

In [None]:
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error

# Observed values for the forecast dates: everything after the first window.
true_values = data.Close[window:]

# Forecast accuracy. The MAPE is printed exactly as returned by scikit-learn,
# i.e. as a fraction rather than multiplied by 100.
mae = mean_absolute_error(true_values, predictions)
mape = mean_absolute_percentage_error(true_values, predictions)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Absolute Percentage Error: {mape}")
