Importing the Libraries

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read
# by the loading cell further down.
# NOTE: `google.colab` is only importable inside a Google Colab runtime; this
# cell will fail elsewhere.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

Data Collection and Processing

In [None]:
# Read the gold-price CSV from the mounted Drive folder into a pandas DataFrame.
gold_data = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/gld_price_data.csv',
)

In [None]:
# Preview the first five rows of the DataFrame.
gold_data.head(n=5)

In [None]:
# Preview the last five rows of the DataFrame.
gold_data.tail(n=5)

In [None]:
# Dimensions of the dataset as a (number of rows, number of columns) tuple.
gold_data.shape

In [None]:
# Summary of the DataFrame: column names, non-null counts, dtypes and memory usage.
gold_data.info()

In [None]:
# Count the missing entries in each column.
gold_data.isna().sum()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
gold_data.describe()

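Correlation:
1. Positive Correlation – the two variables tend to move in the same direction
2. Negative Correlation – the two variables tend to move in opposite directions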

In [None]:
# Pairwise (Pearson) correlation between the numeric columns.
# The non-numeric 'Date' column has to be excluded explicitly: since pandas 2.0
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# instead. select_dtypes keeps this working on both older and newer pandas.
correlation = gold_data.select_dtypes(include='number').corr()


In [None]:
# Visualise the correlation matrix as an annotated seaborn heatmap
# on an 8x8 inch figure.
plt.figure(figsize=(8, 8))
sns.heatmap(
    correlation,
    cmap='Blues',
    annot=True,
    fmt='.1f',
    annot_kws={'size': 8},
    square=True,
    cbar=False,
)

In [None]:
# Show how strongly each column correlates with the GLD price.
print(correlation.loc[:, 'GLD'])

In [None]:
# Distribution of the GLD price.
# sns.distplot is deprecated and has been removed from recent seaborn releases;
# histplot with a density-normalised histogram plus a KDE overlay is the
# documented replacement and gives the same kind of figure.
sns.histplot(gold_data['GLD'], color='green', kde=True, stat='density')

Splitting the Features and Target

In [None]:
# Features (X): every column except the date stamp and the GLD target.
# Target (Y): the GLD price column.
X = gold_data.drop(columns=['Date', 'GLD'])
Y = gold_data.loc[:, 'GLD']

In [None]:
# Inspect the feature columns (everything except 'Date' and 'GLD').
X

In [None]:
# Inspect the target column (the GLD price).
Y

Splitting into Training data and Test Data

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=2,
)

Model Training:
Random Forest Regressor

In [None]:
# Random Forest Regressor with 100 trees.
# random_state is fixed (same seed as the train/test split) so that the trained
# forest, and therefore the reported score and plot, are reproducible
# on "Restart & Run All".
regressor = RandomForestRegressor(n_estimators=100, random_state=2)

In [None]:
# Fit the forest on the training features and their GLD targets.
regressor.fit(X=X_train, y=Y_train)

Model Evaluation

In [None]:
# Predict GLD prices for the held-out test features.
test_data_prediction = regressor.predict(X=X_test)

In [None]:
# Preview only the first few predictions rather than echoing the whole array.
test_data_prediction[:10]

In [None]:
# R squared (coefficient of determination) on the test set.
# This is a goodness-of-fit score, not an error: 1.0 is a perfect fit and
# higher is better. The variable keeps its existing name `error_score` so any
# later cell that reads it continues to work.
error_score = metrics.r2_score(Y_test, test_data_prediction)
print("R squared score : ", error_score)

Compare the Actual Values and Predicted Values in a Plot

In [None]:
# Turn the held-out targets into a plain list so the plot below is drawn against
# position (0..n-1) instead of the original row labels carried by the Series.
Y_test = [actual for actual in Y_test]

In [None]:
# Overlay the actual and the predicted GLD prices for the test samples.
plt.plot(Y_test, label='Actual Value', color='blue')
plt.plot(test_data_prediction, label='Predicted Value', color='green')

# Axis labels, title and legend; the order of these calls does not affect the figure.
plt.ylabel('GLD Price')
plt.xlabel('Number of values')
plt.title('Actual Price vs Predicted Price')
plt.legend()

plt.show()