In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

## Read data

In [None]:
# parse_dates=True only asks pandas to parse the index, which leaves the
# 'Date' column as plain strings; naming the column parses it to datetime64.
df = pd.read_csv(
    '../input/gold-price-prediction-dataset/FINAL_USO.csv',
    parse_dates=['Date'],
)
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) for a first look at value ranges.
df.describe()

In [None]:
# Show the column labels available in the dataset.
df.columns

## Data visualization

In [None]:
label_name = list(df.columns)

# Keep every column whose name ends in "jclose" (case-insensitive) once either
# spaces or underscores have been stripped from it.
close_value = [
    name
    for name in label_name
    if 'jclose' in (
        name.replace(" ", "")[-6:].lower(),
        name.replace("_", "")[-6:].lower(),
    )
]

# Drop the third matching column.
# NOTE(review): this removal is positional, so which column goes depends on
# the column order of df — confirm the intended column.
del close_value[2]
close_value

In [None]:
# Pairwise correlation between the selected close-price columns, drawn as an
# annotated heatmap.
close_data = df.reindex(columns=close_value)
correlation_mat = close_data.corr()

sns.heatmap(data=correlation_mat, annot=True)
plt.show()

In [None]:
# Adjusted close price plotted against date.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df['Date'], y=df['Adj Close']))
fig.show()

In [None]:
# Rolling-mean windows (in rows); each one adds a "MA for N days" column to df.
ma_day = [10, 20, 50]
for window in ma_day:
    df[f"MA for {window} days"] = df['Adj Close'].rolling(window).mean()

In [None]:
# Adjusted close overlaid with its 10/20/50-row moving averages.
fig = px.line(
    data_frame=df,
    x="Date",
    y=['Adj Close', 'MA for 10 days', 'MA for 20 days', 'MA for 50 days'],
    title='Adj close',
)
fig.show()

In [None]:
# Row-over-row percentage change of the adjusted close; the first row is NaN.
df['Daily Return'] = df['Adj Close'].pct_change()

fig = px.scatter(
    data_frame=df,
    x="Date",
    y="Daily Return",
    title="Daily Return",
)
fig.show()

In [None]:
# Average daily return per date bin.
fig = px.histogram(
    data_frame=df,
    x="Date",
    y="Daily Return",
    histfunc="avg",
    title="Daily Return",
)
fig.show()

In [None]:
# Traded volume over time.
fig = px.line(
    data_frame=df,
    x="Date",
    y=['Volume'],
    title='Volume',
)
fig.show()

In [None]:
# idxmax() returns the label of the first row holding the maximum and skips
# NaN, whereas the builtin max() iterated over a Series gives order-dependent
# results when NaN is present.
index = df['Volume'].idxmax()
volumn_max = df.loc[index, 'Volume']

# Report the date, volume and adjusted close of the highest-volume row.
print(
    "Max Volume's day is:", df.loc[index, 'Date'],
    "\nvolume:", volumn_max,
    '\nthe day of close price:', df.loc[index, 'Adj Close'],
)
print('Average Adj close:', df["Adj Close"].mean())

## LSTM

In [None]:
from keras.models import Sequential
# BatchNormalization is imported from keras.layers: the older
# keras.layers.normalization path no longer exposes it in recent Keras releases.
from keras.layers import (
    Dense, Dropout, Activation, Flatten, LSTM, TimeDistributed, RepeatVector,
    BatchNormalization,
)
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Feature matrix: the selected close-price columns. adj_close is the
# single-column frame used to fit the scaler that later inverts predictions.
Adj_data = df.loc[:, close_value]
adj_close = Adj_data[['Adj Close']]

# The first 90% of rows form the training split.
training_data_len = int(np.ceil(len(Adj_data) * .90))

# Fit the scalers on the training rows only, so the min/max of the held-out
# rows do not leak into preprocessing; then apply them to the whole series.
sc = MinMaxScaler(feature_range=(0, 1))
sc.fit(Adj_data.iloc[:training_data_len])
scaled_data = sc.transform(Adj_data)

sc1 = MinMaxScaler(feature_range=(0, 1))
sc1.fit(adj_close.iloc[:training_data_len])
sc_data = sc1.transform(adj_close)

In [None]:
# Number of past rows fed to the model for each prediction.
lookback = 60

train_data = scaled_data[:training_data_len, :]
# Take the feature count from the data instead of hard-coding it, so the cell
# keeps working if the selected columns change.
n_features = train_data.shape[1]

# Each sample is the `lookback` rows preceding row i; its target is column 0 of
# row i (presumably 'Adj Close', the first selected column — confirm).
window_ends = range(lookback, len(train_data))
x_train = np.array([train_data[i - lookback:i, :] for i in window_ends])
y_train = np.array([train_data[i, 0] for i in window_ends])

# Shape (samples, time steps, features); -1 also handles an empty sample set.
x_train = x_train.reshape(-1, lookback, n_features)

In [None]:
def buildManyToOneModel(shape):
    """Build and compile a single-layer LSTM regressor.

    Args:
        shape: Shape of the training input, indexed as
            (samples, time steps, features). Only shape[1] and shape[2]
            are read.

    Returns:
        The compiled Sequential model: an LSTM layer with 10 units followed
        by Dense(1), using MSE loss and the Adam optimizer. The model summary
        is printed as a side effect.
    """
    model = Sequential()
    # input_shape replaces the legacy input_length/input_dim keyword pair,
    # which newer Keras versions reject.
    model.add(LSTM(10, input_shape=(shape[1], shape[2])))
    # One scalar prediction per sample: output shape (batch, 1).
    model.add(Dense(1))
    model.compile(loss="mse", optimizer="adam")
    model.summary()
    return model

In [None]:
model = buildManyToOneModel(x_train.shape)

# Stop once the training loss has not improved for 10 consecutive epochs.
callback = EarlyStopping(monitor="loss", patience=10, verbose=1, mode="auto")
model.fit(
    x=x_train,
    y=y_train,
    epochs=1000,
    batch_size=128,
    callbacks=[callback],
)

In [None]:
# Number of past rows per input window; must match the window used to build x_train.
lookback = 60

# Start `lookback` rows before the split so the first test target has a full window.
test_data = scaled_data[training_data_len - lookback:, :]
# Take the feature count from the data instead of hard-coding it.
n_features = test_data.shape[1]

# Ground truth in original price units (positional slice of the held-out rows).
y_test = Adj_data['Adj Close'].iloc[training_data_len:].values

# One window of the preceding `lookback` rows per held-out row.
x_test = np.array([test_data[i - lookback:i, :] for i in range(lookback, len(test_data))])
# Shape (samples, time steps, features).
x_test = x_test.reshape(-1, lookback, n_features)

# Predict in scaled space, then map back to price units with the Adj Close scaler.
predictions = model.predict(x_test)
predictions = sc1.inverse_transform(predictions)

In [None]:
# Integer sample index for the x-axis. np.linspace(0, N, N) would space the
# points N/(N-1) apart, which does not line up with sample positions.
t = np.arange(len(y_test))
# Flatten the (N, 1) model output to 1-D for plotting.
predictions = np.reshape(predictions, len(predictions))

fig = go.Figure()

fig.add_trace(go.Scatter(x=t, y=y_test,
                    mode='lines',
                    name='True data'))

fig.add_trace(go.Scatter(x=t, y=predictions,
                    mode='lines',
                    name='predict'))

fig.update_layout(title='Adj Close: true vs. predicted (test set)',
                  xaxis_title='Test sample index',
                  yaxis_title='Adj Close')

fig.show()