In [1]:
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Project Outline

1. Background
2. Data Collection and Preprocessing
3. Visualize Data
4. Training Model
5. Predict Stock Price

## 1. Background

Long Short-Term Memory (LSTM) networks are a type of recurrent neural network (RNN) that can be applied for financial stock price prediction. LSTMs can capture patterns in time series data. 

## 2. Data Collection and Preprocessing

Collect historical stock price data of Spotify (SPOT) using the yfinance library which retrieves financial data from Yahoo Finance. 


In [3]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.28-py2.py3-none-any.whl (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.3/65.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Installing collected packages: multitasking, yfinance
Successfully installed multitasking-0.0.11 yfinance-0.2.28


In [21]:
import pandas as pd
import yfinance as yf
import datetime

# calculate start and end dates
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=365) # 4 years

# download price data
price_data = yf.download('SPOT', start=start_date,
                end=end_date, progress=False,
                )

In [22]:
price_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-08-14,137.300003,139.664993,135.339996,139.380005,139.380005,1942900
2023-08-15,138.380005,138.580002,133.270004,133.429993,133.429993,2125300
2023-08-16,132.600006,136.139999,132.279999,134.350006,134.350006,1584900
2023-08-17,134.350006,134.854004,131.190002,131.570007,131.570007,1129300
2023-08-18,129.899994,132.475006,129.229996,131.869995,131.869995,1567154


In [23]:
price_data.shape

(251, 6)

In [24]:
# set index as column
price_data["Date"] = price_data.index
price_data.reset_index(drop=True, inplace=True)

In [25]:
price_data.tail()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Date
246,137.300003,139.664993,135.339996,139.380005,139.380005,1942900,2023-08-14
247,138.380005,138.580002,133.270004,133.429993,133.429993,2125300,2023-08-15
248,132.600006,136.139999,132.279999,134.350006,134.350006,1584900,2023-08-16
249,134.350006,134.854004,131.190002,131.570007,131.570007,1129300,2023-08-17
250,129.899994,132.475006,129.229996,131.869995,131.869995,1567154,2023-08-18


## 3. Visualize Data

Visualize Stock Price Data using candlestick chart:

In [26]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Candlestick(
                        x=price_data["Date"],
                        open=price_data["Open"],
                        high=price_data["High"],
                        low=price_data["Low"],
                        close=price_data["Close"])])
fig.update_layout(title="Spotify Stock Price Analysis",
                  xaxis_rangeslider_visible=True)                  
fig.show()

## 4. Training Model

In [27]:
X = price_data[["Open", "High", "Low", "Volume"]]
target = price_data["Close"]

X = X.to_numpy()
target = target.to_numpy().reshape(-1, 1)

In [29]:
# split into training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, target,
                                        test_size=0.2,
                                        random_state=42)

In [31]:
# neural network for LSTM
from keras.models import Sequential
from keras.layers import Dense, LSTM

model = Sequential()
model.add(LSTM(64, return_sequences=True, 
               input_shape= (X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=False))
model.add(Dense(10))
model.add(Dense(1))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 4, 64)             16896     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 10)                330       
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                                 
Total params: 29,653
Trainable params: 29,653
Non-trainable params: 0
_________________________________________________________________


In [32]:
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train,
          batch_size=1, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7d2efcf00d30>

In [44]:
import numpy as np

features = np.array([X[-1]])
features
model.predict(features)



array([[129.10312]], dtype=float32)

The predicted Close Price is 129.10