In [13]:
import os
import json
import requests

import pandas as pd
import mplfinance as mpf
import matplotlib.pyplot as plt


In [14]:
# Create the output folders used by this notebook; existing folders are left untouched
for output_dir in ("data", "visualisations"):
    os.makedirs(output_dir, exist_ok=True)

Scraping daily data

In [15]:
# File path of the cached JSON response
file_path = "data/gme_daily_data.json"

# Only call the API when no cached copy exists, so re-running the notebook
# does not spend another request.
if not os.path.exists(file_path):
    # The API key is read from the environment so it is never stored in the notebook.
    # NOTE(review): the key that used to be hardcoded here has been exposed and should be rotated.
    api_key = os.environ.get("ALPHAVANTAGE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the ALPHAVANTAGE_API_KEY environment variable before fetching GME data."
        )

    response = requests.get(
        "https://www.alphavantage.co/query",
        params={
            "function": "TIME_SERIES_DAILY",
            "symbol": "GME",
            "outputsize": "full",
            "apikey": api_key,
        },
        timeout=30,  # avoid hanging the kernel forever on a stalled connection
    )
    response.raise_for_status()
    gme_daily_data = response.json()

    # The next cell reads the 'Time Series (Daily)' key from the cached file.
    # Refuse to cache a payload without it, otherwise the bad response would be
    # reused on every later run. Only the key names are reported, since the
    # payload text itself may echo the API key.
    if "Time Series (Daily)" not in gme_daily_data:
        raise RuntimeError(
            f"Unexpected Alpha Vantage response; top-level keys: {sorted(gme_daily_data)}"
        )

    # Save the data to a JSON file
    with open(file_path, "w") as file:
        json.dump(gme_daily_data, file)

In [16]:
# Reading the cached JSON file
gme_daily_data_df = pd.read_json('data/gme_daily_data.json')

# Extracting the time series column; dropna() discards the index entries that
# have no value in this column (presumably the metadata rows — verify against the file).
time_series_daily_data = gme_daily_data_df['Time Series (Daily)'].dropna().to_dict()

# Building one row per date, with the date moved from the index into a 'Date'
# column and the API's numbered field names replaced by OHLCV column names.
gme_daily_transformed_df = (
    pd.DataFrame.from_dict(time_series_daily_data, orient='index')
    .reset_index()
    .rename(columns={
        'index': 'Date',
        '1. open': 'Open',
        '2. high': 'High',
        '3. low': 'Low',
        '4. close': 'Close',
        '5. volume': 'Volume',
    })
)

# Sort by the 'Date' column. Sorting by index here would be a no-op, because
# reset_index() has already replaced the date index with a RangeIndex.
gme_daily_transformed_df_sorted = (
    gme_daily_transformed_df
    .sort_values('Date')
    .reset_index(drop=True)
)

# Only the last expression of a cell is rendered automatically, so the preview
# is shown explicitly with IPython's built-in display().
display(gme_daily_transformed_df_sorted.head())
gme_daily_transformed_df_sorted.dtypes

Date      object
Open      object
High      object
Low       object
Close     object
Volume    object
dtype: object

Changing datatypes

In [17]:
# 'Date' becomes a datetime column; every price/volume column becomes a float
numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
column_dtypes = {'Date': 'datetime64[ns]'}
column_dtypes.update({column: 'float' for column in numeric_columns})

gme_daily_transformed_df = gme_daily_transformed_df.astype(column_dtypes)

Pickling the DataFrame to be used across Jupyter notebooks

In [18]:
# Pickle the DataFrame.
# The target folder is created first: only "data" and "visualisations" are
# created earlier in this notebook, and to_pickle() does not create missing directories.
os.makedirs("scraping", exist_ok=True)
gme_daily_transformed_df.to_pickle("scraping/gme_daily_transformed_df.pkl")


Plotting a Candlestick Chart of GME Stock Price during the GameStop short squeeze

In [19]:
# Select the rows dated 1 Dec 2020 through 30 Apr 2021 (both ends inclusive),
# index them by 'Date' and order them chronologically.
# The chain builds a new frame, so the full DataFrame is left unmodified.
window_start, window_end = '2020-12-01', '2021-04-30'
in_window = gme_daily_transformed_df['Date'].between(window_start, window_end)
gme_jan_apr2021_df = (
    gme_daily_transformed_df.loc[in_window]
    .set_index('Date')
    .sort_index()
)

# Draw the OHLC candlestick chart with a volume panel and write it to disk
mpf.plot(
    gme_jan_apr2021_df,
    type='candle',
    style='yahoo',
    volume=True,
    datetime_format='%b %d, %Y',
    xrotation=45,
    title='GME Stock Price (Dec 2020-Apr 2021)',
    savefig='visualisations/gme_stock_price_candlestick_chart.png',
)
