In [1]:
# Initial imports
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from MCForecastTools import MCSimulation
from pathlib import Path
import datetime as dt
import seaborn as sns

import matplotlib.pyplot as plt

%matplotlib inline

ModuleNotFoundError: No module named 'seaborn'

In [None]:
# Default size (width, height) applied to every matplotlib figure in this notebook
plt.rcParams.update({"figure.figsize": (22, 12)})


In [None]:
# Load .env environment variables so the os.getenv() lookups for the Alpaca keys can find them
load_dotenv()


In [None]:
# Read the Alpaca credentials from the environment (never hardcode them here)
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

# REST client used by the data-fetching cells below
alpaca = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

In [None]:
# Ticker list and the daily timeframe string passed to the Alpaca API
tickers = ["GME"]
timeframe = "1D"

# Request window 2017-05-19 .. 2020-05-19 as ISO-8601 strings in New York time
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2017-05-19", "2020-05-19")
)

# Fetch the bars (capped at 1000 rows) and keep the DataFrame view
df_gamestop_1 = alpaca.get_barset(
    tickers, timeframe, start=start_date, end=end_date, limit=1000
).df

# Preview the first rows
df_gamestop_1.head()

In [None]:
# Truncate the bar timestamps to whole days and label the index "Date"
df_gamestop_1.index = pd.Index(
    df_gamestop_1.index.values.astype('datetime64[D]'),
    name='Date',
)
df_gamestop_1.tail()

In [None]:
# Persist the 2017-2020 GME bars to a CSV file in the working directory
df_gamestop_1.to_csv("df_gamestop_1.csv")

In [None]:
# Configure the simulation over the 2017-2020 GME bars:
# a single asset at full weight, 500 simulations, 252 * 10 trading days.
# NOTE(review): no random seed is set anywhere in this notebook, so results
# will presumably differ between runs — confirm against MCForecastTools.
MC_ten_year_1 = MCSimulation(
    portfolio_data=df_gamestop_1,
    weights=[1],
    num_simulation=500,
    num_trading_days=252 * 10,
)

In [None]:
# Preview the first rows of the data held by the first simulation object
MC_ten_year_1.portfolio_data.head()

In [None]:
# Run the cumulative-return calculation for the 2017-2020 window; the cell displays whatever it returns
MC_ten_year_1.calc_cumulative_return()

In [None]:
# Draw the simulation plot for the 2017-2020 window and keep the returned object for the summary cell at the end
Sim_plot_prevolatility = MC_ten_year_1.plot_simulation()

In [None]:
# Draw the distribution plot for the first simulation and keep the returned object
dist_plot_1 = MC_ten_year_1.plot_distribution()

In [None]:
# Summary of the cumulative returns produced by the first simulation
tbl_1 = MC_ten_year_1.summarize_cumulative_return()

# Emit the summary as plain text
print(tbl_1)

In [None]:

# Ticker list and the daily timeframe string passed to the Alpaca API
tickers = ["GME"]
timeframe = "1D"

# Request window 2018-05-19 .. 2021-05-19 as ISO-8601 strings in New York time
start_date, end_date = (
    pd.Timestamp(day, tz="America/New_York").isoformat()
    for day in ("2018-05-19", "2021-05-19")
)

# Fetch the bars (capped at 1000 rows) and keep the DataFrame view
df_gamestop_2 = alpaca.get_barset(
    tickers, timeframe, start=start_date, end=end_date, limit=1000
).df

# Preview the first rows
df_gamestop_2.head()



In [None]:


# Truncate the bar timestamps to whole days and label the index "Date"
df_gamestop_2.index = pd.Index(
    df_gamestop_2.index.values.astype('datetime64[D]'),
    name='Date',
)
df_gamestop_2.tail()

In [None]:
# Persist the 2018-2021 GME bars to a CSV file in the working directory
df_gamestop_2.to_csv("df_gamestop_2.csv")

In [None]:
# Same simulation settings as the first run, applied to the 2018-2021 GME bars:
# a single asset at full weight, 500 simulations, 252 * 10 trading days.
MC_ten_year_2 = MCSimulation(
    portfolio_data=df_gamestop_2,
    weights=[1],
    num_simulation=500,
    num_trading_days=252 * 10,
)

In [None]:
# Alias the data held by the second simulation object; the next cells read its ('GME', 'daily_return') column
GME_returns = MC_ten_year_2.portfolio_data

In [None]:
# Preview the first rows of the simulation's data
GME_returns.head()

In [None]:
# Select the 'daily_return' column nested under the 'GME' column group
GME_daily_return = GME_returns['GME']['daily_return'] 

In [None]:
# Display the daily-return series (before missing values are dropped in the next cell)
GME_daily_return

In [None]:
# Discard missing daily returns, overwriting the series in place
GME_daily_return = GME_daily_return.dropna()

# Sanity check: number of missing values left (expected 0)
GME_daily_return.isna().sum()

In [None]:
# Line plot of the GME daily returns; the Axes is kept for the summary cell at the end
GME_return_plt = GME_daily_return.plot(title='Daily Returns of GME', figsize=(30, 12))
GME_return_plt.set_ylabel('Returns (%)')

# Dashed, semi-transparent black reference line at zero return
plt.axhline(0.0, linestyle='--', alpha=0.5, color='k')
plt.show()

In [None]:
# Run the cumulative-return calculation for the 2018-2021 window; the cell displays whatever it returns
MC_ten_year_2.calc_cumulative_return()

In [None]:
# Draw the simulation plot for the 2018-2021 window and keep the returned object for the summary cell at the end
Sim_plot_postvolatility = MC_ten_year_2.plot_simulation()
# Save matplotlib's current figure to foo.png
# NOTE(review): presumably this is the simulation plot drawn on the line above — confirm
plt.savefig("foo.png")

In [None]:
# Draw the distribution plot for the second simulation and keep the returned object
dist_plot_2 = MC_ten_year_2.plot_distribution()

In [None]:
# Summary of the cumulative returns produced by the second simulation
tbl_2 = MC_ten_year_2.summarize_cumulative_return()

# Emit the summary as plain text
print(tbl_2)

In [None]:
#Import Twitter data for GME mentions 

# Location of the Twitter mention counts, relative to the notebook
twitter_data_path = Path("twitter_data_2021.csv")

# Read the CSV, convert the 'Date' column to datetimes and use it as the index
twitter_data = (
    pd.read_csv(twitter_data_path, parse_dates=True, infer_datetime_format=True)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

# Preview the last rows
twitter_data.tail()



In [None]:
# Ensure the 2018-2021 bars carry a day-resolution index named "Date" before
# they are merged with the Twitter data. The same conversion was applied right
# after the fetch, so on a top-to-bottom run this leaves the index unchanged.
# (The redundant mid-notebook `import datetime as dt` was removed: the module
# is already imported in the first cell and `dt` is not used here.)
df_gamestop_2.index = df_gamestop_2.index.values.astype('datetime64[D]')
df_gamestop_2.index = df_gamestop_2.index.set_names(['Date'])
df_gamestop_2.tail()

In [None]:
# Outer-join the Twitter data with the GME bars on 'Date', then drop the
# per-keyword tweet columns and every price/volume column listed below.
Social_GME_data = (
    twitter_data
    .merge(df_gamestop_2['GME'], how='outer', on='Date')
    .drop(['gamestock', 'gamestonk', 'gme', 'r/wallstreetbets'], axis=1)
    .drop(['open', 'high', 'low', 'volume'], axis=1)
)

Social_GME_data.head()

In [None]:
# Row-over-row percent change of every remaining column
Social_GME_data_pct = Social_GME_data.pct_change()

# Preview the first rows
Social_GME_data_pct.head()

In [None]:
# Keep only rows where every percent change is defined, then plot them.
# (A mid-cell `.head()` is never displayed by the notebook, so only the plot is emitted.)
Social_GME_pct_plot = Social_GME_data_pct.dropna()

Social_GME_pct_plot.plot()


In [None]:
# Pairwise correlation between the percent-change columns
social_GME_corr = Social_GME_pct_plot.corr()

# Heatmap of the correlation matrix with the colour scale pinned to [-1, 1].
# Fix: the original passed `social_corr`, a name that is never defined in this
# notebook (NameError); the matrix computed above is `social_GME_corr`.
sns.heatmap(social_GME_corr, vmin=-1, vmax=1)

In [None]:
# Display the correlation matrix as a table
social_GME_corr

In [None]:
# Display the daily-return series (missing values were dropped earlier)
GME_daily_return

In [None]:
# Exponentially weighted standard deviation of the daily returns, using a halflife of 21 observations
daily_returns_ewm = GME_daily_return.ewm(halflife=21).std()

In [None]:

# Plot the exponentially weighted standard deviation; the Axes is kept for the summary cell at the end
GME_ewm_std = daily_returns_ewm.plot(
    title='Exponentially Weighted Standard Deviation (21-day halflife) of GME',
    figsize=(30, 12),
)
GME_ewm_std.set_ylabel('standard deviation')
plt.show()

In [None]:
#group by absolute number of tweets / relationship to stock price (percent change of price) - absolute number of tweets as a proxy for interest in stock


In [None]:
# Preview the merged tweet/price table again before plotting it
Social_GME_data.head()

In [None]:
#Social_GME_data_index = Social_GME_data.reset_index().dropna() 

In [None]:
# Flatten the index into ordinary columns and drop rows with missing values.
# Fix: the only assignment to this name elsewhere in the notebook is commented
# out, so displaying it raised NameError on a fresh kernel; it is defined here
# from that same expression.
Social_GME_data_index = Social_GME_data.reset_index().dropna()
Social_GME_data_index

In [None]:
# Scatter of close against Date, with marker size and colour both driven by total_tweets
# NOTE(review): 'Date' appears to be the index of Social_GME_data rather than a column — confirm
# that the installed seaborn resolves index names, or plot from a reset_index() copy instead
scatterplot_GME = sns.scatterplot(data=Social_GME_data, x="Date", y="close", size='total_tweets', hue="total_tweets")


In [None]:
# Plots I want to use to tell the financial data story
# NOTE: these are bare name references acting as a checklist; a notebook cell only
# echoes its final expression, so only `scatterplot_GME` produces output here.

#daily return plot
GME_return_plt

#Simulation plots pre and post stock price volatility
Sim_plot_prevolatility
Sim_plot_postvolatility

#Moving weighted average standard deviation plot to show volatility
GME_ewm_std

# Pct change correlation between Social mentions and Stock price changes
social_GME_corr

#Scatterplot graph showing a relationship between date / stock price and twitter mentions (Just need to adjust the x axis scale and show dates from 2019 onwards)
scatterplot_GME


