# Install some necessary packages

In [0]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline 
# Install the third-party packages imported further down (pandas_datareader, quandl,
# dotenv, yfinance). %pip installs into the environment of the running kernel.
# NOTE(review): no versions are pinned, so a fresh install may pull releases that
# behave differently from the ones this notebook was written against.
%pip install pandas-datareader
%pip install quandl
%pip install python-dotenv
%pip install yfinance

# Study of Stock Portfolio

In [0]:
import os
import random 
import pandas_datareader.data as web
import pandas as pd
import datetime
import numpy as np
from ipywidgets import interact, fixed, IntSlider

import matplotlib.pyplot as plt
from numpy.linalg import cholesky
import seaborn as sns
import quandl # import quandl package
from dotenv import load_dotenv

# Load variables from a local .env file *before* the API key is read, so that a
# QUANDL_API_KEY entry defined there is actually picked up.
load_dotenv()
# magics for import statements
# %load_ext dotenv 
# %dotenv

# Register at quandl.com to obtain an API key and expose it as QUANDL_API_KEY
# (environment variable or .env entry).
# NOTE(review): the literal below is the key that used to be hardcoded here; it is
# kept only as a fallback so existing runs keep working. Rotate it and drop the
# fallback once QUANDL_API_KEY is provisioned.
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY") or "FX-yxABrQh8R4VCJu8_q"
# NOTE(review): mydata is not referenced anywhere else in the visible notebook.
mydata = quandl.get("FRED/GDP")

# cache data for this period
all_data_start, all_data_end = "2016-01-01", "2020-07-01"

# by default work with data in this period
default_start, default_end = "2017-01-01", "2019-07-01"


## Using the yfinance package

In [0]:
import yfinance as yf

# Download the price history for Apple and Microsoft, one frame per ticker
aapl, msft = (yf.download(ticker) for ticker in ("AAPL", "MSFT"))
# Put the two frames side by side, aligned on their index
combined_df = pd.concat(objs=[aapl, msft], axis=1)
combined_df.head()

In [0]:
import matplotlib.pyplot as plt

# Draw whatever sits under the 'Close' label of the combined frame
combined_df['Close'].plot(figsize=(10, 6))

# Title and axis labels go on the axes that the plot call just made current
plt.gca().set(title='Stock Price Comparison', xlabel='Date', ylabel='Closing Price')

# Render the figure
plt.show()

In [0]:
# Closing prices of the AAPL frame currently bound to `aapl`
aapl["Close"].plot()

## Get stock data

In [0]:
# pandas datareader: AAPL price history from the "quandl" source.
# This rebinds `aapl`, replacing the yfinance frame loaded in an earlier cell.
# The API key is the one configured once on quandl.ApiConfig in the setup cell,
# rather than a second copy of the credential embedded in this call.
aapl = web.DataReader(
    "AAPL.US", "quandl", "2015-01-01", "2023-12-31",
    api_key=quandl.ApiConfig.api_key,
)
aapl.head()

In [0]:
# Closing prices of the AAPL frame currently bound to `aapl`
aapl["Close"].plot()

In [0]:
# Visa price history for 2015 from the "quandl" source, using the API key
# configured once on quandl.ApiConfig in the setup cell.
visa = web.DataReader(
    "V.US", "quandl", "2015-01-01", "2016-01-01",
    api_key=quandl.ApiConfig.api_key,
)
# Raw close next to the adjusted close (which adjusts for the stock split);
# plotting both columns in one call gives each line a legend entry.
visa[["Close", "AdjClose"]].plot()

## Utility: get adjusted close, cache recent years

In [0]:
# Download cache: (symbol, start, end) -> adjusted-close Series
cached_data = {}


def stock_hist(symbol, start=None, end=None):
    """Return the adjusted-close series for ``symbol``, downloading it at most once.

    Parameters
    ----------
    symbol : str
        Ticker without suffix; ``".US"`` is appended for the "quandl" source.
    start, end : optional
        Bounds forwarded to ``web.DataReader``. A falsy value falls back to the
        notebook-level ``default_start`` / ``default_end``.

    Returns
    -------
    The ``'AdjClose'`` column of the frame returned by ``web.DataReader``.
    Repeated calls with the same (symbol, start, end) return the cached object
    instead of hitting the network again, so copy it before mutating.
    """
    start = start if start else default_start
    end = end if end else default_end
    key = (symbol, start, end)
    if key not in cached_data:
        # The API key is read from the single place it is configured
        # (quandl.ApiConfig) instead of being repeated in this call.
        cached_data[key] = web.DataReader(
            symbol + ".US", "quandl", start, end,
            api_key=quandl.ApiConfig.api_key,
        )['AdjClose']
    return cached_data[key]

In [0]:
start_dt = "2016-01-01"
end_dt = "2016-01-10"
# Direct download for a short explicit window. A bare expression that is not the
# last line of a cell is silently discarded, so display() is used to show it.
# The API key is the one configured on quandl.ApiConfig in the setup cell.
display(
    web.DataReader(
        "AAPL.US", "quandl", start=start_dt, end=end_dt,
        api_key=quandl.ApiConfig.api_key,
    )['AdjClose']
)
# result for one stock over the default window, via the helper
stock_hist("AAPL").head()


## Look at a basket of stocks

In [0]:
tickers_list = ['AAPL', 'FB', 'GOOG']
N = len(tickers_list)
# One adjusted-close column per ticker, each labelled with its symbol
historical = pd.concat(
    [stock_hist(ticker) for ticker in tickers_list],
    keys=tickers_list,
    axis=1,
)
_ = historical.plot()  # bind the plot's return value to `_`; it is not needed afterwards
historical.head()

In [0]:
# relative change in prices, price at t divided by the first price row
returns_na = (historical/historical.iloc[0]) #.fillna(method='backfill')
returns = (historical/historical.iloc[0]).fillna(method='backfill') 
_=returns_na.plot()

## Create an equally weighted portfolio

In [0]:
# Equal-weight portfolio = row-wise total of the first N columns divided by N.
# N is the length of the ticker list defined above; `.iloc[:, :N]` keeps all rows and
# only those first N columns, so a PORTFOLIO column added earlier is never re-summed.
# In `returns_na` the missing values cause a glitch in the plotted portfolio line;
# `returns` was gap-filled in the previous step and does not show it.
returns_na['PORTFOLIO'] = returns_na.iloc[:, :N].sum(axis='columns') / N
returns['PORTFOLIO'] = returns.iloc[:, :N].sum(axis='columns') / N
returns_na.plot()

In [0]:
symbols = ['AAPL','TSLA','FB','IBM','GOOG']
prices = [stock_hist(symbol) for symbol in symbols]
# Rebase every series so that its 5th-from-last row equals 1. `.iloc` makes the
# positional lookup explicit; plain `p[-5]` on a date-indexed Series relies on a
# deprecated integer fallback.
# NOTE(review): the offset 5 is unexplained in the notebook - confirm the intent.
unit_pos = [p / p.iloc[-5] for p in prices]

# Equally weighted basket: average of the rebased series
basket = sum(unit_pos) / len(unit_pos)
df = pd.DataFrame(basket)
df

In [0]:
# **active collects arbitrary keyword arguments (here: one boolean switch per symbol).
def diversicheck(symbols, start_day=0, **active):
    """Plot the selected stocks and their equally weighted basket, rebased to one row.

    Parameters
    ----------
    symbols : iterable of str
        Ticker symbols handed to ``stock_hist``.
    start_day : int, default 0
        Positional offset, counted back from the last row of each price series,
        of the row used as the rebasing price (``p.iloc[-1 - start_day]``). The
        same number is also added, as calendar days, to ``default_start`` for the
        printed date range.
    **active : bool
        Optional per-symbol switches. A symbol is skipped when its keyword is
        falsy; symbols without a keyword are included.

    Returns
    -------
    None. Draws the individual series and the basket on one figure and prints
    the date range derived from ``start_day``.

    Raises
    ------
    IndexError
        If ``start_day`` reaches beyond the length of a price series.
    """
    # The strptime() method creates a datetime object from the given string.
    # NOTE(review): start/end only feed the printed message; the plotted data is
    # whatever stock_hist returns for its default window.
    start = datetime.datetime.strptime(default_start, "%Y-%m-%d") + datetime.timedelta(start_day)
    end = start + datetime.timedelta(days=365)
    # keep only the symbols that have not been switched off through **active
    filtered = [symbol for symbol in symbols if active.get(symbol, True)]
    if not filtered:
        # averaging an empty selection would divide by zero
        print("No symbols selected - nothing to plot")
        return
    prices = [stock_hist(symbol) for symbol in filtered]
    # explicit positional lookup; plain p[int] on a date index is a deprecated fallback
    unit_pos = [p / p.iloc[-1 - start_day] for p in prices]
    basket = sum(unit_pos) / len(unit_pos)
    # draw on a dedicated figure instead of whichever axes happens to be current
    fig, ax = plt.subplots(figsize=(20, 10))
    for p in unit_pos:
        p.plot(ax=ax, color='b', alpha=0.3)
    basket.plot(ax=ax)
    print(f"Basket from {start} to {end}")

In [0]:
# Notes:
# interact() builds widgets for the arguments of a function and re-runs it whenever
# a widget changes, which is why diversicheck was defined in the step before.
# fixed() pins an argument to a value that gets no widget.
# Boolean keyword arguments become checkboxes, so unpacking `active` gives every
# symbol an on/off toggle that reaches diversicheck through its **active parameter.

prf_stocks = ['AAPL','TSLA','FB','IBM','GOOG']
# every symbol starts switched on
active = dict.fromkeys(prf_stocks, True)
_ = interact(
    diversicheck,
    symbols=fixed(prf_stocks),
    start_day=IntSlider(min=0, max=365, step=1,
                        description='Start date:',
                        disabled=False,
                        continuous_update=True,
                        orientation='horizontal',
                        readout=True,
                        readout_format='d'),
    **active,
)

## Simulate forwards

In [0]:
# One adjusted-close column per portfolio stock. pct_change() compares each row with
# the row stored directly above it, so the frame is sorted chronologically first;
# otherwise the "returns" would run backwards in time whenever the reader delivers
# rows newest-first. Sorting is a no-op if the rows are already ascending.
prf_prices = pd.concat(
    (stock_hist(symbol) for symbol in prf_stocks), axis=1, keys=prf_stocks
).sort_index()
# Gross returns (1 + simple return); the first row has no predecessor and is dropped
prf_returns = (prf_prices.pct_change() + 1).iloc[1:]
log_returns = np.log(prf_returns)
# Pairwise correlation of the log returns
corr = log_returns.corr()
sns.heatmap(corr, annot=True)

In [0]:
# Standard deviation of the log returns, scaled by sqrt(252) to an annual figure
vols = log_returns.std() * np.sqrt(252)
# Mean simple return per row (gross return minus 1); not annualised
avg_returns = (prf_returns - 1).mean()
fig, ax = plt.subplots()
ax.scatter(vols, avg_returns)
# Look the points up by ticker label: integer keys on a label-indexed Series rely on
# a deprecated positional fallback, and labels stay correct if the order ever differs.
for ticker in prf_stocks:
    ax.annotate(ticker, (vols[ticker], avg_returns[ticker]))
ax.set_xlabel("Annualised volatility of log returns")
ax.set_ylabel("Mean daily return")

## Bootstrap (Monte Carlo simulation)

In [0]:
# Bootstrap: each simulated path draws HORIZON historical rows at random (with
# replacement), takes the equal-weight average gross return of every drawn row and
# compounds them. N_PATHS paths are generated, i.e. N_PATHS * HORIZON draws in total.
N_PATHS = 1000        # number of simulated paths (columns of `simulated`)
HORIZON = 60          # rows drawn per path (rows of `simulated`)
BOOTSTRAP_SEED = 42   # fixed seed so Restart & Run All reproduces the same paths

rng = np.random.default_rng(BOOTSTRAP_SEED)
# Equal-weight portfolio gross return for every historical row
portfolio_returns = prf_returns.mean(axis=1).to_numpy()
# Row positions to draw: one row of indices per path
sampled_rows = rng.integers(0, len(portfolio_returns), size=(N_PATHS, HORIZON))
# Transpose to HORIZON x N_PATHS, then compound down each column
simulated = pd.DataFrame(portfolio_returns[sampled_rows].T).cumprod()
simulated.head()

In [0]:
# All simulated paths overlaid as thin, mostly transparent blue lines
simulated.plot(color="blue", alpha=0.1, linewidth=1, legend=False)

In [0]:
# 5%, 50% and 95% quantiles across the simulated paths at every step, one line each
simulated.quantile(q=[0.05, 0.5, 0.95], axis="columns").transpose().plot()