In [16]:
import pandas as pd
import yfinance as yf
import numpy as np
import plotly.express as px

### Resources
https://observablehq.com/@mbostock/the-wealth-health-of-nations

https://www.gapminder.org/tools/?from=world#$ui$chart$cursorMode=hand;&projector:true;&model$markers$bubble$encoding$size$data$concept=co2_emissions_tonnes_per_person&space@=country&=time;;&scale$domain:null&type:null&zoomed:null;;&frame$value=2018;;;;;&chart-type=bubbles&url=v1

In [15]:


#df = df.sort_values(['Stock', 'Date'])#.rolling(window = 20).corr(pairwise = True)
#corr = df.loc[:, ['Date', 'Stock']]

Unnamed: 0,Date,Stock,Price,Exposure,Expected Return,Expected Value
26,1/1/2022,7261 JT EQUITY,885.00,0.026109,0.326124,1173.62
431,1/10/2022,7261 JT EQUITY,980.00,0.028793,0.200388,1176.38
476,1/11/2022,7261 JT EQUITY,994.00,0.029266,0.183793,1176.69
521,1/12/2022,7261 JT EQUITY,1003.00,0.029698,0.173470,1176.99
566,1/13/2022,7261 JT EQUITY,1014.00,0.030146,0.161045,1177.30
...,...,...,...,...,...,...
2920,3/9/2022,ZNGA,9.14,0.003047,-0.026258,8.90
3190,4/12/2022,ZNGA,8.92,0.002973,0.043722,9.31
3235,4/20/2022,ZNGA,8.94,0.002980,0.043624,9.33
3281,4/21/2022,ZNGA,8.75,0.002917,0.066286,9.33


In [17]:
### Initial Operations
def returns(df):
    """
    close-to-close returns
    """
    return df.Close / df.Close.shift(1) - 1


def vwap(df):
    """
    volume-weighted average price 
    """
    return (df.Volume * df.Close) / df.Volume 


def get_ticker_history_and_process(
        ticker:str, 
        period:str, 
        interval:str):
        
    # Get ticker history
    df = yf.Ticker(ticker).history(period=period, interval = interval)
    
    # Remove cases where volume is zero
    df = df[df['Volume'] != 0]
    df['vwap'] = vwap(df)
    df['returns'] = returns(df)
    #check_returns(df)

    df.insert(0, "ticker", ticker)
    #df = generate_features(df) ### COMMENT IN WHEN RUNNING
    return df


def build_frame(
    tickers:list,
    period:str = '2y',
    interval:str = '1d',
    verbose: bool() = False
    ):

    results = pd.DataFrame()

    for ticker in tickers:
        assert len(ticker) != 1, f"Ticker in Tickers being read as: {ticker}. \n If entering single ticker, use list brackets."
        # Get ticker history
        df = yf.Ticker(ticker).history(period=period, interval = interval)
        df['vwap'] = vwap(df)
        df['returns'] = returns(df)
        df.insert(0, "Ticker", ticker)
        results = pd.concat([results, df])
    
    print("Shape of Final df:", results.shape)
    return results

In [76]:
tickers = ['SSSS', 'GBDC', 'POLY.L', 'SKF.AX', 'PSDN', 'VRNOF', 'LFMD', 'CLRB', 'GLATF', 'URNM', 'AAPL', 'GOOGL', 'CVX']
df = build_frame(tickers)
df = df.loc[:, ['Ticker', 'vwap']]

df.dropna(axis = 0, how = 'all', inplace=True)

df['date'] = pd.to_datetime(df.index, utc = True).date
df.reset_index(drop = False, inplace = True)
#df.drop(['Date'], axis = 1, inplace = True)

print(df.columns, "\n", df.index)

# Get price return
df['price_return'] = df.groupby('Ticker')['vwap'].pct_change(1)
df = df.dropna(axis = 0)
df

Shape of Final df: (6420, 11)
Index(['Date', 'Ticker', 'vwap', 'date'], dtype='object') 
 RangeIndex(start=0, stop=6420, step=1)


Unnamed: 0,Date,Ticker,vwap,date,price_return
1,2021-05-14 00:00:00-04:00,SSSS,9.106423,2021-05-14,0.003232
2,2021-05-17 00:00:00-04:00,SSSS,9.568040,2021-05-17,0.050691
3,2021-05-18 00:00:00-04:00,SSSS,9.400179,2021-05-18,-0.017544
4,2021-05-19 00:00:00-04:00,SSSS,9.316249,2021-05-19,-0.008929
5,2021-05-20 00:00:00-04:00,SSSS,9.386190,2021-05-20,0.007507
...,...,...,...,...,...
6415,2023-05-08 00:00:00-04:00,CVX,159.580002,2023-05-08,-0.003932
6416,2023-05-09 00:00:00-04:00,CVX,159.119995,2023-05-09,-0.002883
6417,2023-05-10 00:00:00-04:00,CVX,157.089996,2023-05-10,-0.012758
6418,2023-05-11 00:00:00-04:00,CVX,156.220001,2023-05-11,-0.005538


### Make up some exposures

In [77]:
# generate two random numbers, each between 0 and 10
#random()
from random import uniform

tickers = list(set(df['Ticker']))
df['Exposures'] = None

# Randomly generate exposures
for ticker in tickers:
    r1 = uniform(0, 10.0)  
    r2 = uniform(0, 10.0)

    n_points = df[df['Ticker'] == ticker].shape[0]
    exposures = np.linspace(r1, r2, n_points)
    df.loc[df['Ticker'] == ticker, ['Exposures']] = exposures


title = "Ticker Exposures over Time for Imaginary Portfolio"
px.line(df, x = 'Date', y = 'Exposures', color = 'Ticker', title = title)

In [87]:
px.line(df, x = 'Date', y = 'price_return', color = 'Ticker', title = title)

In [113]:
# Convert datetime to date
df['Date'] = pd.to_datetime(df['Date'], utc = True).dt.date

# Calculate weighted returns
df['weighed_rets'] = df['price_return'] * df['Exposures']

In [266]:
df.drop_duplicates().shape

(6307, 8)

In [257]:
# Calculate rolling correlation

# may need to convert to matrix
corr = df.loc[:, ['Date', 'Ticker', 'weighed_rets']]\
         .pivot( columns=[ 'Date', "Ticker"], values="weighed_rets",)
        #  .dropna()\
        #  .rolling(window = 21)\
        #  .corr()\
        #  .dropna()\
        #  .reset_index()\
        #  .rename(index = {'Ticker':'Correlation'})
         #.melt(id_vars=['Date', 'Ticker'])\
         #.mean(axis = 0)
         
         
         

corr.index = corr.index.set_names("Index")
corr.columns

corr.reset_index()
corr.index.rename('Index', inplace=True)            
corr.columns
corr
#
#corr.index = "A"

#corr = corr.melt(id_vars=['Date', 'Ticker'])

#corr.columns = ['Date', 'Ticker1', 'Ticker2', 'pairwise_corr']
#corr.melt(id_vars=['Date', 'Ticker'])
#corr[corr['Ticker1'] != corr['Ticker2']]

Date,2021-05-14,2021-05-17,2021-05-18,2021-05-19,2021-05-20,2021-05-21,2021-05-24,2021-05-25,2021-05-26,2021-05-27,...,2023-05-01,2023-05-02,2023-05-03,2023-05-04,2023-05-05,2023-05-08,2023-05-09,2023-05-10,2023-05-11,2023-05-12
Ticker,SSSS,SSSS,SSSS,SSSS,SSSS,SSSS,SSSS,SSSS,SSSS,SSSS,...,CVX,CVX,CVX,CVX,CVX,CVX,CVX,CVX,CVX,CVX
Index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1,0.00512,,,,,,,,,,...,,,,,,,,,,
2,,0.080477,,,,,,,,,...,,,,,,,,,,
3,,,-0.027915,,,,,,,,...,,,,,,,,,,
4,,,,-0.014239,,,,,,,...,,,,,,,,,,
5,,,,,0.011999,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6415,,,,,,,,,,,...,,,,,,-0.022044,,,,
6416,,,,,,,,,,,...,,,,,,,-0.016165,,,
6417,,,,,,,,,,,...,,,,,,,,-0.071572,,
6418,,,,,,,,,,,...,,,,,,,,,-0.031082,


In [267]:
corr = t.dropna() # produces no result when NAs are included
corr = corr.rolling(window = 21).corr()

In [None]:

df = pd.pivot(df, index = 'date', columns = 'Ticker', values = 'vwap')

print(df.columns)

# Create correlation plot
df = df.rename_axis(None).rename_axis(None, axis=1).corr()
df = df.stack().reset_index()
df = pd.DataFrame(df)
df.columns = ['source', 'target', 'Correlation']
df = df[df['Correlation'] != 1]

df = df.drop_duplicates()

df["oneway"] = df.apply(lambda x: not df[
                    (df["source"] == x["target"]) & (df["target"] == x["source"]) & (df.index != x.name)].empty,axis=1)

df = df[df['oneway'] == True].drop("oneway", axis = 1)

df.to_csv("ticker_corr2.csv", index = False)

df = df.groupby(['source'], as_index = False).mean().round(3).rename(columns={'source':'name'})

df = df.drop_duplicates()

df.to_csv("ticker_nodes.csv")