In [3]:
# import statements
import import_ipynb
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from format_data import LoadAndFormatData
from bin_portfolio import BinPortfolio
from sharpe_ratio import SharpeRatio
from data_update_michael import FindCoefficients
import tqdm

ModuleNotFoundError: No module named 'format_data'

In [None]:
def generate_PnL(portfolio, subset, invest):
    """Return the value of ``invest`` after holding ``portfolio`` for one period.

    The money is split equally across the PERMNOs listed in ``portfolio``.
    Each stock's share is multiplied by its ``PRC_t+1 / PRC`` ratio looked up
    in ``subset`` and the results are summed, so the return value is the
    ending capital (initial amount plus profit or loss), not the profit alone.

    Parameters
    ----------
    portfolio : pandas.DataFrame
        Must contain a 'PERMNO' column listing the stocks that are held.
    subset : pandas.DataFrame
        Must contain 'PERMNO', 'PRC' and 'PRC_t+1' columns. If a PERMNO
        appears on several rows only the first row is used.
    invest : float
        Capital available at the start of the period.

    Returns
    -------
    float
        Capital at the end of the period. A plain float is returned (the
        previous implementation leaked a NumPy array out of the ``.values``
        lookups). An empty portfolio returns ``invest`` unchanged.

    Notes
    -----
    A stock whose price ratio cannot be computed (PERMNO absent from
    ``subset``, NaN price, or a zero buy price) keeps its share of the money
    unchanged, i.e. it is treated as held in cash, and a warning is emitted.
    """
    import warnings

    permnos = portfolio["PERMNO"].to_list()
    if not permnos:
        # Nothing to buy: avoid dividing by zero and carry the capital over.
        return float(invest)

    # Equal-weight allocation.
    money_per_stock = invest / len(permnos)

    # Build one PERMNO-indexed lookup instead of scanning `subset` once per stock.
    prices = subset.drop_duplicates(subset="PERMNO").set_index("PERMNO")
    buy_prc = prices["PRC"].reindex(permnos)
    sell_prc = prices["PRC_t+1"].reindex(permnos)

    # Gross growth factor per stock; missing rows become NaN, a zero buy price
    # becomes inf/NaN. Both are caught by the finiteness check below.
    growth = (sell_prc / buy_prc).to_numpy(dtype=float)
    usable = np.isfinite(growth)
    if not usable.all():
        warnings.warn(
            f"generate_PnL: {int((~usable).sum())} of {len(permnos)} stocks have no "
            "usable PRC / PRC_t+1 pair; their allocation is carried over unchanged."
        )
        growth = np.where(usable, growth, 1.0)

    return float(money_per_stock * growth.sum())

In [None]:
# Load the dataset through LoadAndFormatData (imported from format_data) and preview it.
# Later cells index the result by column (e.g. df['DATE']), so it is presumably a
# pandas DataFrame -- TODO confirm against format_data.
df = LoadAndFormatData()
df.head()

In [None]:
# Column roles used by the monthly regression / portfolio pipeline.
dependent_var = ['RET']
regressors = ['mkt', 'roic', 'BEME', 'ME']
all_dataframe_vars = regressors + dependent_var + ['DATE', 'PERMNO', 'RET_t+1', 'PRC', 'PRC_t+1']
# NOTE: RET is used to find coefficients, RET_t+1 is used to evaluate the portfolio's success

# Per-month results; the three lists stay aligned because they are only
# appended to for months that actually produced a portfolio.
sharpe_ratios = []
dates_with_data = []
pnl_curves = []
intial_investment = 100
# Running capital of the strategy. This used to be stored in a variable called
# `sum`, which shadowed the builtin for every later cell in the kernel session.
portfolio_value = intial_investment

for month in tqdm.tqdm(sorted(df['DATE'].unique())):
    # Rows for this month, restricted to the columns we need, with any row
    # that has a missing value in one of them dropped.
    currentData = df.loc[df['DATE'] == month, all_dataframe_vars].dropna()
    # A month with no complete rows cannot be used; skip it.
    if currentData.empty:
        continue

    # Find the OLS coefficients for our regressor variables.
    # NOTE: These are the coefficients for the t-th time period. Forming the
    # portfolio on them assumes that the relationship for the next time period
    # will be similar to the one for this time period.
    go_long = FindCoefficients(dataframe=currentData, regressors=regressors)

    # Form the portfolio from the current month's data, the selected variables,
    # and the OLS coefficients that were just calculated.
    portfolio = BinPortfolio(dataframe=currentData, all_dataframe_vars=all_dataframe_vars, vars_to_bin=regressors, go_long=go_long)

    # Record the month so results can be indexed by date later.
    dates_with_data.append(month)

    # Sharpe ratio of the portfolio; per the note above this is based on RET_t+1.
    # NOTE(review): the full `df` (not `currentData`) is passed here -- confirm
    # that this is what SharpeRatio expects.
    sharpe_ratios.append(SharpeRatio(portfolio=portfolio, df=df, rf=0))

    # Compound the capital: equal money in each stock of the bin, sold at the
    # end of the month. The PnL is measured against the configured starting
    # capital rather than a hard-coded 100.
    portfolio_value = generate_PnL(portfolio=portfolio, subset=currentData, invest=portfolio_value)
    pnl_curves.append(portfolio_value - intial_investment)


In [None]:
# Sharpe ratio of each month's portfolio, indexed by the month it was formed for.
sharpes = pd.Series(sharpe_ratios, index=dates_with_data)
ax = sharpes.plot()
# Label the figure so it can be read on its own when the notebook is skimmed;
# the trailing semicolon keeps the list returned by ax.set() out of the output.
ax.set(title='Portfolio Sharpe ratio by month', xlabel='Date', ylabel='Sharpe ratio');

In [None]:
# Average Sharpe ratio across all months that produced a portfolio.
sharpes.mean()

In [None]:
# Plot the PnL over 4 time periods (all time, 2000-present, 2010-2019, 2020-present).
# Previously every panel drew the complete series, so the four charts were
# identical regardless of their titles; each panel is now sliced to its window.

# One date-indexed series for slicing. ravel() flattens the values in case
# generate_PnL handed back length-1 arrays instead of scalars.
# NOTE(review): assumes the entries of dates_with_data are datetime-like (or
# date strings) that pd.to_datetime parses correctly -- confirm against the
# DATE column produced by LoadAndFormatData.
pnl_series = pd.Series(
    np.asarray(pnl_curves, dtype=float).ravel(),
    index=pd.to_datetime(dates_with_data),
).sort_index()

# (title, first date, last date); None leaves that side of the window open.
pnl_windows = [
    ('Pnl of All Time', None, None),
    ('Pnl of 2000-Present', '2000-01-01', None),
    ('Pnl of 2010-2019', '2010-01-01', '2019-12-31'),
    ('Pnl of 2020-Present', '2020-01-01', None),
]

fig, axs = plt.subplots(2, 2)
for ax, (title, start, end) in zip(axs.flat, pnl_windows):
    window = pnl_series.loc[start:end]
    ax.set_title(title)
    # NOTE: a log axis cannot show months where the PnL is zero or negative;
    # those points are simply not drawn.
    ax.semilogy(window.index, window.values)
    ax.grid()
    ax.set(xlabel='Dates', ylabel='Profit and Loss')
fig.tight_layout()
plt.show()