In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import getFamaFrenchFactors as gff

# Set plot style for better aesthetics
sns.set(style="whitegrid")

# Step 1: Load and preprocess portfolio data
df_daily_ticker = pd.read_excel(r"C:\Users\MukeshwaranBaskaran\Downloads\Project_KISKI\Data\KISKI_Data_PositionLevel_Daily_Ticker_Net.xlsx")
df_daily_ticker.columns = df_daily_ticker.columns.str.strip()
df_daily_ticker.rename(columns={"Trade Dt": "Date"}, inplace=True)

# Ensure 'Date' is in datetime format
df_daily_ticker['Date'] = pd.to_datetime(df_daily_ticker['Date'])

# Step 2: Forward fill missing dates
df_daily_ticker.sort_values('Date', inplace=True)
df_daily_ticker['Date'] = df_daily_ticker['Date'].ffill()

# Now df_daily_ticker has forward-filled dates
df_daily_ticker.head()

Unnamed: 0,Date,Group,AUM BOD,AUM EOD,P&L,P&L (%),Net,Net (%),Gross,Gross(%),Market Value,Market Value (%)
0,2019-03-31,AKBA,6200000.0,6398866.0,225.22,3.6e-05,24570.0,0.003839743,24570.0,0.003839743,24570.0,0.003839743
45,2019-04-30,AABA,6398866.0,6687844.0,2185.0,0.000341,75390.0,0.01127269,75390.0,0.01127269,75390.0,0.01127269
127,2019-05-31,AABA,6687844.0,6836094.0,3422.40685,0.000512,,,,,,
216,2019-06-30,6EN9 Index,6836094.0,6915215.0,9337.95,0.001366,5.6955,8.236187e-07,5.6955,8.236187e-07,5.6955,8.236187e-07
291,2019-07-31,6EN9 Index,6915215.0,7133661.0,-7793.25,-0.001127,,,,,,


In [2]:
df_daily_factors = pd.read_csv(r"C:\Users\MukeshwaranBaskaran\Downloads\Project_KISKI\Data\F-F_Research_Data_5_Factors_2x3_daily.CSV")
df_daily_factors.head()

Unnamed: 0,Date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,19630701,-0.67,0.02,-0.35,0.03,0.13,0.012
1,19630702,0.79,-0.28,0.28,-0.08,-0.21,0.012
2,19630703,0.63,-0.18,-0.1,0.13,-0.25,0.012
3,19630705,0.4,0.09,-0.28,0.07,-0.3,0.012
4,19630708,-0.63,0.07,-0.2,-0.27,0.06,0.012


In [3]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import yfinance as yf

# Load factor data
df_daily_factors = pd.read_csv(r"C:\Users\MukeshwaranBaskaran\Downloads\Project_KISKI\Data\F-F_Research_Data_5_Factors_2x3_daily.CSV")
df_daily_factors['Date'] = pd.to_datetime(df_daily_factors['Date'], format='%Y%m%d')

# Load portfolio data
df_daily_ticker = pd.read_excel(r"C:\Users\MukeshwaranBaskaran\Downloads\Project_KISKI\Data\KISKI_Data_PositionLevel_Daily_Ticker_Net.xlsx")
df_daily_ticker.columns = df_daily_ticker.columns.str.strip()
df_daily_ticker.rename(columns={"Trade Dt": "Date"}, inplace=True)
df_daily_ticker['Date'] = pd.to_datetime(df_daily_ticker['Date'])

# Handle missing values by forward filling
df_daily_ticker.fillna(method='ffill', inplace=True)

# Fetch historical prices using yfinance
groups = df_daily_ticker['Group'].unique()
tickers = " ".join(groups)  # Join tickers with space or use "," for comma

# Download adjusted closing prices
prices_data = yf.download(tickers, start="2023-01-01", end="2023-12-31")['Adj Close']

# Calculate daily returns for each group
returns_data = prices_data.pct_change().dropna()

# Merge portfolio data with factor data on 'Date'
df_combined = pd.merge(df_daily_ticker, df_daily_factors, on='Date', how='inner')

# Add returns to the combined dataframe
df_combined = df_combined.merge(returns_data, left_on=['Date', 'Group'], right_index=True, how='left')
df_combined.rename(columns={group: 'Return' for group in groups}, inplace=True)

# Define a function to calculate betas for each group using regression
def calculate_beta(stock_returns, factor_returns):
    X = sm.add_constant(factor_returns)
    model = sm.OLS(stock_returns, X).fit()
    return model.params[1:]  # Return betas excluding intercept

# Calculate betas for each group in the portfolio
betas = {}
for group in df_combined['Group'].unique():
    stock_data = df_combined[df_combined['Group'] == group]
    betas[group] = calculate_beta(stock_data['Return'].dropna(), stock_data[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])

# Calculate position-level beta exposures
df_combined['Net Exposure'] = df_combined['Shares'] * df_combined['Price']
df_combined['Beta Exposure'] = df_combined.apply(
    lambda row: np.dot(betas[row['Group']], row[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']]) * row['Net Exposure'], axis=1)

# Aggregate to get portfolio-level factor exposures
portfolio_exposure = df_combined.groupby('Date')['Beta Exposure'].sum()

print(portfolio_exposure.head())

  df_daily_ticker.fillna(method='ffill', inplace=True)
[                       1%                       ]  14 of 2505 completedFailed to get ticker 'MIT/U' reason: Expecting value: line 1 column 1 (char 0)
[                       1%                       ]  25 of 2505 completedCould not get exchangeTimezoneName for ticker '' reason: list index out of range
[*                      2%                       ]  51 of 2505 completedFailed to get ticker 'DMYQ/U' reason: Expecting value: line 1 column 1 (char 0)
[**                     5%                       ]  123 of 2505 completedFailed to get ticker 'OPA/WS' reason: Expecting value: line 1 column 1 (char 0)
[***                    7%                       ]  165 of 2505 completedFailed to get ticker 'ZGN/WS' reason: Expecting value: line 1 column 1 (char 0)
[****                   9%                       ]  221 of 2505 completedFailed to get ticker 'PSTH/U' reason: Expecting value: line 1 column 1 (char 0)
[****                   9%    

ValueError: len(left_on) must equal the number of levels in the index of "right"

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Step 1: Load and preprocess portfolio data
df_daily_ticker = pd.read_excel(r"C:\Users\MukeshwaranBaskaran\Downloads\Project_KISKI\Data\KISKI_Data_PositionLevel_Daily_Ticker_Net.xlsx")
df_daily_ticker.columns = df_daily_ticker.columns.str.strip()
df_daily_ticker.rename(columns={"Trade Dt": "Date"}, inplace=True)

# Ensure 'Date' is in datetime format
df_daily_ticker['Date'] = pd.to_datetime(df_daily_ticker['Date'])

# Step 2: Forward fill missing dates
df_daily_ticker.sort_values('Date', inplace=True)
df_daily_ticker['Date'] = df_daily_ticker['Date'].ffill()

# Step 3: Load and preprocess factor data
df_daily_factors = pd.read_csv(r"C:\Users\MukeshwaranBaskaran\Downloads\Project_KISKI\Data\F-F_Research_Data_5_Factors_2x3_daily.CSV")
df_daily_factors['Date'] = pd.to_datetime(df_daily_factors['Date'], format='%Y%m%d')

# Step 4: Merge portfolio and factor data
df_merged = pd.merge(df_daily_ticker, df_daily_factors, on='Date')

# Check for missing values
print("Missing values in merged data:\n", df_merged.isnull().sum())

# Step 5: Calculate daily portfolio return
df_merged['Portfolio_Return'] = df_merged['Net (%)'] / 100  # Assuming Net (%) represents the portfolio return

# Ensure that all necessary columns are numeric
numeric_columns = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF', 'Portfolio_Return']
df_merged[numeric_columns] = df_merged[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Check for missing values again after conversion
print("Missing values after conversion:\n", df_merged[numeric_columns].isnull().sum())

# Drop rows with any missing values in numeric columns
df_merged.dropna(subset=numeric_columns, inplace=True)

# Step 6: Prepare data for multi-factor regression
X = df_merged[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']]
Y = df_merged['Portfolio_Return'] - (df_merged['RF'] / 100)  # Excess portfolio return

# Add constant to model
X = sm.add_constant(X)

# Fit the regression model
model = sm.OLS(Y, X).fit()

# Get factor betas
betas = model.params[1:]  # Exclude constant

# Step 7: Calculate active premium
annualized_portfolio_return = (1 + df_merged['Portfolio_Return']).prod() ** (252 / len(df_merged)) - 1
annualized_benchmark_return = (1 + (df_merged['RF'] / 100)).prod() ** (252 / len(df_merged)) - 1
active_premium = annualized_portfolio_return - annualized_benchmark_return

# Step 8: Calculate tracking error
tracking_error = np.std(df_merged['Portfolio_Return'] - (df_merged['RF'] / 100))

# Step 9: Calculate upside/downside capture ratios
upside_capture = (df_merged[df_merged['RF'] > 0]['Portfolio_Return'].mean() /
                  df_merged[df_merged['RF'] > 0]['RF'].mean()) if not df_merged[df_merged['RF'] > 0].empty else np.nan
downside_capture = (df_merged[df_merged['RF'] < 0]['Portfolio_Return'].mean() /
                    df_merged[df_merged['RF'] < 0]['RF'].mean()) if not df_merged[df_merged['RF'] < 0].empty else np.nan

# Step 10: Calculate information ratio
information_ratio = active_premium / tracking_error if tracking_error != 0 else np.nan

# Step 11: Calculate correlation with benchmark
correlation = df_merged['Portfolio_Return'].corr(df_merged['RF'] / 100)

# Step 12: Calculate maximum drawdown
cumulative_returns = (1 + df_merged['Portfolio_Return']).cumprod()
peak = cumulative_returns.cummax()
drawdown = (cumulative_returns - peak) / peak
max_drawdown = drawdown.min()

# Output results
print("Factor Betas:\n", betas)
print("Active Premium:", active_premium)
print("Tracking Error:", tracking_error)
print("Upside Capture Ratio:", upside_capture)
print("Downside Capture Ratio:", downside_capture)
print("Information Ratio:", information_ratio)
print("Correlation with Benchmark:", correlation)
print("Maximum Drawdown:", max_drawdown)


[**********************64%******                 ]  1615 of 2505 completedFailed to get ticker 'FST/WS' reason: Expecting value: line 1 column 1 (char 0)
[**********************65%******                 ]  1637 of 2505 completedFailed to get ticker 'HZON/WS' reason: Expecting value: line 1 column 1 (char 0)
[**********************66%*******                ]  1641 of 2505 completed