In [None]:
import pandas as pd

# Load every sheet of the workbook. With sheet_name=None, read_excel returns a
# dict mapping each sheet name to its DataFrame.
file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 1\DELETED_STOCKS.xlsx"
stock_data = pd.read_excel(file_path, sheet_name=None)

# Collect the usable sheets first and concatenate once at the end: calling
# pd.concat inside the loop copies the accumulated frame on every iteration.
company_frames = []
for sheet_name, sheet_data in stock_data.items():
    if 'date' not in sheet_data.columns:
        print(f"'date' column missing in sheet: {sheet_name}")
        continue
    # assign() returns a new frame, so the sheets held in stock_data are left
    # untouched and re-running this cell gives the same result.
    company_frames.append(
        sheet_data.assign(
            date=pd.to_datetime(sheet_data['date']),
            Company=sheet_name,  # the sheet name identifies the company
        )
    )

if not company_frames:
    # Fail early with a clear message instead of a KeyError on 'date' below.
    raise ValueError(f"No sheet in {file_path} contains a 'date' column.")

# One long panel: one row per (company, date), indexed by date.
combined_data = pd.concat(company_frames).set_index('date')

print("Combined Data:")
print(combined_data.head())
# info() prints its report itself and returns None, so it is not wrapped in print().
combined_data.info()

In [None]:
# Equal-weighted portfolio return: plain cross-sectional mean of the stock
# returns on each date.
equal_weighted_returns = combined_data.groupby('date')['stock_return'].mean()

# Value-weighted portfolio return: sum(return * weight) / sum(weight) per date,
# with market capitalisation as the weight.
combined_data['Market_Value'] = combined_data['market_value'] * combined_data['shares_outstanding'] #market_value was per share
weighted_returns = combined_data['stock_return'] * combined_data['Market_Value']
# Only weights that belong to an observed return may enter the denominator.
# sum() skips NaN in the numerator, so leaving the weight of a stock with a
# missing return in the denominator would pull the average towards zero.
# The mask is passed as a NumPy array to avoid any index alignment on the
# (non-unique) date index.
effective_weights = combined_data['Market_Value'].where(weighted_returns.notna().to_numpy())
value_weighted_returns = (
    weighted_returns.groupby(level='date').sum()
    / effective_weights.groupby(level='date').sum()
)

print("\nEqual-Weighted Returns:")
print(equal_weighted_returns.head())

print("\nValue-Weighted Returns:")
print(value_weighted_returns.head())


In [None]:

def classify_size(market_value, median_value):
    """Return 'Small' when market_value is strictly below median_value, else 'Big'.

    Any value that does not compare as smaller than the median (including a
    value equal to it, or NaN) is labelled 'Big'.
    """
    if market_value < median_value:
        return 'Small'
    return 'Big'

def classify_value(pbv, median_pbv):
    """Return 'Low' when pbv is strictly below median_pbv, else 'High'.

    Any value that does not compare as smaller than the median (including a
    value equal to it, or NaN) is labelled 'High'.
    """
    if pbv < median_pbv:
        return 'Low'
    return 'High'

# Breakpoints for the size and value sorts.
# Size is sorted on market capitalisation: 'market_value' is a per-share figure
# (the value-weighted portfolio multiplies it by 'shares_outstanding'), so it is
# scaled by shares outstanding here before firms are compared.
# NOTE(review): both medians are pooled over every date and company -- confirm
# that per-date breakpoints are not required for this study.
market_cap = combined_data['market_value'] * combined_data['shares_outstanding']
price_to_book = combined_data['price_to_book_value']
median_market_value = market_cap.median()
median_pbv = price_to_book.median()

# Classify stocks. A missing sort variable never compares as "below the median",
# so it would silently be labelled 'Big' / 'High'; such rows are left
# unclassified (NaN) and therefore drop out of both portfolios.
combined_data['Size'] = (
    market_cap.apply(lambda cap: classify_size(cap, median_market_value))
    .where(market_cap.notna().to_numpy())
)
combined_data['Value'] = (
    price_to_book.apply(lambda ratio: classify_value(ratio, median_pbv))
    .where(price_to_book.notna().to_numpy())
)

# SMB (Small Minus Big): equal-weighted return of small stocks minus big stocks.
small_returns = combined_data[combined_data['Size'] == 'Small'].groupby('date')['stock_return'].mean()
big_returns = combined_data[combined_data['Size'] == 'Big'].groupby('date')['stock_return'].mean()
smb = small_returns - big_returns

# HML (High Minus Low book-to-market). 'Value' is labelled by price-to-book,
# the inverse of book-to-market: the 'Low' P/B group is the high book-to-market
# (value) leg, so HML is the Low-P/B return minus the High-P/B return.
high_returns = combined_data[combined_data['Value'] == 'High'].groupby('date')['stock_return'].mean()
low_returns = combined_data[combined_data['Value'] == 'Low'].groupby('date')['stock_return'].mean()
hml = low_returns - high_returns

# MOM (Momentum): winners minus losers, sorted on the trailing 12-period
# cumulative return up to the previous period.
# The rows are sorted by date within each company because the rolling window
# and the shift below both work on row order.
momentum_data = (
    combined_data.reset_index()[['Company', 'date', 'stock_return']]
    .sort_values(['Company', 'date'], kind='stable')
    .reset_index(drop=True)
)
# The formation-period return is kept in its own Series so 'stock_return'
# still holds the period's actual return, which is what the portfolio returns
# must average.
formation_return = (
    momentum_data.groupby('Company')['stock_return']
    .rolling(window=12, min_periods=2)
    .sum()
    .reset_index(level=0, drop=True)  # drop the Company level to realign with momentum_data
)
# Lag by one period so the sort variable does not include the return it is
# used to explain.
momentum_data['Momentum'] = formation_return.groupby(momentum_data['Company']).shift(1)
# Rows without enough history get a momentum of 0 and are still assigned to a
# portfolio. NOTE(review): consider excluding them instead of filling with 0.
momentum_data['Momentum'] = momentum_data['Momentum'].fillna(0)
median_momentum = momentum_data['Momentum'].median()
momentum_data['Momentum_Class'] = momentum_data['Momentum'].apply(lambda x: 'Winner' if x > median_momentum else 'Loser')

winner_returns = momentum_data[momentum_data['Momentum_Class'] == 'Winner'].groupby('date')['stock_return'].mean()
loser_returns = momentum_data[momentum_data['Momentum_Class'] == 'Loser'].groupby('date')['stock_return'].mean()
mom = winner_returns - loser_returns

print(smb.head())

print(hml.head())

# Show a preview only, like the other two factors, instead of the full series.
print(mom.head())



In [4]:

# Market return per date (mean over the rows that share the date).
market_returns = combined_data.groupby('date')['market_return'].mean()
# Excess market return: subtract the risk-free rate of the same date rather
# than one average over the whole sample.
risk_free_rate = combined_data.groupby('date')['rf'].mean()
excess_market_return = market_returns - risk_free_rate
# Market-adjusted returns for the equal-weighted and value-weighted portfolios.
equal_weighted_adjusted_returns = equal_weighted_returns - market_returns
value_weighted_adjusted_returns = value_weighted_returns - market_returns
# Persist the series as two-column tables ('Date', 'returns'). Writing the bare
# Series with index=False would drop the dates and leave the value column
# without the 'returns' header that the regression step looks up.
equal_weighted_adjusted_returns.rename('returns').rename_axis('Date').reset_index().to_excel(
    r"C:\UU\THESIS\AMX\HYPOTHESIS 1\DELETED_equal_weighted_returns.xlsx", index=False
)
value_weighted_adjusted_returns.rename('returns').rename_axis('Date').reset_index().to_excel(
    r"C:\UU\THESIS\AMX\HYPOTHESIS 1\DELETED_value_weighted_returns.xlsx", index=False
)

In [5]:
import statsmodels.api as sm

# Assemble the factor table by aligning the four series on their date index.
# Passing the Series themselves (not their .values) lets pandas match the dates:
# positional arrays raise when the lengths differ and silently pair up the
# wrong dates when they do not.
factors = (
    pd.DataFrame({
        'SMB': smb,
        'HML': hml,
        'MOM': mom,
        'Excess_Market_Return': excess_market_return,
    })
    .rename_axis('Date')
    .reset_index()
)

# A factor that is missing on a date is treated as a zero return.
# NOTE(review): consider dropping those dates instead of filling with 0.
factors = factors.fillna(0)



In [None]:

import statsmodels.api as sm


def run_regression(data, portfolio_name, label=None):
    """Fit an OLS regression of a portfolio return on the four factors.

    Parameters
    ----------
    data : DataFrame holding the dependent column and the factor columns
        'Excess_Market_Return', 'SMB', 'HML' and 'MOM'.
    portfolio_name : name of the column in ``data`` used as dependent variable.
    label : optional text for the printed heading; defaults to ``portfolio_name``.

    Returns
    -------
    The fitted statsmodels results object, so callers can inspect it further.
    """
    y = data[portfolio_name]
    X = sm.add_constant(data[['Excess_Market_Return', 'SMB', 'HML', 'MOM']])
    # missing='drop' discards rows with NaN in y or X before fitting.
    model = sm.OLS(y, X, missing='drop').fit()
    print(f"Regression results for {label or portfolio_name}:")
    print(model.summary())
    return model


equal_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 1\DELETED_equal_weighted_returns.xlsx"
value_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 1\DELETED_value_weighted_returns.xlsx"
equal_weighted_adjusted_returns_df = pd.read_excel(equal_path)
value_weighted_adjusted_returns_df = pd.read_excel(value_path)

# Check if the adjusted returns DataFrames are empty
if equal_weighted_adjusted_returns_df.empty or value_weighted_adjusted_returns_df.empty:
    print("One of the adjusted returns DataFrames is empty. Check the input data and calculations.")
else:
    # The merge key and the dependent variable must both be present in the
    # files; report a missing column by name instead of failing inside merge.
    for source_path, returns_df in (
        (equal_path, equal_weighted_adjusted_returns_df),
        (value_path, value_weighted_adjusted_returns_df),
    ):
        missing_columns = {'Date', 'returns'} - set(returns_df.columns)
        if missing_columns:
            raise KeyError(f"{source_path} is missing column(s): {sorted(missing_columns)}")

    # Merge the portfolio returns with the factors
    equal_weighted_data = pd.merge(equal_weighted_adjusted_returns_df, factors, on='Date')
    value_weighted_data = pd.merge(value_weighted_adjusted_returns_df, factors, on='Date')

    # Ensure the merged data is not empty
    if equal_weighted_data.empty or value_weighted_data.empty:
        print("One of the merged DataFrames is empty. Check the input data and calculations.")
    else:
        # Run regressions; the label keeps the two printed summaries apart,
        # since both use the same dependent column name.
        run_regression(equal_weighted_data, 'returns', label='equal-weighted returns')
        run_regression(value_weighted_data, 'returns', label='value-weighted returns')