### Loading Libraries

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_1samp

### Loading Dataset 

In [2]:
# Load the data
file_path = 'final_data.csv'
stock_data = pd.read_csv(file_path)

# Check for and drop the 'Unnamed: 0' column if it exists
if 'Unnamed: 0' in stock_data.columns:
    stock_data.drop(columns=['Unnamed: 0'], inplace=True)
    
stock_data.head()

Unnamed: 0,Adj Close,Volume,Date,Symbol,Year,Outstanding_Shares
0,429.1,1411814,01/01/2014,MCX,2014,50.998
1,424.3,2919045,02/01/2014,MCX,2014,50.998
2,499.6,8191055,03/01/2014,MCX,2014,50.998
3,497.8,8395828,06/01/2014,MCX,2014,50.998
4,517.0,6823517,07/01/2014,MCX,2014,50.998


### Data Cleaning

In [3]:
# Convert 'Outstanding_Shares' to numeric (if not already)
stock_data['Outstanding_Shares'] = pd.to_numeric(stock_data['Outstanding_Shares'], errors='coerce')

# Multiply 'Outstanding_Shares' by 10,000,000 (since it's in crores) before calculating turnover
stock_data['Outstanding_Shares'] *= 10000000

# Calculate 'Market_Cap'
stock_data['Market_cap'] = stock_data['Adj Close'] * stock_data['Outstanding_Shares']

# Calculate daily turnover
stock_data['Turnover'] = stock_data['Volume'] / stock_data['Outstanding_Shares']

# Convert 'Date' to datetime format
stock_data['Date'] = pd.to_datetime(stock_data['Date'], format="%d/%m/%Y")

# Set 'Date' as the index
stock_data.set_index('Date', inplace=True)

# Display the first few rows with the new Turnover column
stock_data.head()

Unnamed: 0_level_0,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-01,429.1,1411814,MCX,2014,509980000.0,218832400000.0,0.002768
2014-01-02,424.3,2919045,MCX,2014,509980000.0,216384500000.0,0.005724
2014-01-03,499.6,8191055,MCX,2014,509980000.0,254786000000.0,0.016062
2014-01-06,497.8,8395828,MCX,2014,509980000.0,253868000000.0,0.016463
2014-01-07,517.0,6823517,MCX,2014,509980000.0,263659700000.0,0.01338


#### Rank Stocks Based on Size (MarketCap)

In [4]:
# Resample to monthly frequency to get the first price of each month
monthly_data = stock_data.groupby('Symbol').resample('BMS').first()

# Reset the index to make 'Symbol' and 'Date' columns again
monthly_data = monthly_data.reset_index(level=0, drop=True)

monthly_data['Size_Rank'] = monthly_data.groupby('Date')['Market_cap'].rank(method='first', ascending=True)

# Calculate deciles based on 'Size_Rank'
monthly_data['Size_Decile'] = monthly_data.groupby('Date')['Size_Rank'].transform(lambda x: pd.qcut(x, 10, labels=False) + 1)

monthly_data.reset_index(inplace=True)
monthly_data[monthly_data['Date'] == '2014-01-01']

Unnamed: 0,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,Size_Decile
0,2014-01-01,3445.1,149,3MINDIA,2014,1.126500e+08,3.880905e+11,0.000001,174.0,6
120,2014-01-01,22.2,94557,AARTIIND,2014,3.332820e+09,7.398860e+10,0.000028,48.0,2
240,2014-01-01,591.9,75730,ABB,2014,2.119080e+09,1.254283e+12,0.000036,259.0,8
360,2014-01-01,1549.1,1444,ABBOTINDIA,2014,2.124900e+08,3.291683e+11,0.000007,156.0,5
480,2014-01-01,87.5,9623,ABFRL,2014,9.413900e+08,8.237162e+10,0.000010,54.0,2
...,...,...,...,...,...,...,...,...,...,...
39840,2014-01-01,69.3,6697155,YESBANK,2014,2.096607e+10,1.452949e+12,0.000319,268.0,8
39960,2014-01-01,259.5,406220,ZEEL,2014,9.604490e+09,2.492365e+12,0.000042,296.0,9
40080,2014-01-01,62.9,471605,ZENSARTECH,2014,2.227050e+09,1.400814e+11,0.000212,93.0,3
40200,2014-01-01,1991.7,812,ZFCVINDIA,2014,1.896800e+08,3.777857e+11,0.000004,171.0,6


#### Calculating J Month Past Returns 

In [5]:
# Calculate monthly returns in percentage
monthly_data['Monthly_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change() * 100

# Calculate cumulative returns for 3, 6, 9, and 12 months in percentage
monthly_data['3M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(3) * 100
monthly_data['6M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(6) * 100
monthly_data['9M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(9) * 100
monthly_data['12M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(12) * 100

monthly_data.reset_index(inplace=True)
monthly_data.head()

Unnamed: 0,index,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,Size_Decile,Monthly_Return,3M_Return,6M_Return,9M_Return,12M_Return
0,0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,388090500000.0,1.322681e-06,174.0,6,,,,,
1,1,2014-02-03,3443.1,76,3MINDIA,2014,112650000.0,387865200000.0,6.74656e-07,180.0,6,-0.058053,,,,
2,2,2014-03-03,3461.4,419,3MINDIA,2014,112650000.0,389926700000.0,3.719485e-06,175.0,6,0.531498,,,,
3,3,2014-04-01,3404.7,253,3MINDIA,2014,112650000.0,383539500000.0,2.245894e-06,167.0,5,-1.638066,-1.17268,,,
4,4,2014-05-01,3550.6,239,3MINDIA,2014,112650000.0,399975100000.0,2.121616e-06,165.0,5,4.285253,3.122186,,,


#### Ranking Stocks based on Past J Month Returns 

In [6]:
# Rank stocks based on cumulative returns
for period in ['3M', '6M', '9M', '12M']:
    rank_col = f'{period}_Rank'
    monthly_data[rank_col] = monthly_data.groupby('Date')[f'{period}_Return'].rank(method='first', ascending=True)

monthly_data.head()

Unnamed: 0,index,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,Size_Decile,Monthly_Return,3M_Return,6M_Return,9M_Return,12M_Return,3M_Rank,6M_Rank,9M_Rank,12M_Rank
0,0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,388090500000.0,1.322681e-06,174.0,6,,,,,,,,,
1,1,2014-02-03,3443.1,76,3MINDIA,2014,112650000.0,387865200000.0,6.74656e-07,180.0,6,-0.058053,,,,,,,,
2,2,2014-03-03,3461.4,419,3MINDIA,2014,112650000.0,389926700000.0,3.719485e-06,175.0,6,0.531498,,,,,,,,
3,3,2014-04-01,3404.7,253,3MINDIA,2014,112650000.0,383539500000.0,2.245894e-06,167.0,5,-1.638066,-1.17268,,,,85.0,,,
4,4,2014-05-01,3550.6,239,3MINDIA,2014,112650000.0,399975100000.0,2.121616e-06,165.0,5,4.285253,3.122186,,,,45.0,,,


#### Assign Ranks to R1-R5 Quintile

In [7]:
# Define quintile function with check for sufficient unique values
def assign_quintile(x, rank_col, quintile_col):
    if len(x[rank_col].unique()) < 5:
        x[quintile_col] = np.nan
    else:
        x[quintile_col] = pd.qcut(x[rank_col], 5, labels=False) + 1
    return x

# Apply quintile ranking
for period in ['3M', '6M', '9M', '12M']:
    rank_col = f'{period}_Rank'
    quintile_col = f'{period}_Quintile'
    monthly_data = monthly_data.groupby('Date').apply(assign_quintile, rank_col, quintile_col).reset_index(drop=True)

monthly_data[monthly_data['Symbol'] == 'ABB'].head()#### Calculating J Month Past Returns 

Unnamed: 0,index,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,...,9M_Return,12M_Return,3M_Rank,6M_Rank,9M_Rank,12M_Rank,3M_Quintile,6M_Quintile,9M_Quintile,12M_Quintile
2,240,2014-01-01,591.9,75730,ABB,2014,2119080000.0,1254283000000.0,3.6e-05,259.0,...,,,,,,,,,,
339,241,2014-02-03,496.2,138871,ABB,2014,2119080000.0,1051487000000.0,6.6e-05,255.0,...,,,,,,,,,,
676,242,2014-03-03,620.9,392148,ABB,2014,2119080000.0,1315737000000.0,0.000185,266.0,...,,,,,,,,,,
1013,243,2014-04-01,732.8,180312,ABB,2014,2119080000.0,1552862000000.0,8.5e-05,268.0,...,,,277.0,,,,5.0,,,
1350,244,2014-05-01,724.1,121174,ABB,2014,2119080000.0,1534426000000.0,5.7e-05,266.0,...,,,295.0,,,,5.0,,,


#### Calculate Average Turnover over the J formation periods

In [8]:
# Calculate average turnover over the formation periods
for period in ['3M', '6M', '9M', '12M']:
    avg_turnover_col = f'{period}_Avg_Turnover'
    window_size = int(period[:-1])
    monthly_data[avg_turnover_col] = monthly_data.groupby('Symbol')['Turnover'].rolling(window=window_size,min_periods=1).mean().reset_index(level=0, drop=True)

monthly_data.head()

Unnamed: 0,index,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,...,9M_Rank,12M_Rank,3M_Quintile,6M_Quintile,9M_Quintile,12M_Quintile,3M_Avg_Turnover,6M_Avg_Turnover,9M_Avg_Turnover,12M_Avg_Turnover
0,0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,388090500000.0,1e-06,174.0,...,,,,,,,1e-06,1e-06,1e-06,1e-06
1,120,2014-01-01,22.2,94557,AARTIIND,2014,3332820000.0,73988600000.0,2.8e-05,48.0,...,,,,,,,2.8e-05,2.8e-05,2.8e-05,2.8e-05
2,240,2014-01-01,591.9,75730,ABB,2014,2119080000.0,1254283000000.0,3.6e-05,259.0,...,,,,,,,3.6e-05,3.6e-05,3.6e-05,3.6e-05
3,360,2014-01-01,1549.1,1444,ABBOTINDIA,2014,212490000.0,329168300000.0,7e-06,156.0,...,,,,,,,7e-06,7e-06,7e-06,7e-06
4,480,2014-01-01,87.5,9623,ABFRL,2014,941390000.0,82371620000.0,1e-05,54.0,...,,,,,,,1e-05,1e-05,1e-05,1e-05


#### Ranking Stocks Based on Average Turnover

In [9]:
# Rank stocks based on average turnover
for period in ['3M', '6M', '9M', '12M']:
    avg_turnover_col = f'{period}_Avg_Turnover'
    rank_col = f'{period}_Turnover_Rank'
    monthly_data[rank_col] = monthly_data.groupby('Date')[avg_turnover_col].rank(method='first')
    
monthly_data.head()

Unnamed: 0,index,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,...,9M_Quintile,12M_Quintile,3M_Avg_Turnover,6M_Avg_Turnover,9M_Avg_Turnover,12M_Avg_Turnover,3M_Turnover_Rank,6M_Turnover_Rank,9M_Turnover_Rank,12M_Turnover_Rank
0,0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,388090500000.0,1e-06,174.0,...,,,1e-06,1e-06,1e-06,1e-06,11.0,11.0,11.0,11.0
1,120,2014-01-01,22.2,94557,AARTIIND,2014,3332820000.0,73988600000.0,2.8e-05,48.0,...,,,2.8e-05,2.8e-05,2.8e-05,2.8e-05,125.0,125.0,125.0,125.0
2,240,2014-01-01,591.9,75730,ABB,2014,2119080000.0,1254283000000.0,3.6e-05,259.0,...,,,3.6e-05,3.6e-05,3.6e-05,3.6e-05,145.0,145.0,145.0,145.0
3,360,2014-01-01,1549.1,1444,ABBOTINDIA,2014,212490000.0,329168300000.0,7e-06,156.0,...,,,7e-06,7e-06,7e-06,7e-06,41.0,41.0,41.0,41.0
4,480,2014-01-01,87.5,9623,ABFRL,2014,941390000.0,82371620000.0,1e-05,54.0,...,,,1e-05,1e-05,1e-05,1e-05,58.0,58.0,58.0,58.0


#### Assign Average Turnover Ranks to Terciles V1-V3

In [10]:
# Define tercile function with check for sufficient unique values
def assign_tercile(x, rank_col, tercile_col):
    if len(x[rank_col].unique()) < 3:
        x[tercile_col] = np.nan
    else:
        x[tercile_col] = pd.qcut(x[rank_col], 3, labels=False) + 1
    return x

# Apply tercile ranking
for period in ['3M', '6M', '9M', '12M']:
    rank_col = f'{period}_Turnover_Rank'
    tercile_col = f'{period}_Turnover_Tercile'
    monthly_data = monthly_data.groupby('Date').apply(assign_tercile, rank_col, tercile_col).reset_index(drop=True)
    
monthly_data.head()

Unnamed: 0,index,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Market_cap,Turnover,Size_Rank,...,9M_Avg_Turnover,12M_Avg_Turnover,3M_Turnover_Rank,6M_Turnover_Rank,9M_Turnover_Rank,12M_Turnover_Rank,3M_Turnover_Tercile,6M_Turnover_Tercile,9M_Turnover_Tercile,12M_Turnover_Tercile
0,0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,388090500000.0,1e-06,174.0,...,1e-06,1e-06,11.0,11.0,11.0,11.0,1,1,1,1
1,120,2014-01-01,22.2,94557,AARTIIND,2014,3332820000.0,73988600000.0,2.8e-05,48.0,...,2.8e-05,2.8e-05,125.0,125.0,125.0,125.0,2,2,2,2
2,240,2014-01-01,591.9,75730,ABB,2014,2119080000.0,1254283000000.0,3.6e-05,259.0,...,3.6e-05,3.6e-05,145.0,145.0,145.0,145.0,2,2,2,2
3,360,2014-01-01,1549.1,1444,ABBOTINDIA,2014,212490000.0,329168300000.0,7e-06,156.0,...,7e-06,7e-06,41.0,41.0,41.0,41.0,1,1,1,1
4,480,2014-01-01,87.5,9623,ABFRL,2014,941390000.0,82371620000.0,1e-05,54.0,...,1e-05,1e-05,58.0,58.0,58.0,58.0,1,1,1,1


#### Calculate Current Month (1 Month Future Return) 

In [11]:
monthly_data['Current_Month_Return'] = monthly_data.groupby('Symbol')['Monthly_Return'].shift(-1)

#### PRICE MOMENTUM RESULTS

In [16]:
# Function to calculate the equal-weighted average return for a portfolio
def calculate_weighted_avg_returns(df, J, K, rank):
    df['YearMonth'] = df['Date'].dt.to_period('M')
    unique_year_months = df['YearMonth'].unique()
    portfolio_returns = []

    for current_year_month in unique_year_months:
        returns = 0
        valid_count = 0

        for offset in range(K):
            # Ensure we use exact YearMonth dates from the dataset
            index = np.where(unique_year_months == current_year_month)[0][0] - offset
            if index < 0:
                continue
            look_back_year_month = unique_year_months[index]
            portfolio_stocks = df[(df['YearMonth'] == look_back_year_month) & (df[f'{J}M_Quintile'] == rank)]['Symbol']

            if not portfolio_stocks.empty:
                current_returns = df[(df['YearMonth'] == current_year_month) & (df['Symbol'].isin(portfolio_stocks))]['Current_Month_Return'].mean()
                if not np.isnan(current_returns):
                    # Accumulate the product of (1 + returns)
                    returns *= (1 + (current_returns/100))
                    valid_count += 1         

        if valid_count > 0:
            # Compute the geometric mean of the returns
            geometric_return = (returns ** (1 / valid_count)) - 1
            portfolio_returns.append({'YearMonth': current_year_month, 'J': J, 'K': K, 'Rank': 'R'+str(rank), 'Portfolio_Return': geometric_return*100})

    return pd.DataFrame(portfolio_returns)

# Calculate the weighted average returns for all combinations of J, K, and ranks
results = []
for J in [3, 6, 9, 12]:
    for K in [3, 6, 9, 12]:
        for rank in [1, 5]:
            result = calculate_weighted_avg_returns(monthly_data, J, K, rank)
            results.append(result)

# Combine all results into a single DataFrame
portfolio_returns_df = pd.concat(results, ignore_index=True)

portfolio_returns_df

KeyboardInterrupt: 

##### Consolidate Results For Each J and K combination ( 4 J * 4 K * 3 (R1, R5, R1-R5) = 48 combinations)

In [None]:
# Function to calculate mean and t-test for each group
def calculate_mean_and_ttest(group):
    mean_return = group['Portfolio_Return'].mean()
    t_stat, _ = ttest_1samp(group['Portfolio_Return'], 0)
    return pd.Series({'Mean_Return': mean_return, 'T_Value': t_stat})

# Function to calculate Rank 1 - Rank 5 returns and t-test
def calculate_diff_and_ttest(df):
    rank1 = df[df['Rank'] == 'R1']
    rank5 = df[df['Rank'] == 'R5']
    
    if not rank1.empty and not rank5.empty:
        diff_returns = rank5['Portfolio_Return'].values - rank1['Portfolio_Return'].values
        mean_diff = diff_returns.mean()
        t_stat, _ = ttest_1samp(diff_returns, 0)
        return pd.Series({'Mean_Return': mean_diff, 'T_Value': t_stat, 'Rank': 'R5-R1'})
    else:
        return pd.Series({'Mean_Return': None, 'T_Value': None, 'Rank': 'R5-R1'})

# Group by J, K, and Rank and apply the function
mean_portfolio_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Rank']).apply(calculate_mean_and_ttest).reset_index()

# Calculate Rank 1 - Rank 10 difference and t-test
diff_returns_df = portfolio_returns_df.groupby(['J', 'K']).apply(calculate_diff_and_ttest).reset_index()

# Combine the results
combined_df = pd.concat([mean_portfolio_returns_df, diff_returns_df], ignore_index=True)

combined_df

In [None]:
combined_df = combined_df.round(2)
combined_df.to_csv('combined_df_5.csv', index=False)

#### PRICE MOMENTUM AND TURNOVER RESULTS

In [None]:
# Function to calculate the equal-weighted average return for a portfolio including volume rankings
def calculate_weighted_avg_returns(df, J, K, return_rank, volume_rank):
    df['YearMonth'] = df['Date'].dt.to_period('M')
    unique_year_months = df['YearMonth'].unique()
    portfolio_returns = []

    for current_year_month in unique_year_months:
        returns = 0
        valid_count = 0

        for offset in range(K):
            # Ensure we use exact YearMonth dates from the dataset
            index = np.where(unique_year_months == current_year_month)[0][0] - offset
            if index < 0:
                continue
            look_back_year_month = unique_year_months[index]
            portfolio_stocks = df[(df['YearMonth'] == look_back_year_month) & (df[f'{J}M_Quintile'] == return_rank) & (df[f'{J}M_Turnover_Tercile'] == volume_rank)]['Symbol']

            if not portfolio_stocks.empty:
                current_returns = df[(df['YearMonth'] == current_year_month) & (df['Symbol'].isin(portfolio_stocks))]['Current_Month_Return'].mean()
                if not np.isnan(current_returns):
                    returns *= (1 + current_returns)
                    valid_count += 1

        if valid_count > 0:
            # Compute the geometric mean of the returns
            geometric_return = (returns ** (1 / valid_count)) - 1
            portfolio_returns.append({'YearMonth': current_year_month, 'J': J, 'K': K, 'Return_Rank': 'R'+str(return_rank), 'Volume_Rank': 'V'+str(volume_rank), 'Portfolio_Return': geometric_return})

    return pd.DataFrame(portfolio_returns)

# Calculate the weighted average returns for all combinations of J, K, return ranks, and volume ranks
results = []
for J in [3, 6, 9, 12]:
    for K in [3, 6, 9, 12]:
        for return_rank in [1, 5]:
            for volume_rank in [1, 2, 3]:  # 1 for lowest volume tercile, 3 for highest volume tercile
                result = calculate_weighted_avg_returns(monthly_data, J, K, return_rank, volume_rank)
                results.append(result)

# Combine all results into a single DataFrame
portfolio_returns_df = pd.concat(results, ignore_index=True)

# Display the result
portfolio_returns_df

#### Add Records for V3-V1 for each J, K, R1, R5

In [None]:
# Add records for V1 - V3 for each J, K, R1, R5
diff_results = []

for (J, K, return_rank), group in portfolio_returns_df.groupby(['J', 'K', 'Return_Rank']):
    v1 = group[group['Volume_Rank'] == 'V1']
    v3 = group[group['Volume_Rank'] == 'V3']
    
    if not v1.empty and not v3.empty:
        for date in v1['YearMonth'].unique():
            v1_return = v1[v1['YearMonth'] == date]['Portfolio_Return'].values
            v3_return = v3[v3['YearMonth'] == date]['Portfolio_Return'].values
            if len(v1_return) > 0 and len(v3_return) > 0:
                diff_returns_v3_v1 = v3_return[0] - v1_return[0]
                diff_results.append({'YearMonth': date, 'J': J, 'K': K, 'Return_Rank': return_rank, 'Volume_Rank': 'V3-V1', 'Portfolio_Return': diff_returns_v3_v1})
                
# Convert the diff_results list to a DataFrame and concatenate with portfolio_returns_df
diff_results_df = pd.DataFrame(diff_results)
portfolio_returns_df = pd.concat([portfolio_returns_df, diff_results_df], ignore_index=True)
portfolio_returns_df

#### Add records for R5 - R1 for each J, K, V1, V2, V3, V3-V1

In [None]:
# Add records for R1 - R10 for each J, K, V1, V2, V3
for (J, K, volume_rank), group in portfolio_returns_df.groupby(['J', 'K', 'Volume_Rank']):
    rank1 = group[group['Return_Rank'] == 'R1']
    rank5 = group[group['Return_Rank'] == 'R5']
    
    if not rank1.empty and not rank5.empty:
        for date in rank1['YearMonth'].unique():
            rank1_return = rank1[rank1['YearMonth'] == date]['Portfolio_Return'].values
            rank5_return = rank5[rank5['YearMonth'] == date]['Portfolio_Return'].values
            if len(rank1_return) > 0 and len(rank5_return) > 0:
                diff_returns_r1_r5 = rank5_return[0] - rank1_return[0]
                diff_results.append({'YearMonth': date, 'J': J, 'K': K, 'Return_Rank': 'R5-R1', 'Volume_Rank': volume_rank, 'Portfolio_Return': diff_returns_r1_r5})

# Convert the diff_results list to a DataFrame and concatenate with portfolio_returns_df
diff_results_df = pd.DataFrame(diff_results)
portfolio_returns_df = pd.concat([portfolio_returns_df, diff_results_df], ignore_index=True)
portfolio_returns_df

##### Consolidate Results For Each J and K combination ( 4 J * 4 K * 3 (R1, R5, R1-R5) * 4* (V1, V2, V3, V3-V1) = 192 combinations)

In [None]:
# Apply the function to calculate mean return and t-statistic for all combinations
final_results_df = portfolio_returns_df.groupby(['J', 'K', 'Return_Rank', 'Volume_Rank']).apply(calculate_mean_and_ttest).reset_index()

final_results_df

In [None]:
# Assuming final_results_df is your DataFrame
final_results_df = final_results_df.round(2)
final_results_df.to_csv('final_results_df_reverse_5.csv')

### Portfolio Characteristics

In [None]:
def calculate_portfolio_characteristics(df, J, return_rank, volume_rank):
    df['YearMonth'] = df['Date'].dt.to_period('M')
    
    results = []
    for current_year_month in df['YearMonth'].unique():
        filtered_data = df[(df['YearMonth'] == current_year_month) & 
                           (df[f'{J}M_Quintile'] == return_rank) & 
                           (df[f'{J}M_Turnover_Tercile'] == volume_rank)]
        
        if not filtered_data.empty:
            results.append({
                'YearMonth': current_year_month, 'J': J, 'Return_Rank': 'R'+str(return_rank), 
                'Volume_Rank': 'V'+str(volume_rank), 
                'Portfolio_Return': (filtered_data[f'{J}M_Return'].mean() / J),
                'Turnover': filtered_data['Turnover'].mean(), 
                'Size_Decile': filtered_data['Size_Decile'].median(),
                'Number_of_stocks': filtered_data.shape[0]
            })
    return pd.DataFrame(results)
    

# Calculate the weighted average returns for all combinations of J, K, return ranks, and volume ranks
portfolio_characteristics = []
for J in [3, 6, 9, 12]:
        for return_rank in (1, 2, 3, 4, 5):
            for volume_rank in [1, 2, 3]:  # 1 for lowest volume tercile, 3 for highest volume tercile
                result = calculate_portfolio_characteristics(monthly_data, J, return_rank, volume_rank)
                portfolio_characteristics.append(result)

# Combine all results into a single DataFrame
portfolio_characteristics_df = pd.concat(portfolio_characteristics, ignore_index=True)

# Display the result
portfolio_characteristics_df

In [None]:
def geometric_mean(x):
    return np.exp(np.log(x + 1).mean()) - 1

# Group by 'J', 'Return_Rank', and 'Volume_Rank'
grouped_df = portfolio_characteristics_df.groupby(['J', 'Return_Rank', 'Volume_Rank'])

# Calculate required metrics
aggregated_df = grouped_df.agg({
    'Portfolio_Return': lambda x: round((geometric_mean((x / 100) + 1) - 1) * 100, 2),  # Geometric average of returns
    'Turnover': 'mean',                  # Arithmetic average of turnover
    'Size_Decile': 'mean',               # Arithmetic average of size decile
    'Number_of_stocks': 'mean'           # Arithmetic average of number of stocks
})

# Reset index if you want 'J', 'Return_Rank', 'Volume_Rank' as columns
aggregated_df.reset_index(inplace=True)

In [None]:
aggregated_df

In [None]:
# Calculating aggregate turnover into percentage
aggregated_df['Turnover'] = aggregated_df['Turnover'] * 100

aggregated_df = aggregated_df.round(2)
aggregated_df.to_csv('portfolio_Characteristics.csv')