In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the data
file_path = 'final_data.csv'
stock_data = pd.read_csv(file_path)

# Check for and drop the 'Unnamed: 0' column if it exists
if 'Unnamed: 0' in stock_data.columns:
    stock_data.drop(columns=['Unnamed: 0'], inplace=True)
    
stock_data

Unnamed: 0,Adj Close,Volume,Date,Symbol,Year,Outstanding_Shares
0,429.1,1411814,01/01/2014,MCX,2014,50.998
1,424.3,2919045,02/01/2014,MCX,2014,50.998
2,499.6,8191055,03/01/2014,MCX,2014,50.998
3,497.8,8395828,06/01/2014,MCX,2014,50.998
4,517.0,6823517,07/01/2014,MCX,2014,50.998
...,...,...,...,...,...,...
830700,2565.1,8270892,22/12/2023,RELIANCE,2023,6765.684
830701,2578.1,3732832,26/12/2023,RELIANCE,2023,6765.684
830702,2586.9,4602078,27/12/2023,RELIANCE,2023,6765.684
830703,2605.6,6151318,28/12/2023,RELIANCE,2023,6765.684


In [3]:
# Convert 'Outstanding_Shares' to numeric (if not already)
stock_data['Outstanding_Shares'] = pd.to_numeric(stock_data['Outstanding_Shares'], errors='coerce')

# Multiply 'Outstanding_Shares' by 10,000,000 (since it's in crores) before calculating turnover
stock_data['Outstanding_Shares'] *= 10000000

# Calculate daily turnover
stock_data['Turnover'] = stock_data['Volume'] / stock_data['Outstanding_Shares']

# Display the first few rows with the new Turnover column
stock_data.head()

Unnamed: 0,Adj Close,Volume,Date,Symbol,Year,Outstanding_Shares,Turnover
0,429.1,1411814,01/01/2014,MCX,2014,509980000.0,0.002768
1,424.3,2919045,02/01/2014,MCX,2014,509980000.0,0.005724
2,499.6,8191055,03/01/2014,MCX,2014,509980000.0,0.016062
3,497.8,8395828,06/01/2014,MCX,2014,509980000.0,0.016463
4,517.0,6823517,07/01/2014,MCX,2014,509980000.0,0.01338


In [4]:
# Convert 'Date' to datetime format
stock_data['Date'] = pd.to_datetime(stock_data['Date'], format="%d/%m/%Y")

# Set 'Date' as the index
stock_data.set_index('Date', inplace=True)

In [5]:
# Resample to monthly frequency to get the first price of each month
monthly_data = stock_data.groupby('Symbol').resample('BMS').first()

# Reset the index to make 'Symbol' and 'Date' columns again
monthly_data = monthly_data.reset_index(level=0, drop=True)

# Calculate monthly returns in percentage
monthly_data['Monthly_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change() * 100

# Calculate cumulative returns for 3, 6, 9, and 12 months in percentage
monthly_data['3M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(3) * 100
monthly_data['6M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(6) * 100
monthly_data['9M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(9) * 100
monthly_data['12M_Return'] = monthly_data.groupby('Symbol')['Adj Close'].pct_change(12) * 100


monthly_data.reset_index(inplace=True)

In [6]:
# Rank stocks based on cumulative returns
for period in ['3M', '6M', '9M', '12M']:
    rank_col = f'{period}_Rank'
    monthly_data[rank_col] = monthly_data.groupby('Date')[f'{period}_Return'].rank(method='first', ascending=False)

In [7]:
# Define quintile function with check for sufficient unique values
def assign_quintile(x, rank_col, quintile_col):
    if len(x[rank_col].unique()) < 10:
        x[quintile_col] = np.nan
    else:
        x[quintile_col] = pd.qcut(x[rank_col], 10, labels=False) + 1
    return x

# Apply quintile ranking
for period in ['3M', '6M', '9M', '12M']:
    rank_col = f'{period}_Rank'
    quintile_col = f'{period}_Quintile'
    monthly_data = monthly_data.groupby('Date').apply(assign_quintile, rank_col, quintile_col).reset_index(drop=True)


In [8]:
monthly_data[monthly_data['Symbol'] == '3MINDIA']

Unnamed: 0,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Turnover,Monthly_Return,3M_Return,6M_Return,9M_Return,12M_Return,3M_Rank,6M_Rank,9M_Rank,12M_Rank,3M_Quintile,6M_Quintile,9M_Quintile,12M_Quintile
0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,1.322681e-06,,,,,,,,,,,,,
337,2014-02-03,3443.1,76,3MINDIA,2014,112650000.0,6.746560e-07,-0.058053,,,,,,,,,,,,
674,2014-03-03,3461.4,419,3MINDIA,2014,112650000.0,3.719485e-06,0.531498,,,,,,,,,,,,
1011,2014-04-01,3404.7,253,3MINDIA,2014,112650000.0,2.245894e-06,-1.638066,-1.172680,,,,253.0,,,,8.0,,,
1348,2014-05-01,3550.6,239,3MINDIA,2014,112650000.0,2.121616e-06,4.285253,3.122186,,,,293.0,,,,9.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38755,2023-08-01,28210.7,14027,3MINDIA,2023,112650000.0,1.245184e-04,5.384133,28.146577,26.701399,28.603991,29.706157,88.0,140.0,130.0,149.0,3.0,5.0,4.0,5.0
39092,2023-09-01,30737.6,2486,3MINDIA,2023,112650000.0,2.206835e-05,8.957240,18.909384,37.986515,33.673123,41.414625,151.0,111.0,108.0,117.0,5.0,4.0,4.0,4.0
39429,2023-10-02,30959.9,1746,3MINDIA,2023,112650000.0,1.549933e-05,0.723218,15.654068,38.903395,46.417812,34.300537,145.0,135.0,89.0,160.0,5.0,4.0,3.0,5.0
39766,2023-11-01,29918.2,2663,3MINDIA,2023,112650000.0,2.363959e-05,-3.364675,6.052668,35.902864,34.370214,36.387963,133.0,99.0,138.0,124.0,4.0,3.0,5.0,4.0


In [9]:
# Calculate average turnover over the formation periods
for period in ['3M', '6M', '9M', '12M']:
    avg_turnover_col = f'{period}_Avg_Turnover'
    window_size = int(period[:-1])
    monthly_data[avg_turnover_col] = monthly_data.groupby('Symbol')['Turnover'].rolling(window=window_size,min_periods=1).mean().reset_index(level=0, drop=True)

In [10]:
monthly_data[monthly_data['Symbol'] == '3MINDIA']

Unnamed: 0,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Turnover,Monthly_Return,3M_Return,6M_Return,...,9M_Rank,12M_Rank,3M_Quintile,6M_Quintile,9M_Quintile,12M_Quintile,3M_Avg_Turnover,6M_Avg_Turnover,9M_Avg_Turnover,12M_Avg_Turnover
0,2014-01-01,3445.1,149,3MINDIA,2014,112650000.0,1.322681e-06,,,,...,,,,,,,1.322681e-06,1.322681e-06,1.322681e-06,1.322681e-06
337,2014-02-03,3443.1,76,3MINDIA,2014,112650000.0,6.746560e-07,-0.058053,,,...,,,,,,,9.986684e-07,9.986684e-07,9.986684e-07,9.986684e-07
674,2014-03-03,3461.4,419,3MINDIA,2014,112650000.0,3.719485e-06,0.531498,,,...,,,,,,,1.905607e-06,1.905607e-06,1.905607e-06,1.905607e-06
1011,2014-04-01,3404.7,253,3MINDIA,2014,112650000.0,2.245894e-06,-1.638066,-1.172680,,...,,,8.0,,,,2.213345e-06,1.990679e-06,1.990679e-06,1.990679e-06
1348,2014-05-01,3550.6,239,3MINDIA,2014,112650000.0,2.121616e-06,4.285253,3.122186,,...,,,9.0,,,,2.695665e-06,2.016866e-06,2.016866e-06,2.016866e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38755,2023-08-01,28210.7,14027,3MINDIA,2023,112650000.0,1.245184e-04,5.384133,28.146577,26.701399,...,130.0,149.0,3.0,5.0,4.0,5.0,7.742861e-05,4.970410e-05,4.212063e-05,3.827785e-05
39092,2023-09-01,30737.6,2486,3MINDIA,2023,112650000.0,2.206835e-05,8.957240,18.909384,37.986515,...,108.0,117.0,5.0,4.0,4.0,4.0,5.666519e-05,4.896138e-05,4.242048e-05,3.851013e-05
39429,2023-10-02,30959.9,1746,3MINDIA,2023,112650000.0,1.549933e-05,0.723218,15.654068,38.903395,...,89.0,160.0,5.0,4.0,3.0,5.0,5.402870e-05,4.835627e-05,3.964393e-05,3.623317e-05
39766,2023-11-01,29918.2,2663,3MINDIA,2023,112650000.0,2.363959e-05,-3.364675,6.052668,35.902864,...,138.0,124.0,4.0,3.0,5.0,4.0,2.040243e-05,4.891552e-05,3.993687e-05,3.669108e-05


In [11]:
# Rank stocks based on average turnover
for period in ['3M', '6M', '9M', '12M']:
    avg_turnover_col = f'{period}_Avg_Turnover'
    rank_col = f'{period}_Turnover_Rank'
    monthly_data[rank_col] = monthly_data.groupby('Date')[avg_turnover_col].rank(method='first')

In [12]:
# Define tercile function with check for sufficient unique values
def assign_tercile(x, rank_col, tercile_col):
    if len(x[rank_col].unique()) < 3:
        x[tercile_col] = np.nan
    else:
        x[tercile_col] = pd.qcut(x[rank_col], 3, labels=False) + 1
    return x

# Apply tercile ranking
for period in ['3M', '6M', '9M', '12M']:
    rank_col = f'{period}_Turnover_Rank'
    tercile_col = f'{period}_Turnover_Tercile'
    monthly_data = monthly_data.groupby('Date').apply(assign_tercile, rank_col, tercile_col).reset_index(drop=True)

In [13]:
monthly_data[monthly_data['Date'] == '2014-10-01']

Unnamed: 0,Date,Adj Close,Volume,Symbol,Year,Outstanding_Shares,Turnover,Monthly_Return,3M_Return,6M_Return,...,9M_Avg_Turnover,12M_Avg_Turnover,3M_Turnover_Rank,6M_Turnover_Rank,9M_Turnover_Rank,12M_Turnover_Rank,3M_Turnover_Tercile,6M_Turnover_Tercile,9M_Turnover_Tercile,12M_Turnover_Tercile
3033,2014-10-01,6023.7,453,3MINDIA,2014,1.126500e+08,0.000004,5.361016,40.724214,76.923077,...,0.000003,0.000003,4.0,3.0,3.0,3.0,1,1,1,1
3034,2014-10-01,64.2,122064,AARTIIND,2014,3.332820e+09,0.000037,1.102362,33.750000,128.469751,...,0.000101,0.000094,68.0,138.0,151.0,149.0,1,2,2,2
3035,2014-10-01,963.5,63905,ABB,2014,2.119080e+09,0.000030,2.587308,-3.224186,31.481987,...,0.000088,0.000083,136.0,108.0,136.0,130.0,2,1,2,2
3036,2014-10-01,3059.8,3557,ABBOTINDIA,2014,2.124900e+08,0.000017,26.182523,49.564962,84.581046,...,0.000015,0.000014,11.0,7.0,12.0,12.0,1,1,1,1
3037,2014-10-01,123.2,12477,ABFRL,2014,9.413900e+08,0.000013,9.413854,0.571429,52.286774,...,0.000022,0.000021,43.0,33.0,23.0,23.0,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3365,2014-10-01,104.7,5309380,YESBANK,2014,2.096607e+10,0.000253,-6.517857,0.287356,37.944664,...,0.000992,0.000925,312.0,322.0,330.0,327.0,3,3,3,3
3366,2014-10-01,295.7,3041803,ZEEL,2014,9.604490e+09,0.000317,10.171386,5.381326,15.462710,...,0.000241,0.000221,227.0,237.0,249.0,245.0,3,3,3,3
3367,2014-10-01,106.8,628350,ZENSARTECH,2014,2.227050e+09,0.000282,36.050955,44.911805,65.838509,...,0.000081,0.000094,191.0,145.0,124.0,148.0,2,2,2,2
3368,2014-10-01,3847.7,3769,ZFCVINDIA,2014,1.896800e+08,0.000020,7.633994,17.200731,93.254646,...,0.000018,0.000016,18.0,14.0,17.0,15.0,1,1,1,1


In [14]:
monthly_data['Current_Month_Return'] = monthly_data.groupby('Symbol')['Monthly_Return'].shift(-1)

In [15]:
### New Code

# Function to calculate the equal-weighted average return for a portfolio
def calculate_weighted_avg_returns(df, J, K, rank):
    df['YearMonth'] = df['Date'].dt.to_period('M')
    unique_year_months = df['YearMonth'].unique()
    portfolio_returns = []

    for current_year_month in unique_year_months:
        returns = 0
        valid_count = 0

        for offset in range(K):
            # Ensure we use exact YearMonth dates from the dataset
            index = np.where(unique_year_months == current_year_month)[0][0] - offset
            if index < 0:
                continue
            look_back_year_month = unique_year_months[index]
            portfolio_stocks = df[(df['YearMonth'] == look_back_year_month) & (df[f'{J}M_Quintile'] == rank)]['Symbol']

            if not portfolio_stocks.empty:
                current_returns = df[(df['YearMonth'] == current_year_month) & (df['Symbol'].isin(portfolio_stocks))]['Current_Month_Return'].mean()
                if not np.isnan(current_returns):
                    returns += current_returns
                    valid_count += 1         

        if valid_count > 0:
            average_return = returns / valid_count
            portfolio_returns.append({'YearMonth': current_year_month, 'J': J, 'K': K, 'Rank': rank, 'Portfolio_Return': average_return})

    return pd.DataFrame(portfolio_returns)

# Calculate the weighted average returns for all combinations of J, K, and ranks
results = []
for J in [3, 6, 9, 12]:
    for K in [3, 6, 9, 12]:
        for rank in [1, 10]:
            result = calculate_weighted_avg_returns(monthly_data, J, K, rank)
            results.append(result)

# Combine all results into a single DataFrame
portfolio_returns_df = pd.concat(results, ignore_index=True)

In [16]:
# Group by J, K, and Rank and calculate the mean portfolio returns
mean_portfolio_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Rank'])['Portfolio_Return'].mean().reset_index()

In [17]:
from scipy.stats import ttest_1samp

# Function to calculate mean and t-test for each group
def calculate_mean_and_ttest(group):
    mean_return = group['Portfolio_Return'].mean()
    t_stat, _ = ttest_1samp(group['Portfolio_Return'], 0)
    return pd.Series({'Mean_Return': mean_return, 'T_Value': t_stat})

# Function to calculate Rank 1 - Rank 10 returns and t-test
def calculate_diff_and_ttest(df):
    rank1 = df[df['Rank'] == 1]
    rank10 = df[df['Rank'] == 10]
    
    if not rank1.empty and not rank10.empty:
        diff_returns = rank1['Portfolio_Return'].values - rank10['Portfolio_Return'].values
        mean_diff = diff_returns.mean()
        t_stat, _ = ttest_1samp(diff_returns, 0)
        return pd.Series({'Mean_Return': mean_diff, 'T_Value': t_stat, 'Rank': 'R1 - R10'})
    else:
        return pd.Series({'Mean_Return': None, 'T_Value': None, 'Rank': 'R1 - R10'})

# Group by J, K, and Rank and apply the function
mean_portfolio_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Rank']).apply(calculate_mean_and_ttest).reset_index()

# Calculate Rank 1 - Rank 10 difference and t-test
diff_returns_df = portfolio_returns_df.groupby(['J', 'K']).apply(calculate_diff_and_ttest).reset_index()

# Combine the results
combined_df = pd.concat([mean_portfolio_returns_df, diff_returns_df], ignore_index=True)


In [None]:
#combined_df.to_csv('MomentumAndTurnover.csv', index=False)

### INCLUDING VOLUME

In [35]:
# Function to calculate the equal-weighted average return for a portfolio including volume rankings
def calculate_weighted_avg_returns(df, J, K, return_rank, volume_rank):
    df['YearMonth'] = df['Date'].dt.to_period('M')
    unique_year_months = df['YearMonth'].unique()
    portfolio_returns = []

    for current_year_month in unique_year_months:
        returns = 0
        valid_count = 0

        for offset in range(K):
            # Ensure we use exact YearMonth dates from the dataset
            index = np.where(unique_year_months == current_year_month)[0][0] - offset
            if index < 0:
                continue
            look_back_year_month = unique_year_months[index]
            portfolio_stocks = df[(df['YearMonth'] == look_back_year_month) & (df[f'{J}M_Quintile'] == return_rank) & (df[f'{J}M_Turnover_Tercile'] == volume_rank)]['Symbol']

            if not portfolio_stocks.empty:
                current_returns = df[(df['YearMonth'] == current_year_month) & (df['Symbol'].isin(portfolio_stocks))]['Current_Month_Return'].mean()
                if not np.isnan(current_returns):
                    returns += current_returns
                    valid_count += 1

        if valid_count > 0:
            average_return = returns / valid_count
            portfolio_returns.append({'YearMonth': current_year_month, 'J': J, 'K': K, 'Return_Rank': 'R'+str(return_rank), 'Volume_Rank': 'V'+str(volume_rank), 'Portfolio_Return': average_return})

    return pd.DataFrame(portfolio_returns)

# Calculate the weighted average returns for all combinations of J, K, return ranks, and volume ranks
results = []
for J in [3, 6, 9, 12]:
    for K in [3, 6, 9, 12]:
        for return_rank in [1, 10]:
            for volume_rank in [1, 2, 3]:  # 1 for lowest volume tercile, 3 for highest volume tercile
                result = calculate_weighted_avg_returns(monthly_data, J, K, return_rank, volume_rank)
                results.append(result)

# Combine all results into a single DataFrame
portfolio_returns_df = pd.concat(results, ignore_index=True)

# Display the result
print(portfolio_returns_df)

# Group by J, K, Return_Rank, and Volume_Rank and calculate the mean portfolio returns
mean_portfolio_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Return_Rank', 'Volume_Rank'])['Portfolio_Return'].mean().reset_index()

      YearMonth   J   K Return_Rank Volume_Rank  Portfolio_Return
0       2014-04   3   3          R1          V1          5.691872
1       2014-05   3   3          R1          V1         29.131867
2       2014-06   3   3          R1          V1          9.643532
3       2014-07   3   3          R1          V1         -6.280164
4       2014-08   3   3          R1          V1          1.056106
...         ...  ..  ..         ...         ...               ...
10699   2023-07  12  12         R10          V3          9.861298
10700   2023-08  12  12         R10          V3          8.131906
10701   2023-09  12  12         R10          V3          1.596635
10702   2023-10  12  12         R10          V3         -4.027368
10703   2023-11  12  12         R10          V3          7.566734

[10704 rows x 6 columns]


In [37]:
mean_portfolio_returns_df

Unnamed: 0,J,K,Return_Rank,Volume_Rank,Portfolio_Return
0,3,3,R1,V1,4.143818
1,3,3,R1,V2,3.450880
2,3,3,R1,V3,3.635378
3,3,3,R10,V1,2.643245
4,3,3,R10,V2,2.367230
...,...,...,...,...,...
91,12,12,R1,V2,3.146406
92,12,12,R1,V3,2.439371
93,12,12,R10,V1,2.465994
94,12,12,R10,V2,2.255934


In [36]:
portfolio_returns_df.groupby(['J', 'K', 'Return_Rank', 'Volume_Rank']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,YearMonth,Portfolio_Return
J,K,Return_Rank,Volume_Rank,Unnamed: 4_level_1,Unnamed: 5_level_1
3,3,R1,V1,116,116
3,3,R1,V2,116,116
3,3,R1,V3,116,116
3,3,R10,V1,116,116
3,3,R10,V2,116,116
...,...,...,...,...,...
12,12,R1,V2,107,107
12,12,R1,V3,107,107
12,12,R10,V1,107,107
12,12,R10,V2,107,107


In [39]:
from scipy.stats import ttest_1samp
import pandas as pd
import numpy as np

# Assuming portfolio_returns_df is already available from the previous steps

# Function to calculate mean and t-test for each group
def calculate_mean_and_ttest(group):
    mean_return = group['Portfolio_Return'].mean()
    t_stat, _ = ttest_1samp(group['Portfolio_Return'], 0)
    return pd.Series({'Mean_Return': mean_return, 'T_Value': t_stat})

# Function to calculate Rank 1 - Rank 10 returns and t-test
def calculate_diff_and_ttest(group):
    rank1 = group[group['Return_Rank'] == 'R1']
    rank10 = group[group['Return_Rank'] == 'R10']
    
    if not rank1.empty and not rank10.empty:
        diff_returns = rank1['Portfolio_Return'].values - rank10['Portfolio_Return'].values
        mean_diff = diff_returns.mean()
        t_stat, _ = ttest_1samp(diff_returns, 0)
        return pd.Series({'Mean_Return': mean_diff, 'T_Value': t_stat, 'Return_Rank': 'R1 - R10'})
    else:
        return pd.Series({'Mean_Return': None, 'T_Value': None, 'Return_Rank': 'R1 - R10'})

# Group by J, K, Return_Rank, and Volume_Rank and apply the function
mean_portfolio_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Return_Rank', 'Volume_Rank']).apply(calculate_mean_and_ttest).reset_index()

# Calculate Rank 1 - Rank 10 difference and t-test for each J, K, and Volume_Rank
diff_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Volume_Rank']).apply(calculate_diff_and_ttest).reset_index()

# Combine the results
combined_df = pd.concat([mean_portfolio_returns_df, diff_returns_df], ignore_index=True)

# Display the combined results
print(combined_df)


      J   K Return_Rank Volume_Rank  Mean_Return   T_Value
0     3   3          R1          V1     4.143818  5.150902
1     3   3          R1          V2     3.450880  4.758338
2     3   3          R1          V3     3.635378  4.720153
3     3   3         R10          V1     2.643245  3.891590
4     3   3         R10          V2     2.367230  3.288345
..   ..  ..         ...         ...          ...       ...
139  12   9    R1 - R10          V2     1.030025  1.609544
140  12   9    R1 - R10          V3     0.609475  0.978097
141  12  12    R1 - R10          V1     0.429066  0.590654
142  12  12    R1 - R10          V2     0.890472  1.446029
143  12  12    R1 - R10          V3     0.312645  0.531540

[144 rows x 6 columns]


In [41]:
combined_df

Unnamed: 0,J,K,Return_Rank,Volume_Rank,Mean_Return,T_Value
0,3,3,R1,V1,4.143818,5.150902
1,3,3,R1,V2,3.450880,4.758338
2,3,3,R1,V3,3.635378,4.720153
3,3,3,R10,V1,2.643245,3.891590
4,3,3,R10,V2,2.367230,3.288345
...,...,...,...,...,...,...
139,12,9,R1 - R10,V2,1.030025,1.609544
140,12,9,R1 - R10,V3,0.609475,0.978097
141,12,12,R1 - R10,V1,0.429066,0.590654
142,12,12,R1 - R10,V2,0.890472,1.446029


In [42]:
from scipy.stats import ttest_1samp
import pandas as pd
import numpy as np

# Assuming portfolio_returns_df is already available from the previous steps

# Function to calculate mean and t-test for each group
def calculate_mean_and_ttest(group):
    mean_return = group['Portfolio_Return'].mean()
    t_stat, _ = ttest_1samp(group['Portfolio_Return'], 0)
    return pd.Series({'Mean_Return': mean_return, 'T_Value': t_stat})

# Function to calculate Rank 1 - Rank 10 returns and t-test
def calculate_diff_and_ttest(group):
    rank1 = group[group['Return_Rank'] == 'R1']
    rank10 = group[group['Return_Rank'] == 'R10']
    
    if not rank1.empty and not rank10.empty:
        diff_returns = rank1['Portfolio_Return'].values - rank10['Portfolio_Return'].values
        mean_diff = diff_returns.mean()
        t_stat, _ = ttest_1samp(diff_returns, 0)
        return pd.Series({'Mean_Return': mean_diff, 'T_Value': t_stat, 'Return_Rank': 'R1 - R10'})
    else:
        return pd.Series({'Mean_Return': None, 'T_Value': None, 'Return_Rank': 'R1 - R10'})

# Function to calculate V1 - V3 returns and t-test for each R1, R10, and R1 - R10
def calculate_volume_diff_and_ttest(group, return_rank):
    v1 = group[group['Volume_Rank'] == 'V1']
    v3 = group[group['Volume_Rank'] == 'V3']
    
    if not v1.empty and not v3.empty:
        diff_returns = v1['Portfolio_Return'].values - v3['Portfolio_Return'].values
        mean_diff = diff_returns.mean()
        t_stat, _ = ttest_1samp(diff_returns, 0)
        return pd.Series({'Mean_Return': mean_diff, 'T_Value': t_stat, 'Return_Rank': return_rank, 'Volume_Diff': 'V1 - V3'})
    else:
        return pd.Series({'Mean_Return': None, 'T_Value': None, 'Return_Rank': return_rank, 'Volume_Diff': 'V1 - V3'})

# Group by J, K, Return_Rank, and Volume_Rank and apply the function
mean_portfolio_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Return_Rank', 'Volume_Rank']).apply(calculate_mean_and_ttest).reset_index()

# Calculate Rank 1 - Rank 10 difference and t-test for each J, K, and Volume_Rank
diff_returns_df = portfolio_returns_df.groupby(['J', 'K', 'Volume_Rank']).apply(calculate_diff_and_ttest).reset_index()

# Calculate V1 - V3 difference and t-test for each J, K, and Return_Rank
volume_diff_results = []
for (J, K), group in portfolio_returns_df.groupby(['J', 'K']):
    for return_rank in [1, 10]:
        result = calculate_volume_diff_and_ttest(group[group['Return_Rank'] == f'R{return_rank}'], f'R{return_rank}')
        volume_diff_results.append(result)
    
    # Calculate for R1 - R10
    rank1 = group[group['Return_Rank'] == 'R1']
    rank10 = group[group['Return_Rank'] == 'R10']
    
    if not rank1.empty and not rank10.empty:
        diff_returns_r1_r10 = rank1['Portfolio_Return'].values - rank10['Portfolio_Return'].values
        combined_group = pd.DataFrame({
            'Portfolio_Return': diff_returns_r1_r10,
            'Volume_Rank': rank1['Volume_Rank']
        })
        result = calculate_volume_diff_and_ttest(combined_group, 'R1 - R10')
        volume_diff_results.append(result)

# Combine the volume diff results into a single DataFrame
volume_diff_df = pd.DataFrame(volume_diff_results)

# Combine the mean returns and volume diff results
combined_df = pd.concat([mean_portfolio_returns_df, diff_returns_df, volume_diff_df], ignore_index=True)

# Display the combined results
print(combined_df)


       J    K Return_Rank Volume_Rank  Mean_Return   T_Value Volume_Diff
0    3.0  3.0          R1          V1     4.143818  5.150902         NaN
1    3.0  3.0          R1          V2     3.450880  4.758338         NaN
2    3.0  3.0          R1          V3     3.635378  4.720153         NaN
3    3.0  3.0         R10          V1     2.643245  3.891590         NaN
4    3.0  3.0         R10          V2     2.367230  3.288345         NaN
..   ...  ...         ...         ...          ...       ...         ...
187  NaN  NaN         R10         NaN     0.433239  0.963016     V1 - V3
188  NaN  NaN    R1 - R10         NaN     0.059968  0.079590     V1 - V3
189  NaN  NaN          R1         NaN     0.455689  0.760260     V1 - V3
190  NaN  NaN         R10         NaN     0.339267  0.764224     V1 - V3
191  NaN  NaN    R1 - R10         NaN     0.116421  0.164219     V1 - V3

[192 rows x 7 columns]
