In [None]:
import os
import sys
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
from scipy.stats import ttest_1samp


In [None]:
# Notebook-wide setup: mute every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

# The project root is taken from the ROOT_PATH environment variable (None when the
# variable is unset). It is appended to the import path and echoed for the reader.
path = os.environ.get("ROOT_PATH")
sys.path += [path]
print(path)


## Statistical Significance

### **Proxy A :**

Abnormal stock returns following large stock price increases and decreases, where large moves are flagged at the 8% and 10% thresholds.

In [None]:
# Proxy A table: one 'Date' column plus per-company event flag columns that the
# cells below filter on (e.g. '<company>_Increase_8%').
proxy_a_df = pd.read_excel(
    io=f'{path}/raw_data/main/proxy_a_df.xlsx',
)


#### Price increases: 

In [None]:
# Calculate stock returns
# Percentage change of every price column (all columns after the first one), renamed to
# '<column>_returns' and placed next to a copy of the 'Date' column.
df_price_2nd = pd.read_excel(f'{path}/raw_data/main/price.xlsx')
stock_returns_df = pd.concat(
    [
        df_price_2nd[['Date']].copy(),
        df_price_2nd.iloc[:, 1:].pct_change().add_suffix('_returns'),
    ],
    axis=1,
)

# reset_index() keeps the previous row labels as an extra 'index' column
stock_returns_df = stock_returns_df.reset_index()
stock_returns_df.to_excel(f'{path}/raw_data/main/stock_returns.xlsx')

# Create a dictionary to store the result for each threshold
# Both dictionaries share the same keys; each key gets its own empty list.
result_columns = (
    'Company',
    'Event_Date',
    'Avg_Next_Day_Returns',
    'P_Value_1_Day',
    'Avg_Next_2_Days_Returns',
    'P_Value_2_Days',
    'Avg_Next_5_Days_Returns',
    'P_Value_5_Days',
    'Avg_Next_20_Days_Returns',
    'P_Value_20_Days',
)

results_dict_a_eight_increase = {column: [] for column in result_columns}

results_dict_a_ten_increase = {column: [] for column in result_columns}

# Calculate average ARs and p-values for 8% threshold
#
# Every row of proxy_a_df flagged in '<company>_Increase_8%' is treated as an event.
# For each event the returns over the next 1, 2, 5 and 20 rows of stock_returns_df are
# averaged and tested against a zero mean with a one-sample t-test.
# NOTE(review): the values are the plain pct_change returns; nothing here subtracts a
# benchmark -- confirm that this is what "AR" is meant to be.
for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_a_df[proxy_a_df[f'{i}_Increase_8%'] == 1]

    for _, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, looked up once instead of once per window.
        # It is reused to slice stock_returns_df, which assumes both frames are
        # row-aligned -- TODO confirm.
        idx_event_day = proxy_a_df.index[proxy_a_df['Date'] == event_date].to_numpy()[0]

        # Skip events whose 20-day window would run past the end of the returns.
        # Recording them anyway would repeat the previous event's numbers (or fail
        # with NameError when no event has been processed yet).
        if idx_event_day + 20 >= len(stock_returns_df):
            continue

        avg_returns = {}
        p_values = {}
        for n_days in (1, 2, 5, 20):
            # .loc slices by label and includes both ends: rows event+1 .. event+n_days
            window = stock_returns_df.loc[idx_event_day + 1: idx_event_day + n_days, returns_column]
            avg_returns[n_days] = window.mean()
            # ttest_1samp is two-sided by default, so its p-value is used as returned.
            # A single observation has no sample variance, so the 1-day p-value is NaN.
            p_values[n_days] = ttest_1samp(window, 0).pvalue if n_days > 1 else np.nan

        results_dict_a_eight_increase['Company'].append(i)
        results_dict_a_eight_increase['Event_Date'].append(event_date)
        results_dict_a_eight_increase['Avg_Next_Day_Returns'].append(avg_returns[1])
        results_dict_a_eight_increase['P_Value_1_Day'].append(p_values[1])
        results_dict_a_eight_increase['Avg_Next_2_Days_Returns'].append(avg_returns[2])
        results_dict_a_eight_increase['P_Value_2_Days'].append(p_values[2])
        results_dict_a_eight_increase['Avg_Next_5_Days_Returns'].append(avg_returns[5])
        results_dict_a_eight_increase['P_Value_5_Days'].append(p_values[5])
        results_dict_a_eight_increase['Avg_Next_20_Days_Returns'].append(avg_returns[20])
        results_dict_a_eight_increase['P_Value_20_Days'].append(p_values[20])

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_returns[1]} , P value: {p_values[1]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_returns[2]},P value: {p_values[2]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_returns[5]},P value: {p_values[5]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_returns[20]},P value: {p_values[20]} ')

results_significance_a_eight_increase = pd.DataFrame(results_dict_a_eight_increase)


# Calculate average ARs and p-values for 10% threshold
#
# Every row of proxy_a_df flagged in '<company>_Increase_10%' is treated as an event.
# For each event the returns over the next 1, 2, 5 and 20 rows of stock_returns_df are
# averaged and tested against a zero mean with a one-sample t-test.
# NOTE(review): the values are the plain pct_change returns; nothing here subtracts a
# benchmark -- confirm that this is what "AR" is meant to be.
for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_a_df[proxy_a_df[f'{i}_Increase_10%'] == 1]

    for _, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, looked up once instead of once per window.
        # It is reused to slice stock_returns_df, which assumes both frames are
        # row-aligned -- TODO confirm.
        idx_event_day = proxy_a_df.index[proxy_a_df['Date'] == event_date].to_numpy()[0]

        # Skip events whose 20-day window would run past the end of the returns.
        # Recording them anyway would repeat the previous event's numbers (or fail
        # with NameError when no event has been processed yet).
        if idx_event_day + 20 >= len(stock_returns_df):
            continue

        avg_returns = {}
        p_values = {}
        for n_days in (1, 2, 5, 20):
            # .loc slices by label and includes both ends: rows event+1 .. event+n_days
            window = stock_returns_df.loc[idx_event_day + 1: idx_event_day + n_days, returns_column]
            avg_returns[n_days] = window.mean()
            # ttest_1samp is two-sided by default, so its p-value is used as returned.
            # A single observation has no sample variance, so the 1-day p-value is NaN.
            p_values[n_days] = ttest_1samp(window, 0).pvalue if n_days > 1 else np.nan

        results_dict_a_ten_increase['Company'].append(i)
        results_dict_a_ten_increase['Event_Date'].append(event_date)
        results_dict_a_ten_increase['Avg_Next_Day_Returns'].append(avg_returns[1])
        results_dict_a_ten_increase['P_Value_1_Day'].append(p_values[1])
        results_dict_a_ten_increase['Avg_Next_2_Days_Returns'].append(avg_returns[2])
        results_dict_a_ten_increase['P_Value_2_Days'].append(p_values[2])
        results_dict_a_ten_increase['Avg_Next_5_Days_Returns'].append(avg_returns[5])
        results_dict_a_ten_increase['P_Value_5_Days'].append(p_values[5])
        results_dict_a_ten_increase['Avg_Next_20_Days_Returns'].append(avg_returns[20])
        results_dict_a_ten_increase['P_Value_20_Days'].append(p_values[20])

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_returns[1]} , P value: {p_values[1]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_returns[2]},P value: {p_values[2]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_returns[5]},P value: {p_values[5]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_returns[20]},P value: {p_values[20]} ')

results_significance_a_ten_increase = pd.DataFrame(results_dict_a_ten_increase)


#### Price decreases: 

In [None]:
# Calculate stock returns
# Percentage change of every price column (all columns after the first one), renamed to
# '<column>_returns' and placed next to a copy of the 'Date' column.
df_price_2nd = pd.read_excel(f'{path}/raw_data/main/price.xlsx')
stock_returns_df = pd.concat(
    [
        df_price_2nd[['Date']].copy(),
        df_price_2nd.iloc[:, 1:].pct_change().add_suffix('_returns'),
    ],
    axis=1,
)

# reset_index() keeps the previous row labels as an extra 'index' column
stock_returns_df = stock_returns_df.reset_index()

# Create a dictionary to store the result for each threshold
# Both dictionaries share the same keys; each key gets its own empty list.
result_columns = (
    'Company',
    'Event_Date',
    'Avg_Next_Day_Returns',
    'P_Value_1_Day',
    'Avg_Next_2_Days_Returns',
    'P_Value_2_Days',
    'Avg_Next_5_Days_Returns',
    'P_Value_5_Days',
    'Avg_Next_20_Days_Returns',
    'P_Value_20_Days',
)

results_dict_a_eight_decrease = {column: [] for column in result_columns}

results_dict_a_ten_decrease = {column: [] for column in result_columns}

# Calculate average ARs and p-values for 8% threshold
#
# Every row of proxy_a_df flagged in '<company>_Decrease_8%' is treated as an event.
# For each event the returns over the next 1, 2, 5 and 20 rows of stock_returns_df are
# averaged and tested against a zero mean with a one-sample t-test.
# NOTE(review): the values are the plain pct_change returns; nothing here subtracts a
# benchmark -- confirm that this is what "AR" is meant to be.
#
# The companies come from df_price_2nd, the price frame loaded in this cell (the name
# `df_price` is not defined anywhere in the visible notebook).
for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_a_df[proxy_a_df[f'{i}_Decrease_8%'] == 1]

    for _, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, looked up once instead of once per window.
        # It is reused to slice stock_returns_df, which assumes both frames are
        # row-aligned -- TODO confirm.
        idx_event_day = proxy_a_df.index[proxy_a_df['Date'] == event_date].to_numpy()[0]

        # Skip events whose 20-day window would run past the end of the returns.
        # Recording them anyway would repeat the previous event's numbers (or fail
        # with NameError when no event has been processed yet).
        if idx_event_day + 20 >= len(stock_returns_df):
            continue

        avg_returns = {}
        p_values = {}
        for n_days in (1, 2, 5, 20):
            # .loc slices by label and includes both ends: rows event+1 .. event+n_days
            window = stock_returns_df.loc[idx_event_day + 1: idx_event_day + n_days, returns_column]
            avg_returns[n_days] = window.mean()
            # ttest_1samp is two-sided by default, so its p-value is used as returned.
            # A single observation has no sample variance, so the 1-day p-value is NaN.
            p_values[n_days] = ttest_1samp(window, 0).pvalue if n_days > 1 else np.nan

        results_dict_a_eight_decrease['Company'].append(i)
        results_dict_a_eight_decrease['Event_Date'].append(event_date)
        results_dict_a_eight_decrease['Avg_Next_Day_Returns'].append(avg_returns[1])
        results_dict_a_eight_decrease['P_Value_1_Day'].append(p_values[1])
        results_dict_a_eight_decrease['Avg_Next_2_Days_Returns'].append(avg_returns[2])
        results_dict_a_eight_decrease['P_Value_2_Days'].append(p_values[2])
        results_dict_a_eight_decrease['Avg_Next_5_Days_Returns'].append(avg_returns[5])
        results_dict_a_eight_decrease['P_Value_5_Days'].append(p_values[5])
        results_dict_a_eight_decrease['Avg_Next_20_Days_Returns'].append(avg_returns[20])
        results_dict_a_eight_decrease['P_Value_20_Days'].append(p_values[20])

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_returns[1]} , P value: {p_values[1]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_returns[2]},P value: {p_values[2]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_returns[5]},P value: {p_values[5]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_returns[20]},P value: {p_values[20]} ')

results_significance_a_eight_decrease = pd.DataFrame(results_dict_a_eight_decrease)


# Calculate average ARs and p-values for 10% threshold
#
# Every row of proxy_a_df flagged in '<company>_Decrease_10%' is treated as an event.
# For each event the returns over the next 1, 2, 5 and 20 rows of stock_returns_df are
# averaged and tested against a zero mean with a one-sample t-test.
# NOTE(review): the values are the plain pct_change returns; nothing here subtracts a
# benchmark -- confirm that this is what "AR" is meant to be.
for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_a_df[proxy_a_df[f'{i}_Decrease_10%'] == 1]

    for _, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, looked up once instead of once per window.
        # It is reused to slice stock_returns_df, which assumes both frames are
        # row-aligned -- TODO confirm.
        idx_event_day = proxy_a_df.index[proxy_a_df['Date'] == event_date].to_numpy()[0]

        # Skip events whose 20-day window would run past the end of the returns.
        # Recording them anyway would repeat the previous event's numbers (or fail
        # with NameError when no event has been processed yet).
        if idx_event_day + 20 >= len(stock_returns_df):
            continue

        avg_returns = {}
        p_values = {}
        for n_days in (1, 2, 5, 20):
            # .loc slices by label and includes both ends: rows event+1 .. event+n_days
            window = stock_returns_df.loc[idx_event_day + 1: idx_event_day + n_days, returns_column]
            avg_returns[n_days] = window.mean()
            # ttest_1samp is two-sided by default, so its p-value is used as returned.
            # A single observation has no sample variance, so the 1-day p-value is NaN.
            p_values[n_days] = ttest_1samp(window, 0).pvalue if n_days > 1 else np.nan

        results_dict_a_ten_decrease['Company'].append(i)
        results_dict_a_ten_decrease['Event_Date'].append(event_date)
        results_dict_a_ten_decrease['Avg_Next_Day_Returns'].append(avg_returns[1])
        results_dict_a_ten_decrease['P_Value_1_Day'].append(p_values[1])
        results_dict_a_ten_decrease['Avg_Next_2_Days_Returns'].append(avg_returns[2])
        results_dict_a_ten_decrease['P_Value_2_Days'].append(p_values[2])
        results_dict_a_ten_decrease['Avg_Next_5_Days_Returns'].append(avg_returns[5])
        results_dict_a_ten_decrease['P_Value_5_Days'].append(p_values[5])
        results_dict_a_ten_decrease['Avg_Next_20_Days_Returns'].append(avg_returns[20])
        results_dict_a_ten_decrease['P_Value_20_Days'].append(p_values[20])

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_returns[1]} , P value: {p_values[1]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_returns[2]},P value: {p_values[2]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_returns[5]},P value: {p_values[5]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_returns[20]},P value: {p_values[20]} ')

results_significance_a_ten_decrease = pd.DataFrame(results_dict_a_ten_decrease)


### **Proxy B :**

Abnormal stock returns following large stock price increases and decreases, where large moves are flagged at the 3 and 4 standard-deviation thresholds.

In [None]:
# Proxy B table: one 'Date' column plus per-company event flag columns that the
# cells below filter on (e.g. '<company>_Increase_3std').
proxy_b_df = pd.read_excel(
    io=f'{path}/raw_data/main/proxy_b.xlsx',
)


#### Price increases: 

In [None]:
# Calculate stock returns
# Percentage change of every price column (all columns after the first one), renamed to
# '<column>_returns' and placed next to a copy of the 'Date' column.
df_price_2nd = pd.read_excel(f'{path}/raw_data/main/price.xlsx')
stock_returns_df = pd.concat(
    [
        df_price_2nd[['Date']].copy(),
        df_price_2nd.iloc[:, 1:].pct_change().add_suffix('_returns'),
    ],
    axis=1,
)

# reset_index() keeps the previous row labels as an extra 'index' column
stock_returns_df = stock_returns_df.reset_index()

# Create a dictionary to store the result for each threshold
# Both dictionaries share the same keys; each key gets its own empty list.
result_columns = (
    'Company',
    'Event_Date',
    'Avg_Next_Day_Returns',
    'P_Value_1_Day',
    'Avg_Next_2_Days_Returns',
    'P_Value_2_Days',
    'Avg_Next_5_Days_Returns',
    'P_Value_5_Days',
    'Avg_Next_20_Days_Returns',
    'P_Value_20_Days',
)

results_dict_three_increase = {column: [] for column in result_columns}

results_dict_four_increase = {column: [] for column in result_columns}

# Calculate average ARs and p-values for 3 std threshold
#
# Every row of proxy_b_df flagged in '<company>_Increase_3std' is treated as an event.
# For each event the returns over the next 1, 2, 5 and 20 rows of stock_returns_df are
# averaged and tested against a zero mean with a one-sample t-test.
# NOTE(review): the values are the plain pct_change returns; nothing here subtracts a
# benchmark -- confirm that this is what "AR" is meant to be.
for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_b_df[proxy_b_df[f'{i}_Increase_3std'] == 1]

    for _, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, looked up once in proxy_b_df itself: the events
        # come from proxy_b_df, so its own 'Date' column must drive the lookup (a mask
        # built from proxy_a_df only works if both tables happen to be identical in
        # length and dates). The label is reused to slice stock_returns_df, which
        # assumes both frames are row-aligned -- TODO confirm.
        idx_event_day = proxy_b_df.index[proxy_b_df['Date'] == event_date].to_numpy()[0]

        # Skip events whose 20-day window would run past the end of the returns.
        # Recording them anyway would repeat the previous event's numbers (or fail
        # with NameError when no event has been processed yet).
        if idx_event_day + 20 >= len(stock_returns_df):
            continue

        avg_returns = {}
        p_values = {}
        for n_days in (1, 2, 5, 20):
            # .loc slices by label and includes both ends: rows event+1 .. event+n_days
            window = stock_returns_df.loc[idx_event_day + 1: idx_event_day + n_days, returns_column]
            avg_returns[n_days] = window.mean()
            # ttest_1samp is two-sided by default, so its p-value is used as returned.
            # A single observation has no sample variance, so the 1-day p-value is NaN.
            p_values[n_days] = ttest_1samp(window, 0).pvalue if n_days > 1 else np.nan

        results_dict_three_increase['Company'].append(i)
        results_dict_three_increase['Event_Date'].append(event_date)
        results_dict_three_increase['Avg_Next_Day_Returns'].append(avg_returns[1])
        results_dict_three_increase['P_Value_1_Day'].append(p_values[1])
        results_dict_three_increase['Avg_Next_2_Days_Returns'].append(avg_returns[2])
        results_dict_three_increase['P_Value_2_Days'].append(p_values[2])
        results_dict_three_increase['Avg_Next_5_Days_Returns'].append(avg_returns[5])
        results_dict_three_increase['P_Value_5_Days'].append(p_values[5])
        results_dict_three_increase['Avg_Next_20_Days_Returns'].append(avg_returns[20])
        results_dict_three_increase['P_Value_20_Days'].append(p_values[20])

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_returns[1]} , P value: {p_values[1]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_returns[2]},P value: {p_values[2]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_returns[5]},P value: {p_values[5]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_returns[20]},P value: {p_values[20]} ')

results_significance_b_three_increase = pd.DataFrame(results_dict_three_increase)


# Calculate average ARs and p-values for 4 std threshold
#
# Every row of proxy_b_df flagged in '<company>_Increase_4std' is treated as an event.
# For each event the returns over the next 1, 2, 5 and 20 rows of stock_returns_df are
# averaged and tested against a zero mean with a one-sample t-test.
# NOTE(review): the values are the plain pct_change returns; nothing here subtracts a
# benchmark -- confirm that this is what "AR" is meant to be.
for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_b_df[proxy_b_df[f'{i}_Increase_4std'] == 1]

    for _, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, looked up once instead of once per window.
        # It is reused to slice stock_returns_df, which assumes both frames are
        # row-aligned -- TODO confirm.
        idx_event_day = proxy_b_df.index[proxy_b_df['Date'] == event_date].to_numpy()[0]

        # Skip events whose 20-day window would run past the end of the returns.
        # Recording them anyway would repeat the previous event's numbers (or fail
        # with NameError when no event has been processed yet).
        if idx_event_day + 20 >= len(stock_returns_df):
            continue

        avg_returns = {}
        p_values = {}
        for n_days in (1, 2, 5, 20):
            # .loc slices by label and includes both ends: rows event+1 .. event+n_days
            window = stock_returns_df.loc[idx_event_day + 1: idx_event_day + n_days, returns_column]
            avg_returns[n_days] = window.mean()
            # ttest_1samp is two-sided by default, so its p-value is used as returned.
            # A single observation has no sample variance, so the 1-day p-value is NaN.
            p_values[n_days] = ttest_1samp(window, 0).pvalue if n_days > 1 else np.nan

        results_dict_four_increase['Company'].append(i)
        results_dict_four_increase['Event_Date'].append(event_date)
        results_dict_four_increase['Avg_Next_Day_Returns'].append(avg_returns[1])
        results_dict_four_increase['P_Value_1_Day'].append(p_values[1])
        results_dict_four_increase['Avg_Next_2_Days_Returns'].append(avg_returns[2])
        results_dict_four_increase['P_Value_2_Days'].append(p_values[2])
        results_dict_four_increase['Avg_Next_5_Days_Returns'].append(avg_returns[5])
        results_dict_four_increase['P_Value_5_Days'].append(p_values[5])
        results_dict_four_increase['Avg_Next_20_Days_Returns'].append(avg_returns[20])
        results_dict_four_increase['P_Value_20_Days'].append(p_values[20])

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_returns[1]} , P value: {p_values[1]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_returns[2]},P value: {p_values[2]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_returns[5]},P value: {p_values[5]} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_returns[20]},P value: {p_values[20]} ')

results_significance_b_four_increase = pd.DataFrame(results_dict_four_increase)


#### Price decreases: 

In [None]:
# Calculate stock returns
# Percentage change of every price column (all columns after the first one), renamed to
# '<column>_returns' and placed next to a copy of the 'Date' column.
df_price_2nd = pd.read_excel(f'{path}/raw_data/main/price.xlsx')
stock_returns_df = pd.concat(
    [
        df_price_2nd[['Date']].copy(),
        df_price_2nd.iloc[:, 1:].pct_change().add_suffix('_returns'),
    ],
    axis=1,
)

# reset_index() keeps the previous row labels as an extra 'index' column
stock_returns_df = stock_returns_df.reset_index()

# Create a dictionary to store the result for each threshold
# Both dictionaries share the same keys; each key gets its own empty list.
result_columns = (
    'Company',
    'Event_Date',
    'Avg_Next_Day_Returns',
    'P_Value_1_Day',
    'Avg_Next_2_Days_Returns',
    'P_Value_2_Days',
    'Avg_Next_5_Days_Returns',
    'P_Value_5_Days',
    'Avg_Next_20_Days_Returns',
    'P_Value_20_Days',
)

results_dict_three_decrease = {column: [] for column in result_columns}

results_dict_four_decrease = {column: [] for column in result_columns}

# Calculate average ARs and p-values for 3 std threshold
#
# For each company column, every row of proxy_b_df flagged as a 3-std decrease
# is treated as an event. For each event the mean of the following 1, 2, 5 and
# 20 rows of that company's returns is stored together with the p-value of a
# one-sample t-test of those returns against zero.
n_return_rows = len(stock_returns_df)

for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_b_df[proxy_b_df[f'{i}_Decrease_3std'] == 1]

    for _row_label, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, resolved once. Both the index and the
        # date mask come from proxy_b_df; building the mask from a different
        # proxy frame can pick the wrong row or fail when the frames differ.
        # Assumes proxy_b_df and stock_returns_df share row labels - TODO confirm.
        idx_event_day = proxy_b_df.index[proxy_b_df['Date'] == event_date].to_numpy()[0]
        idx_next_day = idx_event_day + 1
        idx_next_20_days = idx_event_day + 20

        # The 20-row window is the longest one, so it decides whether every
        # window is complete. Events too close to the end of the sample are
        # skipped; otherwise values left over from an earlier event would be
        # recorded under this event's date (or a NameError raised).
        if idx_next_20_days >= n_return_rows:
            continue

        # Average_next_day
        # NOTE(review): this t-test receives a single observation (zero degrees
        # of freedom), so the 1-day p-value is expected to be NaN.
        avg_next_day_returns = stock_returns_df.at[idx_next_day, returns_column].mean()
        p_value_1_day = ttest_1samp(avg_next_day_returns, 0).pvalue

        # Average_next_2_days (.loc label slices include both end points)
        next_2_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 2, returns_column]
        avg_next_2_days_returns = next_2_days_returns.mean()
        p_value_2_days = ttest_1samp(next_2_days_returns, 0).pvalue

        # Average_next_5_days
        next_5_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 5, returns_column]
        avg_next_5_days_returns = next_5_days_returns.mean()
        p_value_5_days = ttest_1samp(next_5_days_returns, 0).pvalue

        # Average_next_20_days
        next_20_days_returns = stock_returns_df.loc[idx_next_day:idx_next_20_days, returns_column]
        avg_next_20_days_returns = next_20_days_returns.mean()
        p_value_20_days = ttest_1samp(next_20_days_returns, 0).pvalue

        # ttest_1samp is two-sided by default, so the p-values are stored as
        # returned (no doubling).
        results_dict_three_decrease['Company'].append(i)
        results_dict_three_decrease['Event_Date'].append(event_date)
        results_dict_three_decrease['Avg_Next_Day_Returns'].append(avg_next_day_returns)
        results_dict_three_decrease['P_Value_1_Day'].append(p_value_1_day)
        results_dict_three_decrease['Avg_Next_2_Days_Returns'].append(avg_next_2_days_returns)
        results_dict_three_decrease['P_Value_2_Days'].append(p_value_2_days)
        results_dict_three_decrease['Avg_Next_5_Days_Returns'].append(avg_next_5_days_returns)
        results_dict_three_decrease['P_Value_5_Days'].append(p_value_5_days)
        results_dict_three_decrease['Avg_Next_20_Days_Returns'].append(avg_next_20_days_returns)
        results_dict_three_decrease['P_Value_20_Days'].append(p_value_20_days)

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_next_day_returns} , P value: {p_value_1_day} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_next_2_days_returns},P value: {p_value_2_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_next_5_days_returns},P value: {p_value_5_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_next_20_days_returns},P value: {p_value_20_days} ')

results_significance_b_three_decrease = pd.DataFrame(results_dict_three_decrease)


# Calculate average ARs and p-values for 4 std threshold
#
# For each company column, every row of proxy_b_df flagged as a 4-std decrease
# is treated as an event. For each event the mean of the following 1, 2, 5 and
# 20 rows of that company's returns is stored together with the p-value of a
# one-sample t-test of those returns against zero.
n_return_rows = len(stock_returns_df)

for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_b_df[proxy_b_df[f'{i}_Decrease_4std'] == 1]

    for _row_label, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, resolved once instead of per window.
        # Assumes proxy_b_df and stock_returns_df share row labels - TODO confirm.
        idx_event_day = proxy_b_df.index[proxy_b_df['Date'] == event_date].to_numpy()[0]
        idx_next_day = idx_event_day + 1
        idx_next_20_days = idx_event_day + 20

        # The 20-row window is the longest one, so it decides whether every
        # window is complete. Events too close to the end of the sample are
        # skipped; otherwise values left over from an earlier event would be
        # recorded under this event's date (or a NameError raised).
        if idx_next_20_days >= n_return_rows:
            continue

        # Average_next_day
        # NOTE(review): this t-test receives a single observation (zero degrees
        # of freedom), so the 1-day p-value is expected to be NaN.
        avg_next_day_returns = stock_returns_df.at[idx_next_day, returns_column].mean()
        p_value_1_day = ttest_1samp(avg_next_day_returns, 0).pvalue

        # Average_next_2_days (.loc label slices include both end points)
        next_2_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 2, returns_column]
        avg_next_2_days_returns = next_2_days_returns.mean()
        p_value_2_days = ttest_1samp(next_2_days_returns, 0).pvalue

        # Average_next_5_days
        next_5_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 5, returns_column]
        avg_next_5_days_returns = next_5_days_returns.mean()
        p_value_5_days = ttest_1samp(next_5_days_returns, 0).pvalue

        # Average_next_20_days
        next_20_days_returns = stock_returns_df.loc[idx_next_day:idx_next_20_days, returns_column]
        avg_next_20_days_returns = next_20_days_returns.mean()
        p_value_20_days = ttest_1samp(next_20_days_returns, 0).pvalue

        # ttest_1samp is two-sided by default, so the p-values are stored as
        # returned (no doubling).
        results_dict_four_decrease['Company'].append(i)
        results_dict_four_decrease['Event_Date'].append(event_date)
        results_dict_four_decrease['Avg_Next_Day_Returns'].append(avg_next_day_returns)
        results_dict_four_decrease['P_Value_1_Day'].append(p_value_1_day)
        results_dict_four_decrease['Avg_Next_2_Days_Returns'].append(avg_next_2_days_returns)
        results_dict_four_decrease['P_Value_2_Days'].append(p_value_2_days)
        results_dict_four_decrease['Avg_Next_5_Days_Returns'].append(avg_next_5_days_returns)
        results_dict_four_decrease['P_Value_5_Days'].append(p_value_5_days)
        results_dict_four_decrease['Avg_Next_20_Days_Returns'].append(avg_next_20_days_returns)
        results_dict_four_decrease['P_Value_20_Days'].append(p_value_20_days)

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_next_day_returns} , P value: {p_value_1_day} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_next_2_days_returns},P value: {p_value_2_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_next_5_days_returns},P value: {p_value_5_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_next_20_days_returns},P value: {p_value_20_days} ')

results_significance_b_four_decrease = pd.DataFrame(results_dict_four_decrease)


### **Proxy C :**

Abnormal stock returns following large stock price increases and decreases

In [None]:
# Load the Proxy C table; later cells read its 'Date' column and the
# per-company '<company>_Increase_*' / '<company>_Decrease_*' flag columns.
proxy_c_source = f'{path}/raw_data/main/proxy_c.xlsx'
proxy_c_df = pd.read_excel(proxy_c_source)


#### Price increases

In [None]:
# Calculate stock returns
# Each price column (everything after the first, 'Date') becomes a
# '<column>_returns' column holding its row-over-row percentage change.
df_price_2nd = pd.read_excel(f'{path}/raw_data/main/price.xlsx')

price_returns = {
    f'{column}_returns': df_price_2nd[column].pct_change()
    for column in df_price_2nd.columns[1:]
}

# reset_index() keeps the old row labels as an extra 'index' column.
stock_returns_df = (
    pd.DataFrame(df_price_2nd['Date'].copy())
    .assign(**price_returns)
    .reset_index()
)

# Persist the returns table next to the other raw inputs.
stock_returns_df.to_excel(f'{path}/raw_data/main/stock_returns.xlsx')

# Create a dictionary to store the result for each threshold
# Both accumulators share the same keys; each key maps to its own empty list
# that the event loops append to.
result_fields = (
    'Company',
    'Event_Date',
    'Avg_Next_Day_Returns',
    'P_Value_1_Day',
    'Avg_Next_2_Days_Returns',
    'P_Value_2_Days',
    'Avg_Next_5_Days_Returns',
    'P_Value_5_Days',
    'Avg_Next_20_Days_Returns',
    'P_Value_20_Days',
)

results_dict_c_eight_increase = {field: [] for field in result_fields}

results_dict_c_ten_increase = {field: [] for field in result_fields}

# Calculate average ARs and p-values for 8% threshold
#
# For each company column, every row of proxy_c_df flagged as an 8% increase
# is treated as an event. For each event the mean of the following 1, 2, 5 and
# 20 rows of that company's returns is stored together with the p-value of a
# one-sample t-test of those returns against zero.
n_return_rows = len(stock_returns_df)

for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_c_df[proxy_c_df[f'{i}_Increase_8%'] == 1]

    for _row_label, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, resolved once. Both the index and the
        # date mask come from proxy_c_df; building the mask from a different
        # proxy frame can pick the wrong row or fail when the frames differ.
        # Assumes proxy_c_df and stock_returns_df share row labels - TODO confirm.
        idx_event_day = proxy_c_df.index[proxy_c_df['Date'] == event_date].to_numpy()[0]
        idx_next_day = idx_event_day + 1
        idx_next_20_days = idx_event_day + 20

        # The 20-row window is the longest one, so it decides whether every
        # window is complete. Events too close to the end of the sample are
        # skipped; otherwise values left over from an earlier event would be
        # recorded under this event's date (or a NameError raised).
        if idx_next_20_days >= n_return_rows:
            continue

        # Average_next_day
        # NOTE(review): this t-test receives a single observation (zero degrees
        # of freedom), so the 1-day p-value is expected to be NaN.
        avg_next_day_returns = stock_returns_df.at[idx_next_day, returns_column].mean()
        p_value_1_day = ttest_1samp(avg_next_day_returns, 0).pvalue

        # Average_next_2_days (.loc label slices include both end points)
        next_2_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 2, returns_column]
        avg_next_2_days_returns = next_2_days_returns.mean()
        p_value_2_days = ttest_1samp(next_2_days_returns, 0).pvalue

        # Average_next_5_days
        next_5_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 5, returns_column]
        avg_next_5_days_returns = next_5_days_returns.mean()
        p_value_5_days = ttest_1samp(next_5_days_returns, 0).pvalue

        # Average_next_20_days
        next_20_days_returns = stock_returns_df.loc[idx_next_day:idx_next_20_days, returns_column]
        avg_next_20_days_returns = next_20_days_returns.mean()
        p_value_20_days = ttest_1samp(next_20_days_returns, 0).pvalue

        # ttest_1samp is two-sided by default, so the p-values are stored as
        # returned (no doubling).
        results_dict_c_eight_increase['Company'].append(i)
        results_dict_c_eight_increase['Event_Date'].append(event_date)
        results_dict_c_eight_increase['Avg_Next_Day_Returns'].append(avg_next_day_returns)
        results_dict_c_eight_increase['P_Value_1_Day'].append(p_value_1_day)
        results_dict_c_eight_increase['Avg_Next_2_Days_Returns'].append(avg_next_2_days_returns)
        results_dict_c_eight_increase['P_Value_2_Days'].append(p_value_2_days)
        results_dict_c_eight_increase['Avg_Next_5_Days_Returns'].append(avg_next_5_days_returns)
        results_dict_c_eight_increase['P_Value_5_Days'].append(p_value_5_days)
        results_dict_c_eight_increase['Avg_Next_20_Days_Returns'].append(avg_next_20_days_returns)
        results_dict_c_eight_increase['P_Value_20_Days'].append(p_value_20_days)

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_next_day_returns} , P value: {p_value_1_day} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_next_2_days_returns},P value: {p_value_2_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_next_5_days_returns},P value: {p_value_5_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_next_20_days_returns},P value: {p_value_20_days} ')

results_significance_c_eight_increase = pd.DataFrame(results_dict_c_eight_increase)


# Calculate average ARs and p-values for 10% threshold
#
# For each company column, every row of proxy_c_df flagged as a 10% increase
# is treated as an event. For each event the mean of the following 1, 2, 5 and
# 20 rows of that company's returns is stored together with the p-value of a
# one-sample t-test of those returns against zero.
n_return_rows = len(stock_returns_df)

for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_c_df[proxy_c_df[f'{i}_Increase_10%'] == 1]

    for _row_label, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, resolved once instead of per window.
        # Assumes proxy_c_df and stock_returns_df share row labels - TODO confirm.
        idx_event_day = proxy_c_df.index[proxy_c_df['Date'] == event_date].to_numpy()[0]
        idx_next_day = idx_event_day + 1
        idx_next_20_days = idx_event_day + 20

        # The 20-row window is the longest one, so it decides whether every
        # window is complete. Events too close to the end of the sample are
        # skipped; otherwise values left over from an earlier event would be
        # recorded under this event's date (or a NameError raised).
        if idx_next_20_days >= n_return_rows:
            continue

        # Average_next_day
        # NOTE(review): this t-test receives a single observation (zero degrees
        # of freedom), so the 1-day p-value is expected to be NaN.
        avg_next_day_returns = stock_returns_df.at[idx_next_day, returns_column].mean()
        p_value_1_day = ttest_1samp(avg_next_day_returns, 0).pvalue

        # Average_next_2_days (.loc label slices include both end points)
        next_2_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 2, returns_column]
        avg_next_2_days_returns = next_2_days_returns.mean()
        p_value_2_days = ttest_1samp(next_2_days_returns, 0).pvalue

        # Average_next_5_days
        next_5_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 5, returns_column]
        avg_next_5_days_returns = next_5_days_returns.mean()
        p_value_5_days = ttest_1samp(next_5_days_returns, 0).pvalue

        # Average_next_20_days
        next_20_days_returns = stock_returns_df.loc[idx_next_day:idx_next_20_days, returns_column]
        avg_next_20_days_returns = next_20_days_returns.mean()
        p_value_20_days = ttest_1samp(next_20_days_returns, 0).pvalue

        # ttest_1samp is two-sided by default, so the p-values are stored as
        # returned (no doubling).
        results_dict_c_ten_increase['Company'].append(i)
        results_dict_c_ten_increase['Event_Date'].append(event_date)
        results_dict_c_ten_increase['Avg_Next_Day_Returns'].append(avg_next_day_returns)
        results_dict_c_ten_increase['P_Value_1_Day'].append(p_value_1_day)
        results_dict_c_ten_increase['Avg_Next_2_Days_Returns'].append(avg_next_2_days_returns)
        results_dict_c_ten_increase['P_Value_2_Days'].append(p_value_2_days)
        results_dict_c_ten_increase['Avg_Next_5_Days_Returns'].append(avg_next_5_days_returns)
        results_dict_c_ten_increase['P_Value_5_Days'].append(p_value_5_days)
        results_dict_c_ten_increase['Avg_Next_20_Days_Returns'].append(avg_next_20_days_returns)
        results_dict_c_ten_increase['P_Value_20_Days'].append(p_value_20_days)

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_next_day_returns} , P value: {p_value_1_day} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_next_2_days_returns},P value: {p_value_2_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_next_5_days_returns},P value: {p_value_5_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_next_20_days_returns},P value: {p_value_20_days} ')

results_significance_c_ten_increase = pd.DataFrame(results_dict_c_ten_increase)


#### Price decreases: 

In [None]:
# Calculate stock returns
# Each price column (everything after the first, 'Date') becomes a
# '<column>_returns' column holding its row-over-row percentage change.
df_price_2nd = pd.read_excel(f'{path}/raw_data/main/price.xlsx')

price_returns = {
    f'{column}_returns': df_price_2nd[column].pct_change()
    for column in df_price_2nd.columns[1:]
}

# reset_index() keeps the old row labels as an extra 'index' column.
stock_returns_df = (
    pd.DataFrame(df_price_2nd['Date'].copy())
    .assign(**price_returns)
    .reset_index()
)

# Create a dictionary to store the result for each threshold
# Both accumulators share the same keys; each key maps to its own empty list
# that the event loops append to.
result_fields = (
    'Company',
    'Event_Date',
    'Avg_Next_Day_Returns',
    'P_Value_1_Day',
    'Avg_Next_2_Days_Returns',
    'P_Value_2_Days',
    'Avg_Next_5_Days_Returns',
    'P_Value_5_Days',
    'Avg_Next_20_Days_Returns',
    'P_Value_20_Days',
)

results_dict_c_eight_decrease = {field: [] for field in result_fields}

results_dict_c_ten_decrease = {field: [] for field in result_fields}

# Calculate average ARs and p-values for 8% threshold
#
# For each company column, every row of proxy_c_df flagged as an 8% decrease
# is treated as an event. For each event the mean of the following 1, 2, 5 and
# 20 rows of that company's returns is stored together with the p-value of a
# one-sample t-test of those returns against zero.
n_return_rows = len(stock_returns_df)

for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_c_df[proxy_c_df[f'{i}_Decrease_8%'] == 1]

    for _row_label, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, resolved once instead of per window.
        # Assumes proxy_c_df and stock_returns_df share row labels - TODO confirm.
        idx_event_day = proxy_c_df.index[proxy_c_df['Date'] == event_date].to_numpy()[0]
        idx_next_day = idx_event_day + 1
        idx_next_20_days = idx_event_day + 20

        # The 20-row window is the longest one, so it decides whether every
        # window is complete. Events too close to the end of the sample are
        # skipped; otherwise values left over from an earlier event would be
        # recorded under this event's date (or a NameError raised).
        if idx_next_20_days >= n_return_rows:
            continue

        # Average_next_day
        # NOTE(review): this t-test receives a single observation (zero degrees
        # of freedom), so the 1-day p-value is expected to be NaN.
        avg_next_day_returns = stock_returns_df.at[idx_next_day, returns_column].mean()
        p_value_1_day = ttest_1samp(avg_next_day_returns, 0).pvalue

        # Average_next_2_days (.loc label slices include both end points)
        next_2_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 2, returns_column]
        avg_next_2_days_returns = next_2_days_returns.mean()
        p_value_2_days = ttest_1samp(next_2_days_returns, 0).pvalue

        # Average_next_5_days
        next_5_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 5, returns_column]
        avg_next_5_days_returns = next_5_days_returns.mean()
        p_value_5_days = ttest_1samp(next_5_days_returns, 0).pvalue

        # Average_next_20_days
        next_20_days_returns = stock_returns_df.loc[idx_next_day:idx_next_20_days, returns_column]
        avg_next_20_days_returns = next_20_days_returns.mean()
        p_value_20_days = ttest_1samp(next_20_days_returns, 0).pvalue

        # ttest_1samp is two-sided by default, so the p-values are stored as
        # returned (no doubling).
        results_dict_c_eight_decrease['Company'].append(i)
        results_dict_c_eight_decrease['Event_Date'].append(event_date)
        results_dict_c_eight_decrease['Avg_Next_Day_Returns'].append(avg_next_day_returns)
        results_dict_c_eight_decrease['P_Value_1_Day'].append(p_value_1_day)
        results_dict_c_eight_decrease['Avg_Next_2_Days_Returns'].append(avg_next_2_days_returns)
        results_dict_c_eight_decrease['P_Value_2_Days'].append(p_value_2_days)
        results_dict_c_eight_decrease['Avg_Next_5_Days_Returns'].append(avg_next_5_days_returns)
        results_dict_c_eight_decrease['P_Value_5_Days'].append(p_value_5_days)
        results_dict_c_eight_decrease['Avg_Next_20_Days_Returns'].append(avg_next_20_days_returns)
        results_dict_c_eight_decrease['P_Value_20_Days'].append(p_value_20_days)

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_next_day_returns} , P value: {p_value_1_day} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_next_2_days_returns},P value: {p_value_2_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_next_5_days_returns},P value: {p_value_5_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_next_20_days_returns},P value: {p_value_20_days} ')

results_significance_c_eight_decrease = pd.DataFrame(results_dict_c_eight_decrease)


# Calculate average ARs and p-values for 10% threshold
#
# For each company column, every row of proxy_c_df flagged as a 10% decrease
# is treated as an event. For each event the mean of the following 1, 2, 5 and
# 20 rows of that company's returns is stored together with the p-value of a
# one-sample t-test of those returns against zero.
n_return_rows = len(stock_returns_df)

for i in df_price_2nd.columns[1:-1]:

    returns_column = f'{i}_returns'
    rows_with_condition = proxy_c_df[proxy_c_df[f'{i}_Decrease_10%'] == 1]

    for _row_label, row in rows_with_condition.iterrows():

        # Date with large price change
        event_date = row['Date']

        # Row label of the event day, resolved once instead of per window.
        # Assumes proxy_c_df and stock_returns_df share row labels - TODO confirm.
        idx_event_day = proxy_c_df.index[proxy_c_df['Date'] == event_date].to_numpy()[0]
        idx_next_day = idx_event_day + 1
        idx_next_20_days = idx_event_day + 20

        # The 20-row window is the longest one, so it decides whether every
        # window is complete. Events too close to the end of the sample are
        # skipped; otherwise values left over from an earlier event would be
        # recorded under this event's date (or a NameError raised).
        if idx_next_20_days >= n_return_rows:
            continue

        # Average_next_day
        # NOTE(review): this t-test receives a single observation (zero degrees
        # of freedom), so the 1-day p-value is expected to be NaN.
        avg_next_day_returns = stock_returns_df.at[idx_next_day, returns_column].mean()
        p_value_1_day = ttest_1samp(avg_next_day_returns, 0).pvalue

        # Average_next_2_days (.loc label slices include both end points)
        next_2_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 2, returns_column]
        avg_next_2_days_returns = next_2_days_returns.mean()
        p_value_2_days = ttest_1samp(next_2_days_returns, 0).pvalue

        # Average_next_5_days
        next_5_days_returns = stock_returns_df.loc[idx_next_day:idx_event_day + 5, returns_column]
        avg_next_5_days_returns = next_5_days_returns.mean()
        p_value_5_days = ttest_1samp(next_5_days_returns, 0).pvalue

        # Average_next_20_days
        next_20_days_returns = stock_returns_df.loc[idx_next_day:idx_next_20_days, returns_column]
        avg_next_20_days_returns = next_20_days_returns.mean()
        p_value_20_days = ttest_1samp(next_20_days_returns, 0).pvalue

        # ttest_1samp is two-sided by default, so the p-values are stored as
        # returned (no doubling).
        results_dict_c_ten_decrease['Company'].append(i)
        results_dict_c_ten_decrease['Event_Date'].append(event_date)
        results_dict_c_ten_decrease['Avg_Next_Day_Returns'].append(avg_next_day_returns)
        results_dict_c_ten_decrease['P_Value_1_Day'].append(p_value_1_day)
        results_dict_c_ten_decrease['Avg_Next_2_Days_Returns'].append(avg_next_2_days_returns)
        results_dict_c_ten_decrease['P_Value_2_Days'].append(p_value_2_days)
        results_dict_c_ten_decrease['Avg_Next_5_Days_Returns'].append(avg_next_5_days_returns)
        results_dict_c_ten_decrease['P_Value_5_Days'].append(p_value_5_days)
        results_dict_c_ten_decrease['Avg_Next_20_Days_Returns'].append(avg_next_20_days_returns)
        results_dict_c_ten_decrease['P_Value_20_Days'].append(p_value_20_days)

        print(f'Company: {i}, Date: {event_date}, Avg Next Day Returns: {avg_next_day_returns} , P value: {p_value_1_day} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 2 Days Returns: {avg_next_2_days_returns},P value: {p_value_2_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 5 Days Returns: {avg_next_5_days_returns},P value: {p_value_5_days} ')
        print(f'Company: {i}, Date: {event_date}, Avg Next 20 Days Returns: {avg_next_20_days_returns},P value: {p_value_20_days} ')

results_significance_c_ten_decrease = pd.DataFrame(results_dict_c_ten_decrease)


In [None]:
# Write every significance table to its own Excel workbook, in the order
# Proxy A, Proxy B, Proxy C. Each entry pairs a result frame with its target path.
significance_exports = [
    (results_significance_a_eight_increase, f'{path}/raw_data/main/results_significance_a_eight_increase.xlsx'),
    (results_significance_a_eight_decrease, f'{path}/raw_data/main/results_significance_a_eight_decrease.xlsx'),
    (results_significance_a_ten_increase, f'{path}/raw_data/main/results_significance_a_ten_increase.xlsx'),
    (results_significance_a_ten_decrease, f'{path}/raw_data/main/results_significance_a_ten_decrease.xlsx'),
    (results_significance_b_three_increase, f'{path}/raw_data/main/results_significance_b_three_increase.xlsx'),
    (results_significance_b_three_decrease, f'{path}/raw_data/main/results_significance_b_three_decrease.xlsx'),
    (results_significance_b_four_increase, f'{path}/raw_data/main/results_significance_b_four_increase.xlsx'),
    (results_significance_b_four_decrease, f'{path}/raw_data/main/results_significance_b_four_decrease.xlsx'),
    (results_significance_c_eight_increase, f'{path}/raw_data/main/results_significance_c_eight_increase.xlsx'),
    (results_significance_c_eight_decrease, f'{path}/raw_data/main/results_significance_c_eight_decrease.xlsx'),
    (results_significance_c_ten_increase, f'{path}/raw_data/main/results_significance_c_ten_increase.xlsx'),
    (results_significance_c_ten_decrease, f'{path}/raw_data/main/results_significance_c_ten_decrease.xlsx'),
]

for significance_table, workbook_path in significance_exports:
    significance_table.to_excel(workbook_path)
