In [None]:
import pandas as pd
from scipy.stats import wilcoxon

# sheet_name=None loads every sheet of the workbook into a dict of {sheet name: DataFrame}
data = pd.read_excel(r"C:\UU\THESIS\AMX\monthly prices 2002-2007\ADDED\hypothesis2\added2.xlsx", sheet_name=None)

# stack the sheets into one frame, leaving out empty sheets
# (pd.concat raises ValueError if every sheet is empty)
combined_data = pd.concat(
    [sheet_df for sheet_df in data.values() if not sheet_df.empty],
    ignore_index=True,
)

#extracting the first digit of the SIC code to classify companies into industry sectors
# NOTE(review): this is the first character of the code's string form. If sic_code is read
# from Excel as a number, a code below 1000 has presumably lost its leading zero and would be
# put in the wrong sector, and a missing code becomes the character 'n' - confirm against the data.
combined_data['sic_code_first_digit'] = combined_data['sic_code'].astype(str).str[0]

# A zero denominator would give +/-inf, and a single inf turns the mean of its whole
# industry/date group into inf. Treat those ratios as missing instead.
sales = combined_data['sales'].where(combined_data['sales'] != 0)
total_assets = combined_data['total_assets'].where(combined_data['total_assets'] != 0)
book_equity = combined_data['book_value'] * combined_data['com_shares_outstanding']
book_equity = book_equity.where(book_equity != 0)

#calculating performance measures (each ratio is multiplied by 100)
combined_data['profit_margin'] = (combined_data['net_income'] / sales) * 100
combined_data['operating_margin'] = (combined_data['operating_income'] / sales) * 100
combined_data['roa'] = (combined_data['net_income'] / total_assets) * 100
combined_data['oibd_to_assets'] = (combined_data['oibd'] / total_assets) * 100
combined_data['market_to_book'] = (combined_data['mv'] / book_equity) * 100

#define the performance measures
measures = ['profit_margin', 'operating_margin', 'roa', 'oibd_to_assets', 'market_to_book']

# parse the date column so that rows are grouped on real timestamps
combined_data['date'] = pd.to_datetime(combined_data['date'])

#function to calculate industry averages and adjust performance measures
def adjust_performance_measures(data, measures, min_group_size=1):
    """Subtract the industry average from each performance measure.

    Rows are grouped on ('date', 'sic_code_first_digit'); the grouping uses the
    exact value of 'date', not the calendar year. For every name in `measures`
    a column '<measure>_adjusted' is added holding the row's value minus its
    group mean.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'date', 'sic_code_first_digit' and every column in `measures`.
    measures : iterable of str
        Names of the columns to adjust.
    min_group_size : int, default 1
        Groups with fewer rows than this keep their raw value instead of being
        demeaned. With the default every group is demeaned, so a firm that is
        alone in its group gets an adjusted value of 0.
        NOTE(review): the DELETED-sample cell of this notebook leaves such
        single-firm groups unadjusted; pass min_group_size=2 here if both
        samples are meant to be treated the same way.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with the adjusted columns added; `data` is not modified.
    """
    adjusted_data = data.copy()
    group_keys = ['date', 'sic_code_first_digit']
    for measure in measures:
        grouped = data.groupby(group_keys)[measure]
        # per-row mean of the row's own group (NaN values are skipped by the mean)
        industry_averages = grouped.transform('mean')

        if min_group_size > 1:
            # subtract 0 rather than the mean for groups that are too small
            group_sizes = grouped.transform('size')
            industry_averages = industry_averages.mask(group_sizes < min_group_size, 0.0)

        adjusted_data[f'{measure}_adjusted'] = data[measure] - industry_averages

    return adjusted_data

# run the industry adjustment over the combined sample
adjusted_data = adjust_performance_measures(combined_data, measures)

# keep the identifying columns plus one '<measure>_adjusted' column per measure
adjusted_columns = ['date', 'sic_code', 'sic_code_first_digit']
adjusted_columns.extend(f'{measure}_adjusted' for measure in measures)
adjusted_data = adjusted_data.loc[:, adjusted_columns]

# write the reduced frame (row index included) to a single-sheet workbook
output_file = r"C:\UU\THESIS\AMX\monthly prices 2002-2007\ADDED\hypothesis2\adj_data.xlsx"
adjusted_data.to_excel(output_file, sheet_name='adj_data', index=True)

# bare last expression so the notebook renders the frame
adjusted_data


In [None]:
import pandas as pd
from scipy.stats import wilcoxon

# sheet_name=None loads every sheet of the workbook into a dict of {sheet name: DataFrame}
data = pd.read_excel(r"C:\UU\THESIS\AMX\monthly prices 2002-2007\DELETED\hypothesis2\deleted2.xlsx", sheet_name=None)

# stack the sheets into one frame, leaving out empty sheets
# (pd.concat raises ValueError if every sheet is empty)
combined_data = pd.concat(
    [sheet_df for sheet_df in data.values() if not sheet_df.empty],
    ignore_index=True,
)

#extracting the first digit of the SIC code to classify companies into industry sectors
# NOTE(review): this is the first character of the code's string form. If sic_code is read
# from Excel as a number, a code below 1000 has presumably lost its leading zero and would be
# put in the wrong sector, and a missing code becomes the character 'n' - confirm against the data.
combined_data['sic_code_first_digit'] = combined_data['sic_code'].astype(str).str[0]

# A zero denominator would give +/-inf, and a single inf turns the mean of its whole
# industry/date group into inf. Treat those ratios as missing instead.
sales = combined_data['sales'].where(combined_data['sales'] != 0)
total_assets = combined_data['total_assets'].where(combined_data['total_assets'] != 0)
book_equity = combined_data['book_value'] * combined_data['com_shares_outstanding']
book_equity = book_equity.where(book_equity != 0)

#calculating performance measures (each ratio is multiplied by 100)
combined_data['profit_margin'] = (combined_data['net_income'] / sales) * 100
combined_data['operating_margin'] = (combined_data['operating_income'] / sales) * 100
combined_data['roa'] = (combined_data['net_income'] / total_assets) * 100
combined_data['oibd_to_assets'] = (combined_data['oibd'] / total_assets) * 100
combined_data['market_to_book'] = (combined_data['mv'] / book_equity) * 100

#define the performance measures
measures = ['profit_margin', 'operating_margin', 'roa', 'oibd_to_assets', 'market_to_book']

# parse the date column so that rows are grouped on real timestamps
combined_data['date'] = pd.to_datetime(combined_data['date'])

#function to calculate industry averages and adjust performance measures
def adjust_performance_measures(data, measures, min_group_size=2):
    """Subtract the industry average from each performance measure.

    Rows are grouped on ('date', 'sic_code_first_digit'). For every name in
    `measures` a column '<measure>_adjusted' is added holding the row's value
    minus its group mean. Groups with fewer than `min_group_size` rows keep the
    raw value, so with the default a firm that is alone in its group is left
    unadjusted.

    The computation is vectorised with groupby-transform instead of looping
    over every date/industry group, and the adjusted columns are created even
    when `data` has no rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain 'date', 'sic_code_first_digit' and every column in `measures`.
    measures : iterable of str
        Names of the columns to adjust.
    min_group_size : int, default 2
        Smallest group (counted in rows, including rows whose measure is NaN)
        that is demeaned.
        NOTE(review): the ADDED-sample cell of this notebook demeans single-firm
        groups as well (giving 0); pass min_group_size=1 here if both samples
        are meant to be treated the same way.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with the adjusted columns added; `data` is not modified.
    """
    adjusted_data = data.copy()
    group_keys = ['date', 'sic_code_first_digit']
    for measure in measures:
        grouped = data.groupby(group_keys)[measure]
        # per-row mean of the row's own group (NaN values are skipped by the mean)
        industry_averages = grouped.transform('mean')

        if min_group_size > 1:
            # subtract 0 rather than the mean for groups that are too small
            group_sizes = grouped.transform('size')
            industry_averages = industry_averages.mask(group_sizes < min_group_size, 0.0)

        # NOTE(review): rows with a missing 'date' belong to no group and are
        # expected to come out as NaN here - verify on the installed pandas version.
        adjusted_data[f'{measure}_adjusted'] = data[measure] - industry_averages

    return adjusted_data

# run the industry adjustment over the combined sample
adjusted_data = adjust_performance_measures(combined_data, measures)

# keep the identifying columns plus one '<measure>_adjusted' column per measure
adjusted_columns = ['date', 'sic_code', 'sic_code_first_digit']
adjusted_columns.extend(f'{measure}_adjusted' for measure in measures)
adjusted_data = adjusted_data.loc[:, adjusted_columns]

# write the reduced frame (row index included) to a single-sheet workbook
output_file = r"C:\UU\THESIS\AMX\monthly prices 2002-2007\DELETED\hypothesis2\adj_data.xlsx"
adjusted_data.to_excel(output_file, sheet_name='adj_data', index=True)

# bare last expression so the notebook renders the frame
adjusted_data


In [None]:
import pandas as pd
from scipy.stats import mannwhitneyu, wilcoxon, binomtest, ttest_rel
import numpy as np

file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 2\DELETED\deleted - Copy.xlsx"
# open the workbook in a with-block so the file handle is released after reading
with pd.ExcelFile(file_path) as xls:
    # stack every sheet of the workbook into one frame
    data = pd.concat([pd.read_excel(xls, sheet_name=sheet) for sheet in xls.sheet_names])

#define the periods for t-5, t, and t+5 (values of the 'years' column)
periods_deleted = {
    't-5': 0,
    't': 5,
    't+5': 10
}

# One frame per period, renumbered 0..n-1 in workbook order.
# The rows are deliberately not sorted by 'years': within each frame that column is
# constant, so a sort cannot order anything, and the default (non-stable) sort is free to
# permute the rows, which the paired tests below match by position.
# NOTE(review): positional pairing assumes the same firms appear in the same order in every
# period - confirm, or pair on a firm identifier instead.
df_t_minus_5 = data[data['years'] == periods_deleted['t-5']].reset_index(drop=True)
df_t = data[data['years'] == periods_deleted['t']].reset_index(drop=True)
df_t_plus_5 = data[data['years'] == periods_deleted['t+5']].reset_index(drop=True)

#define the columns to test
# NOTE(review): 'operating_margin_adjusted' is not in this list although the other cells
# of the notebook compute and aggregate it - confirm that leaving it out is intended.
columns_to_test = [ 'profit_margin_adjusted',
    'roa_adjusted', 'oibd_to_assets_adjusted', 'market_to_book_adjusted']

# one accumulator per test; each key is a column of the final result table
results = {
    'mann_whitney': {'Measure': [], 'Period': [], 'Statistic': [], 'p-value': []},
    'wilcoxon': {'Measure': [], 'Period': [], 'Statistic': [], 't-value': [], 'p-value': []},
    'sign_test': {'Measure': [], 'Period': [], 'Statistic': [], 'p-value': []},
    'paired_ttest': {'Measure': [], 'Period': [], 'Statistic': [], 'p-value': []}
}

def perform_wilcoxon_test(df1, df2, column):
    """Wilcoxon signed-rank test on `column` of two positionally paired frames.

    Row i of `df1` is paired with row i of `df2`. The longer frame is cut to
    the length of the shorter one, and a pair is dropped when either value is
    missing, so one missing value can no longer shift all later pairs.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding `column`; rows are matched by position.
    column : str
        Name of the numeric column to compare.

    Returns
    -------
    tuple
        (statistic, t_value, p_value). `statistic` and `p_value` come from
        scipy.stats.wilcoxon. `t_value` is the normal-approximation score
        (statistic - mean) / std of the signed-rank statistic, with no tie or
        continuity correction. (None, None, None) is returned when there is no
        pair with a non-zero difference.
    """
    sample1 = df1[column].to_numpy(dtype=float)
    sample2 = df2[column].to_numpy(dtype=float)

    # pair by position, then keep only the pairs where both values are present
    n_pairs = min(len(sample1), len(sample2))
    sample1, sample2 = sample1[:n_pairs], sample2[:n_pairs]
    complete = ~(np.isnan(sample1) | np.isnan(sample2))
    sample1, sample2 = sample1[complete], sample2[complete]

    # wilcoxon's default zero_method discards zero differences, so the sample size
    # behind the statistic is the number of non-zero differences
    n = int(np.count_nonzero(sample1 - sample2))
    if n == 0:
        return (None, None, None)

    stat, p_value = wilcoxon(sample1, sample2)

    #calculate the t-value from the Wilcoxon statistic
    mu_w = n * (n + 1) / 4
    sigma_w = np.sqrt(n * (n + 1) * (2 * n + 1) / 24)
    t_value = (stat - mu_w) / sigma_w

    return stat, t_value, p_value

#function to perform the Sign Test
def perform_sign_test(df1, df2, column):
    """Two-sided sign test on `column` of two positionally paired frames.

    Row i of `df1` is paired with row i of `df2`. The longer frame is cut to
    the length of the shorter one, and a pair is ignored when either value is
    missing, so one missing value can no longer shift all later pairs. Pairs
    with a zero difference carry no sign and are ignored as well.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding `column`; rows are matched by position.
    column : str
        Name of the numeric column to compare.

    Returns
    -------
    tuple
        (n_positive - n_negative, p_value), where p_value is the two-sided
        binomial test of n_positive successes in n_positive + n_negative
        trials at p = 0.5. (None, None) when no pair has a non-zero difference.
    """
    sample1 = df1[column].to_numpy(dtype=float)
    sample2 = df2[column].to_numpy(dtype=float)

    # pair by position; a difference is NaN whenever either side is missing
    n_pairs = min(len(sample1), len(sample2))
    differences = sample1[:n_pairs] - sample2[:n_pairs]
    differences = differences[~np.isnan(differences)]

    n_positive = int(np.count_nonzero(differences > 0))
    n_negative = int(np.count_nonzero(differences < 0))
    n_total = n_positive + n_negative
    if n_total == 0:
        return (None, None)

    p_value = binomtest(n_positive, n_total, 0.5, alternative='two-sided').pvalue
    return (n_positive - n_negative, p_value)

# the two event-window comparisons, as (label, earlier-period frame, later-period frame)
comparisons = [
    ('t-5 vs t', df_t_minus_5, df_t),
    ('t vs t+5', df_t, df_t_plus_5),
]


def _append_result(test_name, measure, period, values):
    """Append one row to results[test_name]; `values` maps result column -> value."""
    row = {'Measure': measure, 'Period': period, **values}
    for key, value in row.items():
        results[test_name][key].append(value)


def _paired_complete(frame_a, frame_b, column):
    """Pair two columns by row position and drop every pair with a missing value."""
    values_a = frame_a[column].to_numpy(dtype=float)
    values_b = frame_b[column].to_numpy(dtype=float)
    n_pairs = min(len(values_a), len(values_b))
    values_a, values_b = values_a[:n_pairs], values_b[:n_pairs]
    complete = ~(np.isnan(values_a) | np.isnan(values_b))
    return values_a[complete], values_b[complete]


for column in columns_to_test:
    for period, earlier, later in comparisons:
        # Mann-Whitney U Test (unpaired): run on the non-missing values only, because
        # NaN in the input makes the result NaN under scipy's default nan_policy.
        # The comparison is left out of the table when either sample is empty.
        sample_earlier = earlier[column].dropna()
        sample_later = later[column].dropna()
        if not sample_earlier.empty and not sample_later.empty:
            stat, p_value = mannwhitneyu(sample_earlier, sample_later, alternative='two-sided')
            _append_result('mann_whitney', column, period,
                           {'Statistic': stat, 'p-value': p_value})

        #Wilcoxon Signed Rank Test
        stat, t_value, p_value = perform_wilcoxon_test(earlier, later, column)
        _append_result('wilcoxon', column, period,
                       {'Statistic': stat, 't-value': t_value, 'p-value': p_value})

        #sign Test
        stat, p_value = perform_sign_test(earlier, later, column)
        _append_result('sign_test', column, period,
                       {'Statistic': stat, 'p-value': p_value})

        #paired T-Test
        # The pairs are built separately for each comparison, so both arrays always have
        # the same length; a row with None is recorded when fewer than two pairs remain.
        paired_earlier, paired_later = _paired_complete(earlier, later, column)
        if len(paired_earlier) > 1:
            stat, p_value = ttest_rel(paired_earlier, paired_later)
        else:
            stat, p_value = None, None
        _append_result('paired_ttest', column, period,
                       {'Statistic': stat, 'p-value': p_value})

# turn each accumulator in `results` into its own table
results_mann_whitney_df, results_wilcoxon_df, results_sign_test_df, results_paired_ttest_df = (
    pd.DataFrame(results[test_key])
    for test_key in ('mann_whitney', 'wilcoxon', 'sign_test', 'paired_ttest')
)

# worksheet title -> table, in the order the tables are printed and written
sheet_frames = {
    'Mann-Whitney': results_mann_whitney_df,
    'Wilcoxon': results_wilcoxon_df,
    'Sign Test': results_sign_test_df,
    'Paired T-Test': results_paired_ttest_df,
}

for frame in sheet_frames.values():
    print(frame)

# one workbook with a worksheet per test
output_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 2\DELETED\final\deleted222_significance_test_results_updated.xlsx"
with pd.ExcelWriter(output_path) as writer:
    for sheet_title, frame in sheet_frames.items():
        frame.to_excel(writer, sheet_name=sheet_title)




In [None]:
import pandas as pd


file_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 2\ADDED\ADDED - Copy.xlsx"

# sheet_name=None loads every sheet of the workbook into a dict of {sheet name: DataFrame}
sheets = pd.read_excel(file_path, sheet_name=None)

# the industry-adjusted measures that are averaged per event year
proxy_columns = ['profit_margin_adjusted', 'operating_margin_adjusted',
    'roa_adjusted', 'oibd_to_assets_adjusted', 'market_to_book_adjusted']

# Per-sheet yearly means are collected in a list and concatenated once. Growing a frame
# by concat inside the loop, starting from an empty frame, leaves the dtype of the result
# to pandas' handling of empty inputs.
per_sheet_means = []

for sheet_name, df in sheets.items():
    if df.empty:
        print(f"Sheet {sheet_name} is empty, skipping.")
        continue  # Skip empty sheets
    if 'years' not in df.columns:
        print(f"Sheet {sheet_name} has no 'years' column, skipping.")
        continue  # Skip sheets that do not have the required columns

    # values that cannot be parsed as numbers become NaN and are ignored by the mean
    df[proxy_columns] = df[proxy_columns].apply(pd.to_numeric, errors='coerce')

    #group by event year and calculate the mean of each adjusted measure within the sheet
    mean_proxies = df.groupby('years')[proxy_columns].mean().reset_index()
    per_sheet_means.append(mean_proxies)

if per_sheet_means:
    aggregated_results = pd.concat(per_sheet_means, ignore_index=True)
else:
    # no usable sheet: keep an empty table with the expected columns
    aggregated_results = pd.DataFrame(columns=['years'] + proxy_columns)

#group by event year again to get the overall mean across all sheets
# (an unweighted mean of the per-sheet means)
final_mean_proxies = aggregated_results.groupby('years').mean()

output_path = r"C:\UU\THESIS\AMX\HYPOTHESIS 2\ADDED\ADDED - Copy222_average_proxies_by_event_year.xlsx"
final_mean_proxies.to_excel(output_path)

print("Average proxies by event year saved to:", output_path)