In [38]:
import pandas as pd
from scipy.stats import mannwhitneyu

# States removed from the sample by name.
# NOTE(review): the reason for excluding these states is not recorded here — confirm and document.
excluded_states = [
    'Bahrain', 'Kuwait', 'Oman', 'Qatar', 'Saudi Arabia', 'Trinidad and Tobago',
    'United Arab Emirates', 'Chile', 'Croatia', 'Hungary', 'Poland', 'Uruguay',
    'Equatorial Guinea',
]

# Build the working panel in a single chain:
#   1. read the second sheet (sheet_name=1 is a zero-based position), indexed by
#      state_name, with '..' cells parsed as missing values;
#   2. keep only states whose observations start no later than 2002 and end no
#      earlier than 2022;
#   3. drop the excluded states (raises KeyError if a listed state is absent);
#   4. drop every row whose region code equals 7
#      (NOTE(review): meaning of region code 7 is not visible here — confirm).
df = (
    pd.read_excel("data5.xlsx", sheet_name=1, index_col='state_name', na_values='..')
    .groupby('state_name')
    .filter(lambda grp: grp['year'].min() <= 2002 and grp['year'].max() >= 2022)
    .drop(index=excluded_states)
    .loc[lambda frame: frame['region'] != 7]
)

# Indicator columns that the later cells summarise and test, one per line.
cols = [
    'libdem_norm',
    'property_rights',
    'tax_burden',
    'government_spending',
    'business_freedom',
    'monetary_freedom',
    'trade_freedom',
    'investment_freedom',
    'financial_freedom',
    'gov_effectiveness',
    'corruption',
    'pstab',
]

# The "<indicator>_diff" counterpart of every entry in `cols`, in the same order.
cols_1 = [f'{indicator}_diff' for indicator in cols]

In [39]:
# Per-state total of `imf_pr`, broadcast back onto every row of that state.
# NOTE(review): presumably `imf_pr` is a yearly 0/1 programme flag, so the sum
# counts programme years — confirm against the data dictionary.
imf_pr_total = df.groupby('state_name')['imf_pr'].transform('sum')

# Two comparison groups: states with a total of at least 16, and states with a total of 0.
df_16_plus = df[imf_pr_total >= 16]
df_0 = df[imf_pr_total == 0]

In [40]:
# 2002 cross-section of each group.
df_16_plus_2002 = df_16_plus.loc[df_16_plus['year'].eq(2002)]
df_0_2002 = df_0.loc[df_0['year'].eq(2002)]

# 2022 cross-section of each group.
df_16_plus_2022 = df_16_plus.loc[df_16_plus['year'].eq(2022)]
df_0_2022 = df_0.loc[df_0['year'].eq(2022)]

# Every row strictly after 2002 (no upper bound on the year is applied here).
df_16_plus_ = df_16_plus.loc[df_16_plus['year'].gt(2002)]
df_0_ = df_0.loc[df_0['year'].gt(2002)]

In [41]:
# For every indicator, print the mean of each group in 2002 and in 2022,
# rounded to two decimals. pandas `.mean()` skips missing values by default.
group_snapshots = (
    ('Среднее для группы 16+', df_16_plus_2002, df_16_plus_2022),
    ('Среднее для группы 0 лет', df_0_2002, df_0_2022),
)

for col in cols:
    print(col)
    for label, frame_2002, frame_2022 in group_snapshots:
        mean_2002 = frame_2002[col].mean().round(2)
        mean_2022 = frame_2022[col].mean().round(2)
        print(f'{label}: {mean_2002} (2002); {mean_2022} (2022)')
    print()

libdem_norm
Среднее для группы 16+: 32.69 (2002); 37.57 (2022)
Среднее для группы 0 лет: 38.47 (2002); 28.77 (2022)

property_rights
Среднее для группы 16+: 41.67 (2002); 41.22 (2022)
Среднее для группы 0 лет: 41.76 (2002); 43.84 (2022)

tax_burden
Среднее для группы 16+: 70.06 (2002); 79.81 (2022)
Среднее для группы 0 лет: 69.79 (2002); 78.78 (2022)

government_spending
Среднее для группы 16+: 82.56 (2002); 81.92 (2022)
Среднее для группы 0 лет: 75.82 (2002); 75.07 (2022)

business_freedom
Среднее для группы 16+: 56.88 (2002); 48.79 (2022)
Среднее для группы 0 лет: 59.41 (2002); 56.45 (2022)

monetary_freedom
Среднее для группы 16+: 75.25 (2002); 74.43 (2022)
Среднее для группы 0 лет: 69.35 (2002); 63.8 (2022)

trade_freedom
Среднее для группы 16+: 58.79 (2002); 67.42 (2022)
Среднее для группы 0 лет: 59.59 (2002); 66.9 (2022)

investment_freedom
Среднее для группы 16+: 55.0 (2002); 58.44 (2022)
Среднее для группы 0 лет: 48.82 (2002); 45.89 (2022)

financial_freedom
Среднее для группы 

In [42]:
# Two-sided Mann-Whitney U test per indicator: 16+ group vs 0 group, 2002 cross-section.
for col in cols:
    # The sheet is read with na_values='..', so indicators may contain NaN.
    # mannwhitneyu does not drop NaN by default, so remove it here per indicator;
    # this also keeps the test on the same observations that `.mean()` uses.
    sample_16_plus = df_16_plus_2002[col].dropna()
    sample_0 = df_0_2002[col].dropna()

    if sample_16_plus.empty or sample_0.empty:
        # Nothing to compare for this indicator; report NaN instead of failing mid-loop.
        u_stat = p_value = float('nan')
    else:
        u_stat, p_value = mannwhitneyu(sample_16_plus, sample_0, alternative='two-sided')

    print(col)
    print(f"U-statistic: {u_stat:.4f}")
    print(f"P-value: {p_value:.4f}")
    print()

libdem_norm
U-statistic: 120.5000
P-value: 0.5889

property_rights
U-statistic: 135.5000
P-value: 1.0000

tax_burden
U-statistic: 138.0000
P-value: 0.9569

government_spending
U-statistic: 175.0000
P-value: 0.1644

business_freedom
U-statistic: 110.0000
P-value: 0.2887

monetary_freedom
U-statistic: 168.0000
P-value: 0.2565

trade_freedom
U-statistic: 153.5000
P-value: 0.5374

investment_freedom
U-statistic: 153.5000
P-value: 0.5060

financial_freedom
U-statistic: 161.5000
P-value: 0.3214

gov_effectiveness
U-statistic: 73.0000
P-value: 0.0244

corruption
U-statistic: 110.0000
P-value: 0.3583

pstab
U-statistic: 147.0000
P-value: 0.7053



In [43]:
# Two-sided Mann-Whitney U test per indicator: 16+ group vs 0 group, 2022 cross-section.
for col in cols:
    # The sheet is read with na_values='..', so indicators may contain NaN.
    # mannwhitneyu does not drop NaN by default, so remove it here per indicator;
    # this also keeps the test on the same observations that `.mean()` uses.
    sample_16_plus = df_16_plus_2022[col].dropna()
    sample_0 = df_0_2022[col].dropna()

    if sample_16_plus.empty or sample_0.empty:
        # Nothing to compare for this indicator; report NaN instead of failing mid-loop.
        u_stat = p_value = float('nan')
    else:
        u_stat, p_value = mannwhitneyu(sample_16_plus, sample_0, alternative='two-sided')

    print(col)
    print(f"U-statistic: {u_stat:.4f}")
    print(f"P-value: {p_value:.4f}")
    print()

libdem_norm
U-statistic: 170.0000
P-value: 0.2275

property_rights
U-statistic: 129.5000
P-value: 0.8289

tax_burden
U-statistic: 144.0000
P-value: 0.7870

government_spending
U-statistic: 195.5000
P-value: 0.0335

business_freedom
U-statistic: 91.0000
P-value: 0.1089

monetary_freedom
U-statistic: 159.5000
P-value: 0.4073

trade_freedom
U-statistic: 113.0000
P-value: 0.4175

investment_freedom
U-statistic: 177.5000
P-value: 0.1365

financial_freedom
U-statistic: 151.0000
P-value: 0.5948

gov_effectiveness
U-statistic: 84.0000
P-value: 0.0636

corruption
U-statistic: 157.0000
P-value: 0.4602

pstab
U-statistic: 111.0000
P-value: 0.3775



In [44]:
# Two-sided Mann-Whitney U test per "_diff" indicator: 16+ group vs 0 group,
# pooling every row after 2002.
# NOTE(review): the pooled rows are repeated observations of the same states, so
# they may not be independent as the test assumes — confirm this is intended.
for col in cols_1:
    # The sheet is read with na_values='..', so indicators may contain NaN.
    # mannwhitneyu does not drop NaN by default, so remove it here per indicator.
    sample_16_plus = df_16_plus_[col].dropna()
    sample_0 = df_0_[col].dropna()

    if sample_16_plus.empty or sample_0.empty:
        # Nothing to compare for this indicator; report NaN instead of failing mid-loop.
        u_stat = p_value = float('nan')
    else:
        u_stat, p_value = mannwhitneyu(sample_16_plus, sample_0, alternative='two-sided')

    print(col)
    print(f"U-statistic: {u_stat:.4f}")
    print(f"P-value: {p_value:.4f}")
    print()

libdem_norm_diff
U-statistic: 60260.5000
P-value: 0.0166

property_rights_diff
U-statistic: 53922.5000
P-value: 0.8288

tax_burden_diff
U-statistic: 52693.0000
P-value: 0.4855

government_spending_diff
U-statistic: 53558.0000
P-value: 0.7310

business_freedom_diff
U-statistic: 52565.5000
P-value: 0.4525

monetary_freedom_diff
U-statistic: 55435.5000
P-value: 0.6724

trade_freedom_diff
U-statistic: 53430.0000
P-value: 0.6911

investment_freedom_diff
U-statistic: 56456.5000
P-value: 0.2921

financial_freedom_diff
U-statistic: 52809.5000
P-value: 0.2544

gov_effectiveness_diff
U-statistic: 56174.5000
P-value: 0.4687

corruption_diff
U-statistic: 58331.5000
P-value: 0.1083

pstab_diff
U-statistic: 50642.5000
P-value: 0.1249

