In [1]:
from randan.descriptive_statistics import ScaleStatistics # интервальная статистика
import pandas as pd # для обработки и анализа структурированных данных
import numpy as np# для корректной работы рандана 
from randan.bivariate_association import Correlation 

# Надежность-устойчивость: Индекс гендерного разрыва Всемирного экономического форума за 2013-2023 гг. (Gender Gap Index, GGI, World Economic Forum)

Массив данных находится в папке: https://disk.yandex.ru/d/PWOTilngObaAJg

Рейтинг за 2023 год на официальном сайте ВЭФ: https://www.weforum.org/publications/global-gender-gap-report-2023/ (отчеты с рейтингами выложены в ПДФ, таблица с данными создана на основе 10 отчетов)

In [2]:
# загружаем таблицу
df = pd.read_excel('wef_all.xlsx')

In [3]:
# дескриптивная статистика
# от 136 до 149 стран в рейтингах за разные годы
df.describe()

Unnamed: 0,2013,2014,2015,2016,2017,2018,2020,2021,2022,2023
count,136.0,142.0,145.0,144.0,144.0,149.0,153.0,156.0,146.0,146.0
mean,0.686394,0.692065,0.695014,0.694715,0.697,0.695409,0.700144,0.703878,0.70976,0.711966
std,0.060218,0.059289,0.059602,0.057786,0.058788,0.060735,0.061679,0.067826,0.065929,0.067697
min,0.5128,0.5145,0.484,0.516,0.516,0.499,0.494,0.444,0.435,0.405
25%,0.651675,0.652525,0.662,0.6635,0.669,0.662,0.665,0.6645,0.6705,0.676
50%,0.69125,0.69385,0.698,0.697,0.695,0.701,0.707,0.7105,0.71,0.711
75%,0.7177,0.7264,0.733,0.729,0.731,0.73,0.736,0.74225,0.749,0.76075
max,0.8731,0.8594,0.881,0.874,0.878,0.858,0.877,0.892,0.908,0.912


In [4]:
# удаляем пробелы
df = df.dropna()

In [5]:
# в таблице остается 121 страна без пробелов в данных (изначально было 161) 
df

Unnamed: 0,country,2013,2014,2015,2016,2017,2018,2020,2021,2022,2023
1,Albania,0.6412,0.6869,0.701,0.704,0.728,0.734,0.769,0.770,0.787,0.791
2,Algeria,0.5966,0.6182,0.632,0.642,0.629,0.629,0.634,0.633,0.602,0.573
3,Angola,0.6659,0.6311,0.637,0.643,0.640,0.633,0.660,0.657,0.638,0.656
4,Argentina,0.7195,0.7317,0.734,0.735,0.732,0.733,0.746,0.752,0.756,0.762
5,Armenia,0.6634,0.6622,0.668,0.669,0.677,0.678,0.684,0.673,0.698,0.721
...,...,...,...,...,...,...,...,...,...,...,...
150,United Arab Emirates,0.6372,0.6436,0.646,0.639,0.649,0.642,0.655,0.716,0.716,0.712
151,United Kingdom,0.7440,0.7383,0.758,0.752,0.770,0.774,0.767,0.775,0.780,0.792
152,United States,0.7392,0.7463,0.740,0.722,0.718,0.720,0.724,0.763,0.769,0.748
153,Uruguay,0.6803,0.6871,0.679,0.681,0.710,0.715,0.737,0.702,0.711,0.714


In [6]:
# проверяем нормальность распределения по каждому году
# тест Колмогорова-Смирнова
ss = ScaleStatistics(df, [2013], show_results=False, normality_test=True, normality_test_type='ks') 
ss.summary() 


NORMALITY TESTS
------------------



Unnamed: 0,statistic,p-value
2013,0.053,0.598


Unnamed: 0,N,mode,median,mean,25%,75%,interquart. range,interquart. range (norm.),min,max,range,std,var,entropy coef.,quality var.
2013,121.0,0.6803,0.6917,0.690719,0.6599,0.7218,0.0619,0.189181,0.5459,0.8731,0.3272,0.05777,0.003337,0.998058,0.999799


Распределение данных за каждый год ближе к нормальному (значение p-value > 0,05 для каждого года, нет повода отклонить гипотезу о нормальном распределении). Применим метод Пирсона и метод Спирмена (как для остальных рейтингов) для корреляционного анализа. 

In [7]:
# корреляционный анализ значений рейтинга по годам, метод Пирсона
corrp = Correlation(df, method = 'pearson', two_tailed = False, n_decimals=3)


CORRELATION SUMMARY (PEARSON METHOD, 1-TAILED)
------------------
The following variables were removed from the analysis since they do not belong to numerical dtypes: country



Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2020,2021,2022,2023
2013,Coefficient,1.0,0.962,0.952,0.94,0.914,0.906,0.886,0.878,0.86,0.87
2013,p-value,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2014,Coefficient,0.962,1.0,0.969,0.957,0.942,0.934,0.909,0.903,0.893,0.891
2014,p-value,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2015,Coefficient,0.952,0.969,1.0,0.988,0.965,0.96,0.933,0.921,0.91,0.91
2015,p-value,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2016,Coefficient,0.94,0.957,0.988,1.0,0.972,0.964,0.942,0.928,0.914,0.913


Note: Each empty index duplicates the previous one.
Maximum correlation is 0.988 (p-value 0.0) for variables 2015 and 2016,
minimum correlation is 0.86 (p-value 0.0) for variables 2013 and 2022.


In [8]:
# корреляционный анализ значений рейтинга по годам, метод Спирмена
corrs = Correlation(df, method = 'spearman', two_tailed = False, n_decimals=3)


CORRELATION SUMMARY (SPEARMAN METHOD, 1-TAILED)
------------------
The following variables were removed from the analysis since they do not belong to numerical dtypes: country



Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2020,2021,2022,2023
2013,Coefficient,1.0,0.947,0.935,0.924,0.884,0.88,0.853,0.839,0.824,0.823
2013,p-value,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2014,Coefficient,0.947,1.0,0.954,0.943,0.917,0.907,0.873,0.872,0.872,0.865
2014,p-value,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2015,Coefficient,0.935,0.954,1.0,0.985,0.952,0.948,0.912,0.906,0.897,0.887
2015,p-value,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2016,Coefficient,0.924,0.943,0.985,1.0,0.957,0.952,0.924,0.911,0.899,0.898


Note: Each empty index duplicates the previous one.
Maximum correlation is 0.985 (p-value 0.0) for variables 2015 and 2016,
minimum correlation is 0.823 (p-value 0.0) for variables 2013 and 2023.


In [9]:
# приводим в табличный вид для приложения в диссертации 
# метод Спирмена, для сравнения с другими рейтингами
corrs.correlation_matrix

Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2020,2021,2022,2023
2013,Coefficient,1.0,0.9467461,0.9351937,0.923913,0.8839963,0.8795865,0.8531651,0.8388486,0.8235758,0.8230212
2013,p-value,,1.0408740000000001e-60,8.752597999999999e-56,8.769879e-52,2.0801989999999998e-41,1.674126e-40,9.933106e-36,1.6141810000000002e-33,2.187795e-31,2.5914280000000003e-31
2013,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2014,Coefficient,0.9467461,1.0,0.9541005,0.9427691,0.9169879,0.9067529,0.8734568,0.8719977,0.8717266,0.8645715
2014,p-value,1.0408740000000001e-60,,1.8672599999999998e-64,6.721442e-59,1.270418e-49,9.43608e-47,2.6630140000000002e-39,5.0374530000000004e-39,5.66599e-39,1.1492689999999999e-37
2014,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2015,Coefficient,0.9351937,0.9541005,1.0,0.9846976,0.9516793,0.9478504,0.9120988,0.9063515,0.8969396,0.8873616
2015,p-value,8.752597999999999e-56,1.8672599999999998e-64,,1.883209e-92,3.703349e-63,3.090634e-61,3.3037319999999996e-48,1.203745e-46,2.7023269999999997e-44,3.998307e-42
2015,N,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0,121.0
2016,Coefficient,0.923913,0.9427691,0.9846976,1.0,0.9565691,0.9518322,0.9238889,0.9107971,0.899327,0.8980064


Значения коэффициента корреляции Спирмена составляют от 0,823 для 2013 и 2023 года до 0,985 для 2015 и 2016 года при p-value менее 0,05. Рейтинг ВЭФ "Индекс гендерного разрыва" обладает высокой надежностью-устойчивостью. 