In [1]:
from randan.descriptive_statistics import ScaleStatistics # интервальная статистика
import pandas as pd # для обработки и анализа структурированных данных
import numpy as np# для корректной работы рандана 
from randan.bivariate_association import Correlation 

# Надежность-устойчивость: Индекс гендерного равенства Евросоюза за 2013–2022 гг. (Gender Equality Index, GEI, European Union)

Массив данных находится в папке: https://disk.yandex.ru/d/PWOTilngObaAJg 

Рейтинг за 2023 год на официальном сайте Евросоюза: https://eige.europa.eu/gender-equality-index/2023 

In [2]:
# Load the Gender Equality Index table from the Excel file 'eu.xlsx' into `df`.
# The path is relative, so the file must be in the notebook's working directory.
df = pd.read_excel('eu.xlsx')

In [3]:
# Display the loaded table; it is expected to contain 27 countries
# (one row per country, one column per year of the index).
df

Unnamed: 0,country,2013,2015,2017,2019,2020,2021,2022
0,Belgium,69.255331,70.187769,70.47456,71.1,71.441687,72.726502,74.196806
1,Bulgaria,55.038562,56.948777,57.994036,58.8,59.606546,59.858294,60.695323
2,Czech Republic,55.569285,56.74063,53.611364,55.7,56.244457,56.715098,57.239282
3,Denmark,75.224597,75.598727,76.787951,77.5,77.447242,77.763297,77.832795
4,Germany,62.605909,64.910869,65.477186,66.9,67.466058,68.551877,68.664431
5,Estonia,53.367682,53.450433,56.724697,59.8,60.666913,61.584712,61.025263
6,Ireland,65.383817,67.685833,69.499139,71.3,72.231582,73.142196,74.330251
7,Greece,48.614853,50.055128,49.952303,51.2,52.235349,52.518726,53.415973
8,Spain,66.379688,67.359214,68.347626,70.1,71.962767,73.663651,74.554945
9,France,67.504696,68.88626,72.582132,74.6,75.12239,75.546276,75.140792


In [4]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric year columns
df.describe()

Unnamed: 0,2013,2015,2017,2019,2020,2021,2022
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0
mean,59.748642,61.034586,62.379293,63.596296,64.458959,65.305782,65.873713
std,8.785523,8.802987,8.917432,8.67146,8.577378,8.680998,8.678343
min,48.614853,50.055128,49.952303,51.2,52.235349,52.518726,53.415973
25%,53.316605,53.803194,55.560919,55.65,56.617276,57.700567,59.127329
50%,55.457806,56.948777,60.094681,62.5,63.393367,63.77963,65.04692
75%,65.881753,67.522523,69.255864,70.6,71.702227,72.934349,74.263528
max,80.1324,79.722271,82.569245,83.6,83.829444,83.937597,83.946452


In [5]:
# Check the distribution for normality with the Kolmogorov-Smirnov test
# (normality_test_type='ks').
# NOTE(review): only the 2013 column is passed here, so the remaining years are
# NOT tested in this cell — confirm whether every year column should be checked.
# show_results=False presumably suppresses output at construction time; the
# report is printed explicitly by summary() below.
ss = ScaleStatistics(df, [2013], show_results=False, normality_test=True, normality_test_type='ks') 
ss.summary() 


NORMALITY TESTS
------------------



Unnamed: 0,statistic,p-value
2013,0.238,0.0


Unnamed: 0,N,mode,median,mean,25%,75%,interquart. range,interquart. range (norm.),min,max,range,std,var,entropy coef.,quality var.
2013,27.0,48.614853,55.457806,59.748642,53.316605,65.881753,12.565148,0.398671,48.614853,80.1324,31.517547,8.785523,77.185421,1.0,1.0


Распределение данных за 2013 год не является нормальным (статистика критерия Колмогорова–Смирнова равна 0,238 при p-value < 0,05). Поскольку предпосылка нормальности не выполняется, для корреляционного анализа применим ранговый метод Спирмена (как и для остальных рейтингов).

In [6]:
# Correlation analysis of the rating values across years, Spearman method
# (method='spearman'). two_tailed=False requests one-tailed p-values;
# n_decimals=3 rounds the values in the printed summary to three decimals.
corr = Correlation(df, method = 'spearman', two_tailed = False, n_decimals=3)


CORRELATION SUMMARY (SPEARMAN METHOD, 1-TAILED)
------------------
The following variables were removed from the analysis since they do not belong to numerical dtypes: country



Unnamed: 0,Unnamed: 1,2013,2015,2017,2019,2020,2021,2022
2013,Coefficient,1.0,0.974,0.926,0.884,0.88,0.888,0.896
2013,p-value,,0.0,0.0,0.0,0.0,0.0,0.0
2013,N,27.0,27.0,27.0,27.0,27.0,27.0,27.0
2015,Coefficient,0.974,1.0,0.965,0.928,0.926,0.929,0.938
2015,p-value,0.0,,0.0,0.0,0.0,0.0,0.0
2015,N,27.0,27.0,27.0,27.0,27.0,27.0,27.0
2017,Coefficient,0.926,0.965,1.0,0.963,0.962,0.958,0.963
2017,p-value,0.0,0.0,,0.0,0.0,0.0,0.0
2017,N,27.0,27.0,27.0,27.0,27.0,27.0,27.0
2019,Coefficient,0.884,0.928,0.963,1.0,0.995,0.988,0.981


Note: Each empty index duplicates the previous one.
Maximum correlation is 0.996 (p-value 0.0) for variables 2021 and 2022,
minimum correlation is 0.88 (p-value 0.0) for variables 2013 and 2020.


In [7]:
# Show the correlation matrix as a table for the dissertation appendix
# (Spearman method, for comparison with the other ratings).
corr.correlation_matrix

Unnamed: 0,Unnamed: 1,2013,2015,2017,2019,2020,2021,2022
2013,Coefficient,1.0,0.9737485,0.9255189,0.8840049,0.8797314,0.8876679,0.8956044
2013,p-value,,6.900112e-18,2.432085e-12,4.906474e-10,7.529439e-10,3.353382e-10,1.402709e-10
2013,N,27.0,27.0,27.0,27.0,27.0,27.0,27.0
2015,Coefficient,0.9737485,1.0,0.964591,0.9279609,0.9255189,0.9285714,0.9377289
2015,p-value,6.900112e-18,,2.766228e-16,1.624943e-12,2.432085e-12,1.465863e-12,2.773832e-13
2015,N,27.0,27.0,27.0,27.0,27.0,27.0,27.0
2017,Coefficient,0.9255189,0.964591,1.0,0.9627595,0.962149,0.958486,0.96337
2017,p-value,2.432085e-12,2.766228e-16,,5.144677e-16,6.283423e-16,1.954533e-15,4.198175e-16
2017,N,27.0,27.0,27.0,27.0,27.0,27.0,27.0
2019,Coefficient,0.8840049,0.9279609,0.9627595,1.0,0.995116,0.98779,0.9810745


Значения коэффициента корреляции Спирмена между значениями рейтинга за разные годы составляют от 0,88 (для 2013 и 2020 годов) до 0,996 (для 2021 и 2022 годов) при p-value менее 0,05 (односторонний критерий). Рейтинг Евросоюза "Индекс гендерного равенства" обладает высокой надежностью-устойчивостью.