In [1]:
from randan.descriptive_statistics import ScaleStatistics # интервальная статистика
import pandas as pd # для обработки и анализа структурированных данных
import numpy as np# для корректной работы рандана 
from randan.bivariate_association import Correlation 

# Надежность-устойчивость: Индекс гендерного неравенства Программы развития Организации Объединенных Наций за 2013-2022 гг. (Gender Inequality Index, GII, United Nations Development Programme)

Массив данных находится в папке: https://disk.yandex.ru/d/PWOTilngObaAJg

Рейтинг на официальном сайте ООН: https://hdr.undp.org/data-center/thematic-composite-indices/gender-inequality-index#/indicies/GII

In [2]:
# Load the GII table from the Excel workbook (path is relative to the working directory).
df = pd.read_excel('un_gii.xlsx')

In [3]:
# Descriptive statistics for the yearly columns.
# Data cover 2013-2022; between 155 and 166 countries have a value in a given year.
df.describe()

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
count,155.0,155.0,158.0,159.0,159.0,163.0,165.0,165.0,165.0,166.0
mean,0.365819,0.36231,0.359462,0.353283,0.345214,0.345123,0.341509,0.338873,0.339673,0.334711
std,0.191434,0.190345,0.189886,0.191886,0.193054,0.194823,0.195887,0.195881,0.198099,0.194808
min,0.039,0.035,0.033,0.03,0.023,0.018,0.016,0.013,0.013,0.009
25%,0.192,0.1875,0.195,0.193,0.184,0.1775,0.166,0.184,0.18,0.17725
50%,0.378,0.378,0.377,0.365,0.361,0.365,0.355,0.359,0.352,0.351
75%,0.5255,0.526,0.5225,0.511,0.509,0.5055,0.497,0.497,0.496,0.49325
max,0.796,0.803,0.811,0.838,0.821,0.819,0.816,0.815,0.821,0.82


In [4]:
# Drop every row (country) that has a missing value in any column,
# so that all years are compared on the same set of countries.
df = df.dropna()

In [5]:
# 155 countries with complete data remain (the table originally had 194 rows).
df

Unnamed: 0,country,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Afghanistan,0.712,0.692,0.692,0.688,0.681,0.678,0.676,0.674,0.653,0.665
1,Albania,0.191,0.186,0.178,0.161,0.145,0.139,0.131,0.129,0.117,0.116
2,Algeria,0.422,0.417,0.409,0.407,0.417,0.405,0.397,0.396,0.457,0.460
4,Angola,0.549,0.553,0.548,0.543,0.547,0.537,0.533,0.529,0.529,0.520
6,Argentina,0.356,0.349,0.340,0.337,0.325,0.303,0.291,0.303,0.296,0.292
...,...,...,...,...,...,...,...,...,...,...,...
190,Venezuela (Bolivarian Republic of),0.494,0.489,0.493,0.513,0.497,0.497,0.497,0.522,0.522,0.521
191,Viet Nam,0.378,0.380,0.384,0.380,0.384,0.389,0.391,0.386,0.378,0.378
192,Yemen,0.796,0.803,0.811,0.809,0.809,0.808,0.789,0.787,0.821,0.820
193,Zambia,0.574,0.571,0.562,0.542,0.545,0.529,0.521,0.522,0.526,0.526


In [6]:
# Check whether the values for the first year (2013) are normally distributed,
# using the Kolmogorov-Smirnov test.
ss = ScaleStatistics(df, [2013], show_results=False, normality_test=True, normality_test_type='ks') 
ss.summary() 


NORMALITY TESTS
------------------



Unnamed: 0,statistic,p-value
2013,0.082,0.013


Unnamed: 0,N,mode,median,mean,25%,75%,interquart. range,interquart. range (norm.),min,max,range,std,var,entropy coef.,quality var.
2013,155.0,0.549,0.378,0.365819,0.192,0.5255,0.3335,0.440555,0.039,0.796,0.757,0.191434,0.036647,0.987857,0.998811


Данные за 2013 год не имеют нормального распределения (статистика теста Колмогорова-Смирнова 0,082 при p-value = 0,013, то есть менее 0,05), поэтому для корреляционного анализа применим метод Спирмена.

In [7]:
# Correlation analysis of the index values across years
# using Spearman's method (two_tailed=False, so the reported p-values are one-tailed).
corr = Correlation(df, method = 'spearman', two_tailed = False, n_decimals=3)


CORRELATION SUMMARY (SPEARMAN METHOD, 1-TAILED)
------------------
The following variables were removed from the analysis since they do not belong to numerical dtypes: country



Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
2013,Coefficient,1.0,0.998,0.996,0.992,0.97,0.969,0.969,0.967,0.969,0.97
2013,p-value,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013,N,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
2014,Coefficient,0.998,1.0,0.998,0.994,0.972,0.972,0.971,0.968,0.971,0.971
2014,p-value,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014,N,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
2015,Coefficient,0.996,0.998,1.0,0.997,0.975,0.975,0.974,0.972,0.975,0.975
2015,p-value,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015,N,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
2016,Coefficient,0.992,0.994,0.997,1.0,0.979,0.978,0.977,0.975,0.978,0.978


Note: Each empty index duplicates the previous one.
Maximum correlation is 0.998 (p-value 0.0) for variables 2014 and 2015,
minimum correlation is 0.967 (p-value 0.0) for variables 2013 and 2020.


In [8]:
# Show the correlation matrix as a table (unrounded values) for the dissertation appendix.
corr.correlation_matrix

Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
2013,Coefficient,1.0,0.997707,0.9958072,0.9915749,0.9696853,0.9694137,0.9693237,0.967142,0.9691699,0.9703973
2013,p-value,,3.7137150000000004e-181,3.8895799999999995e-161,5.085529e-138,7.743433e-96,1.516064e-95,1.891667e-95,3.339323e-93,2.757633e-95,1.2910790000000001e-96
2013,N,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
2014,Coefficient,0.997707,1.0,0.9982984,0.9944022,0.9724024,0.971857,0.9707333,0.9682156,0.9714898,0.9714878
2014,p-value,3.7137150000000004e-181,,4.667415e-191,1.477052e-151,6.510752000000001e-99,2.850439e-98,5.460135e-97,2.73912e-94,7.576986e-98,7.618639e-98
2014,N,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
2015,Coefficient,0.9958072,0.9982984,1.0,0.9965855,0.9749556,0.974925,0.9740446,0.9719112,0.9751016,0.9748723
2015,p-value,3.8895799999999995e-161,4.667415e-191,,6.031122999999999e-168,4.268418e-102,4.6811749999999995e-102,6.344653000000001e-101,2.464755e-98,2.74377e-102,5.484994e-102
2015,N,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0,155.0
2016,Coefficient,0.9915749,0.9944022,0.9965855,1.0,0.97907,0.9781338,0.9766523,0.9751731,0.9782459,0.9781946


Значения коэффициента корреляции Спирмена составляют от 0,967 (для 2013 и 2020 годов) до 0,998 (для 2014 и 2015 годов) при p-value менее 0,05. Рейтинг ООН "Индекс гендерного неравенства" обладает высокой надежностью-устойчивостью.