In [1]:
from randan.descriptive_statistics import ScaleStatistics # интервальная статистика
import pandas as pd # для обработки и анализа структурированных данных
import numpy as np# для корректной работы рандана 
from randan.bivariate_association import Correlation 

# Надежность-устойчивость: Индекс гендерного развития (Gender Development Index, GDI) Программы развития Организации Объединенных Наций (United Nations Development Programme, UNDP) за 2013–2022 гг.

Массив данных находится в папке: https://disk.yandex.ru/d/PWOTilngObaAJg

Рейтинг на официальном сайте ООН: https://hdr.undp.org/gender-development-index#/indicies/GDI 

In [2]:
# Load the GDI table into a DataFrame.
# The path is relative, so 'un_gdi.xlsx' must be in the current working directory.
df = pd.read_excel('un_gdi.xlsx')

In [3]:
# Descriptive statistics for the numeric columns.
# The data cover 2013-2022; the number of countries with a value varies
# from 170 to 182 depending on the year (see the `count` row of the output).
df.describe()

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
count,170.0,171.0,176.0,176.0,176.0,180.0,181.0,181.0,181.0,182.0
mean,0.942606,0.945175,0.94496,0.947011,0.946983,0.947056,0.948785,0.95153,0.951011,0.948934
std,0.064796,0.065278,0.065845,0.067506,0.067999,0.069264,0.06795,0.067959,0.068641,0.071218
min,0.62,0.584,0.547,0.509,0.473,0.468,0.464,0.464,0.457,0.456
25%,0.9125,0.917,0.9175,0.91675,0.9195,0.9185,0.926,0.927,0.927,0.926
50%,0.9625,0.963,0.963,0.965,0.966,0.967,0.969,0.97,0.972,0.9705
75%,0.986,0.988,0.98625,0.988,0.988,0.989,0.99,0.99,0.991,0.991
max,1.035,1.05,1.05,1.05,1.057,1.065,1.072,1.07,1.069,1.064


In [4]:
# Drop rows (countries) that have a missing value in any column,
# so that every remaining country has a value for every year.
df = df.dropna()

In [5]:
# 170 countries with no missing values remain in the table
# (the original table had 194 countries).
df

Unnamed: 0,country,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Afghanistan,0.667,0.657,0.682,0.685,0.696,0.701,0.700,0.695,0.717,0.622
1,Albania,0.963,0.958,0.969,0.968,0.966,0.970,0.972,0.978,0.981,0.977
2,Algeria,0.858,0.857,0.865,0.869,0.872,0.873,0.877,0.879,0.882,0.881
4,Angola,0.860,0.871,0.877,0.892,0.896,0.902,0.889,0.906,0.898,0.905
6,Argentina,0.999,0.996,0.998,0.994,0.994,0.998,0.998,0.999,0.990,0.995
...,...,...,...,...,...,...,...,...,...,...,...
190,Venezuela (Bolivarian Republic of),1.021,1.022,1.021,1.020,1.018,1.015,1.006,1.001,1.002,1.002
191,Viet Nam,1.003,1.004,1.006,1.007,1.005,1.005,1.006,1.003,1.004,1.007
192,Yemen,0.620,0.584,0.547,0.509,0.473,0.468,0.464,0.464,0.457,0.456
193,Zambia,0.899,0.901,0.904,0.903,0.934,0.940,0.928,0.945,0.939,0.930


In [6]:
# Check normality of the distribution for the first year (2013)
# using the Kolmogorov-Smirnov test (normality_test_type='ks').
# show_results=False presumably suppresses automatic printing, with the report
# produced explicitly by summary() below -- confirm against the randan docs.
# NOTE(review): only the 2013 column is tested here; the remaining years are
# not checked for normality in this cell.
ss = ScaleStatistics(df, [2013], show_results=False, normality_test=True, normality_test_type='ks') 
ss.summary() 


NORMALITY TESTS
------------------



Unnamed: 0,statistic,p-value
2013,0.137,0.0


Unnamed: 0,N,mode,median,mean,25%,75%,interquart. range,interquart. range (norm.),min,max,range,std,var,entropy coef.,quality var.
2013,170.0,0.981,0.9625,0.942606,0.9125,0.986,0.0735,0.177108,0.62,1.035,0.415,0.064796,0.004199,0.971994,0.99708


Распределение данных за 2013 год отличается от нормального (статистика критерия Колмогорова–Смирнова равна 0,137 при p-value < 0,05). Поэтому для корреляционного анализа применим ранговый коэффициент корреляции Спирмена. 

In [7]:
# Correlation analysis of the rating values between years.
# Spearman's method with a one-tailed significance test (two_tailed = False).
# The printed summary shows values to 3 decimals (n_decimals=3).
# The non-numeric `country` column is excluded from the analysis, as the
# summary output reports.
corr = Correlation(df, method = 'spearman', two_tailed = False, n_decimals=3)


CORRELATION SUMMARY (SPEARMAN METHOD, 1-TAILED)
------------------
The following variables were removed from the analysis since they do not belong to numerical dtypes: country



Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
2013,Coefficient,1.0,0.977,0.989,0.968,0.973,0.975,0.956,0.951,0.941,0.949
2013,p-value,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2013,N,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
2014,Coefficient,0.977,1.0,0.977,0.954,0.961,0.961,0.943,0.948,0.927,0.932
2014,p-value,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014,N,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
2015,Coefficient,0.989,0.977,1.0,0.971,0.98,0.983,0.966,0.964,0.951,0.956
2015,p-value,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2015,N,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
2016,Coefficient,0.968,0.954,0.971,1.0,0.967,0.967,0.957,0.944,0.933,0.938


Note: Each empty index duplicates the previous one.
Maximum correlation is 0.989 (p-value 0.0) for variables 2013 and 2015,
minimum correlation is 0.927 (p-value 0.0) for variables 2014 and 2021.


In [8]:
# Display the results as a table for the dissertation appendix:
# coefficient, p-value and N for each pair of years, at full precision.
corr.correlation_matrix

Unnamed: 0,Unnamed: 1,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
2013,Coefficient,1.0,0.9768814,0.9887112,0.9679436,0.9733717,0.9745717,0.9563878,0.9514752,0.9412164,0.9489262
2013,p-value,,8.566961e-115,9.882219e-141,4.960509e-103,1.060722e-109,2.317836e-111,5.200234e-92,3.314048e-88,2.134672e-81,2.195684e-86
2013,N,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
2014,Coefficient,0.9768814,1.0,0.9773445,0.9535961,0.9611653,0.9613446,0.9429254,0.9480142,0.9270905,0.9317183
2014,p-value,8.566961e-115,,1.595507e-115,8.485814e-90,3.718006e-96,2.539111e-96,1.924683e-82,9.345195e-86,8.42734e-74,4.156854e-76
2014,N,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
2015,Coefficient,0.9887112,0.9773445,1.0,0.9712126,0.9801603,0.9834502,0.9659224,0.9643085,0.9507935,0.95598
2015,p-value,9.882219e-141,1.595507e-115,,6.771773e-107,2.5825070000000002e-120,7.189682e-127,7.753347e-101,3.533691e-99,1.03974e-87,1.1172679999999999e-91
2015,N,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
2016,Coefficient,0.9679436,0.9535961,0.9712126,1.0,0.9671329,0.967247,0.9566531,0.9444746,0.9329382,0.9376894


Значения коэффициента корреляции Спирмена составляют от 0,927 (для 2014 и 2021 годов) до 0,989 (для 2013 и 2015 годов) при p-value менее 0,05. Рейтинг ООН "Индекс гендерного развития" обладает высокой надежностью-устойчивостью. 