<a href="https://colab.research.google.com/github/oneryigit/network_analysis/blob/main/Cato_questions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Oner Yigit
# 2/16/2025



In [1]:
import numpy as np
import pandas as pd


## Question 1

In [2]:
# gini index (unweighted) formula

def gini_index(incomes):
    # Sorting incomes
    incomes = np.sort(np.asarray(incomes))
    #population size
    n = incomes.size
    # cumulative sums of incomes
    cum_incomes = np.cumsum(incomes)
    #  cumulative income positions
    pos_sums = np.sum(cum_incomes)


    # Gini index calculation
    gini = (n + 1 - 2 * (pos_sums / cum_incomes[-1])) / n
    return gini

In [3]:
#lets use an example:

country= 'Russia'

incomes = [0, 0, 0, 0, 100]
print("Gini Index of " f'{country} ' "is", gini_index(incomes))

# income inequality is very high as among 5 people 4 people receive no income
# while one person gets all the income.


Gini Index of Russia is 0.8


In [4]:
# mean log deviation

def mld(incomes):

    incomes = np.asarray(incomes)

    incomes[incomes == 0] = 1  # log 0 is undefined

    #size
    n = len(incomes)
    #mean
    mu = np.mean(incomes)
    mld = np.mean(np.log(mu / incomes))

    return mld

In [5]:
#lets use an example:

# Four people have an income of 1 and one person has 100.

incomes = [1, 1, 1, 1, 100]

results = mld(incomes)
results

2.113918949509654

In [6]:
# weighted

def w_gini_index(incomes, we):
    incomes = np.array(incomes)
    we = np.array(we)

    # Get positions of incomes
    indices = np.argsort(incomes)
    incomes_sorted = incomes[indices]
    we_sorted = we[indices]

    # Weighted cumulative sums
    we_sum = np.sum(incomes_sorted * we_sorted)
    tot_weight = np.sum(we_sorted)
    we_cum_tot = np.cumsum(we_sorted * incomes_sorted)
    we_pos_sums = np.sum(we_sorted * we_cum_tot / we_sum)

    # Weighted Gini calculation
    gini = (tot_weight + 1 - 2 * we_pos_sums) / tot_weight
    return gini


In [7]:
# example
incomes = [1, 1, 1, 1, 100] # income of a family

we = [10, 10, 10, 10, 5] # household size

weighted_gini = w_gini_index(incomes, we)

weighted_gini

# the large families have an income of 1 while a family of 5 has income of 100.
# the result shows a high income inequality weighted by household size.

0.7176954732510288

## Question 2

In [8]:
# calculating human developmet index

def calc_hdi(le, mys, eys, gni):

    # Constants UNDP min and max values
    min_le, max_le = 20, 85 #life_expectancy min life_expectancy max
    min_sch, max_mys = 0, 15 # min schooling, max mean schooling
    max_eys = 18 # max_expected_schooling
    min_gni, max_gni = 100, 75000 #GNI per capita

    # Health index formula
    h_idx = (le - min_le) / (max_le - min_le)

    # Education index formula
    mys_idx = (mys - min_sch) / (max_mys - min_sch)
    eys_idx = (eys - min_sch) / (max_eys - min_sch)
    ed_idx = (mys_idx + eys_idx) / 2

    # Income index formula
    i_idx = (np.log(gni) - np.log(min_gni)) / (np.log(max_gni) - np.log(min_gni))

    # HDI calculation
    hdi = (h_idx * ed_idx * i_idx) ** (1/3)
    return hdi


In [9]:
# Example Country X
le = 72
mys = 12
eys = 16
gni = 40000

# HDI
hdi = calc_hdi(le, mys, eys, gni)
print("The Country X's HDI is:", hdi)

The Country X's HDI is: 0.8487447297367349


## Question 3

In [10]:
def calc_75(data, pops, regions):
    results = {}
    for region, countries in regions.items():
        tot_pop = sum(pops.get(country, 0) for country in countries)
        data_pop = sum(pops.get(country, 0) for country in countries if country in data)
        results[region] = (data_pop / tot_pop) * 100 if tot_pop > 0 else 0
    return results


In [11]:
# Example Region

data = {'Country1': True, 'Country2': True}  # Countries with data available
pops = {'Country1': 100000, 'Country2': 200000, 'Country3': 150000}
regions = {'Region1': ['Country1', 'Country2', 'Country3']}

# Calculate representativeness
rep = calc_75(data, pops, regions)
print(rep)

#'Region1': 66.666
# Region1 does not meet the threshold for regional calculations


{'Region1': 66.66666666666666}
