# Setup 

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

# independence test implementation 
def find_value_smaller_than_5(table):
    """Locate the first cell of a contingency table whose count is at most 5.

    The table is scanned row by row, left to right. Note that, despite the
    function name, the comparison is inclusive (``<= 5``).

    Parameters
    ----------
    table : list of list of numbers
        Contingency table; every row is read up to the width of the first row.

    Returns
    -------
    tuple (bool, int)
        ``(True, column_index)`` for the first small cell found, otherwise
        ``(False, 0)``.
    """
    for row in table:
        for col_idx in range(len(table[0])):
            if row[col_idx] <= 5:
                return True, col_idx
    return False, 0


def collapse_index(table):
    """Merge sparse columns of a contingency table into a neighbouring column.

    While ``find_value_smaller_than_5`` reports a small cell, the column that
    contains it is added to the column on its right (or to the column on its
    left when it is the last column) and removed from the table. The process
    stops as soon as no small cell is left or only one column remains, since
    a single column has no neighbour to merge into.

    Parameters
    ----------
    table : list of list of numbers
        Contingency table, one inner list per row. The input is not modified;
        rows are copied before columns are merged.

    Returns
    -------
    list of list of numbers
        The collapsed table, or ``table`` itself when nothing had to be merged.
    """
    exist, collapse_ind = find_value_smaller_than_5(table)
    # Guarding on the column count prevents popping the only remaining column,
    # which would leave an empty row and fail on the subsequent index access.
    while exist and len(table[0]) > 1:
        last_ind = len(table[0]) - 1
        # After the pop below, index `collapse_ind` refers to the former right
        # neighbour; the last column has none, so it goes to the left instead.
        move_to = collapse_ind if collapse_ind < last_ind else collapse_ind - 1

        new_table = []
        for row in table:
            new_row = row[:]
            val = new_row.pop(collapse_ind)
            new_row[move_to] += val
            new_table.append(new_row)

        table = new_table
        exist, collapse_ind = find_value_smaller_than_5(table)

    return table

def chi_square_analysis(param, treatment):
    """Chi-square test of independence between a discrete property and a treatment.

    Rows in which either value is missing are dropped. A contingency table is
    then built with one row per treatment level (in sorted order, so False/0
    precedes True/1) and one column per distinct property value (in order of
    first appearance). Sparse columns are merged by ``collapse_index`` before
    the table is handed to ``scipy.stats.chi2_contingency``.

    Parameters
    ----------
    param : pandas.Series
        Discrete covariate.
    treatment : pandas.Series
        Treatment indicator, aligned with ``param`` on the index.

    Returns
    -------
    The first two items of the ``chi2_contingency`` result: the test statistic
    and the p-value.

    Notes
    -----
    ``chi2_contingency`` is called with its defaults, so per the scipy
    documentation Yates' continuity correction is applied when the collapsed
    table has one degree of freedom.
    """
    frame = pd.DataFrame({'treatment': treatment, 'property': param}).dropna()

    # Take the levels from the cleaned frame and compare against the actual
    # values: counting levels before the joint dropna, or matching them against
    # positional integers, produces all-zero rows for treatments that are not
    # coded 0/1 or whose level vanished together with the missing covariates.
    treatment_levels = sorted(frame["treatment"].unique())
    property_values = frame["property"].unique()

    treatment_options = [
        [((frame["property"] == val) & (frame["treatment"] == level)).sum()
         for val in property_values]
        for level in treatment_levels
    ]
    treatment_options = collapse_index(treatment_options)

    obs = np.array(treatment_options)
    return chi2_contingency(obs)[:2]

## Load Country Data
To see the results for a specific country, uncomment the line that selects the desired country (and comment out the others), then go to Kernel → Restart & Run All.

In [3]:
# Load the PISA 2018 extract; the path is relative to the notebook's working directory.
df = pd.read_csv("PISA2018_data.csv")

# Exactly one of the three `country = ...` selections below should be active.
# Each selection keeps only rows with DURECEC < 7 (rows with a missing DURECEC
# fail the comparison and are therefore excluded) and takes a copy so that the
# columns added further down do not touch `df`.
# NOTE(review): the meaning of the DURECEC cut-off and of LANGTEST_QQQ code 313
# is not stated here; 313 presumably denotes English (see the Canada comment
# below) -- TODO confirm against the PISA codebook.

# extract Australia-based sample 
# country = df[(df["CNT"]=="AUS") & (df["DURECEC"]<7)].copy()

# extract Ireland-based sample
country = df[(df["CNT"]=='IRL') & (df["LANGTEST_QQQ"] == 313) & (df["DURECEC"]<7)].copy()

# extract English-speaking student population in canada
# country = df[(df["CNT"]=="CAN") & (df["LANGTEST_QQQ"] == 313) & (df["DURECEC"]<7)].copy()

# Boolean treatment indicators derived from DURECEC:
# "Treatment 1" is True when DURECEC > 0, "Treatment 2" when DURECEC > 1.
country["Treatment 1"] = country["DURECEC"] > 0
country["Treatment 2"] = country["DURECEC"] > 1
# Show (rows, columns) of the selected sample.
country.shape

(3228, 30)

# Gender 
The variable ST004D01T represents student gender. The value 1 represents a female while 2 represents a male. 

In [3]:
def gender_dependency_test(country, treatment):
    """Check whether student gender (ST004D01T) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    gender with the sum and mean of the ``treatment`` column (for a boolean
    treatment: the number and share of treated students).
    """
    statistic, p_value = chi_square_analysis(country["ST004D01T"], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    gender_labels = {1:"Female", 2: "Male"}
    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'ST004D01T':'gender'}

    per_gender = country.groupby(["ST004D01T"])[treatment].agg(['sum', 'mean'])
    per_gender = per_gender.rename(index=gender_labels)
    return per_gender.reset_index().rename(columns=column_labels)

In [52]:
# Treatment 1 analysis
gender_dependency_test(country, "Treatment 1")

Chi square: 0.702162505615799, p-value:0.4020580136955434


Unnamed: 0,gender,Treatment 1 No.,Treatment 1 share
0,Female,1674,0.974389
1,Male,1463,0.968874


In [5]:
# Treatment 2 analysis
gender_dependency_test(country, "Treatment 2")

Chi square: 4.748117749671582, p-value:0.029330359952351885


Unnamed: 0,gender,Treatment 2 No.,Treatment 2 share
0,Female,968,0.563446
1,Male,909,0.601987


# Year of birth 
This covariate is irrelevant for the Canada and Ireland samples, as all students in these samples were born in 2002.

In [6]:
def birth_year_dependency_test(country, treatment):
    """Check whether year of birth (ST003D03T) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    birth year with the sum and mean of the ``treatment`` column.
    """
    statistic, p_value = chi_square_analysis(country["ST003D03T"], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'ST003D03T':'Year of birth'}

    per_year = country.groupby(["ST003D03T"])[treatment].agg(['sum', 'mean'])
    return per_year.reset_index().rename(columns=column_labels)

In [7]:
# Treatment 1 analysis
birth_year_dependency_test(country, "Treatment 1")

Chi square: 0.0, p-value:1.0


Unnamed: 0,Year of birth,Treatment 1 No.,Treatment 1 share
0,2002.0,3137,0.971809


In [8]:
# Treatment 2 analysis
birth_year_dependency_test(country, "Treatment 2")

Chi square: 0.0, p-value:1.0


Unnamed: 0,Year of birth,Treatment 2 No.,Treatment 2 share
0,2002.0,1877,0.581475


# Month of birth

In [9]:
def birth_month_dependency_test(country, treatment):
    """Check whether month of birth (ST003D02T) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    birth month with the sum and mean of the ``treatment`` column.
    """
    statistic, p_value = chi_square_analysis(country["ST003D02T"], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'ST003D02T':'Month of birth'}

    per_month = country.groupby(["ST003D02T"])[treatment].agg(['sum', 'mean'])
    return per_month.reset_index().rename(columns=column_labels)

In [10]:
# Treatment 1 analysis
birth_month_dependency_test(country, 'Treatment 1')

Chi square: 4.112266648121012, p-value:0.9421403286514025


Unnamed: 0,Month of birth,Treatment 1 No.,Treatment 1 share
0,1.0,235,0.955285
1,2.0,200,0.966184
2,3.0,260,0.977444
3,4.0,249,0.968872
4,5.0,263,0.97048
5,6.0,270,0.971223
6,7.0,295,0.973597
7,8.0,254,0.97318
8,9.0,312,0.975
9,10.0,273,0.975


In [11]:
# treatment 2 analysis
birth_month_dependency_test(country, 'Treatment 2')

Chi square: 65.37113550428629, p-value:9.175677780628961e-10


Unnamed: 0,Month of birth,Treatment 2 No.,Treatment 2 share
0,1.0,132,0.536585
1,2.0,111,0.536232
2,3.0,119,0.447368
3,4.0,152,0.59144
4,5.0,178,0.656827
5,6.0,167,0.600719
6,7.0,197,0.650165
7,8.0,187,0.716475
8,9.0,193,0.603125
9,10.0,140,0.5


# Immigration Status

In [12]:
def immigration_dependency_test(country, treatment):
    """Check whether immigration status (IMMIG) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    immigration status with the sum and mean of the ``treatment`` column.
    IMMIG codes 1, 2 and 3 are shown as "Native", "Second Generation" and
    "First Generation".
    """
    statistic, p_value = chi_square_analysis(country['IMMIG'], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    status_labels = {1:"Native",
                     2:"Second Generation",
                     3: "First Generation"}
    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment)}

    per_status = country.groupby(["IMMIG"])[treatment].agg(['sum', 'mean'])
    per_status = per_status.rename(index=status_labels)
    return per_status.reset_index().rename(columns=column_labels)

In [53]:
# Treatment 1 analysis
immigration_dependency_test(country, "Treatment 1")

Chi square: 0.4346931163548648, p-value:0.8046510625733742


Unnamed: 0,IMMIG,Treatment 1 No.,Treatment 1 share
0,Native,2721,0.97248
1,Second Generation,194,0.97
2,First Generation,222,0.965217


In [14]:
# Treatment 2 analysis
immigration_dependency_test(country, "Treatment 2")

Chi square: 4.8875225320450575, p-value:0.08683363207822531


Unnamed: 0,IMMIG,Treatment 2 No.,Treatment 2 share
0,Native,1607,0.574339
1,Second Generation,122,0.61
2,First Generation,148,0.643478


# Students International Grade

In [15]:
def grade_dependency_test(country, treatment):
    """Check whether the student's grade (ST001D01T) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    grade with the sum and mean of the ``treatment`` column.
    """
    statistic, p_value = chi_square_analysis(country['ST001D01T'], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'ST001D01T':"Grade"}

    per_grade = country.groupby(['ST001D01T'])[treatment].agg(['sum', 'mean'])
    return per_grade.reset_index().rename(columns=column_labels)

In [16]:
# Treatment 1 analysis
grade_dependency_test(country, "Treatment 1")

Chi square: 1.049082143017705, p-value:0.5918269082824679


Unnamed: 0,Grade,Treatment 1 No.,Treatment 1 share
0,7.0,1,1.0
1,8.0,43,1.0
2,9.0,2055,0.973934
3,10.0,826,0.967213
4,11.0,212,0.963636


In [17]:
# Treatment 2 analysis
grade_dependency_test(country, "Treatment 2")

Chi square: 57.553350413732886, p-value:1.9578641847446754e-12


Unnamed: 0,Grade,Treatment 2 No.,Treatment 2 share
0,7.0,0,0.0
1,8.0,32,0.744186
2,9.0,1319,0.625118
3,10.0,420,0.491803
4,11.0,106,0.481818


# Mother Education (ISCED level)

In [18]:
def education_independence_test(country, covariate, treatment):
    """Check whether a parental education level is independent of a treatment.

    ``covariate`` names the education column to analyse (the notebook calls
    this with 'MISCED' and 'FISCED'). Prints the chi-square statistic and
    p-value, then returns one row per education level with the sum and mean of
    the ``treatment`` column. Level codes 0-6 are replaced by ISCED labels.
    """
    statistic, p_value = chi_square_analysis(country[covariate], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    isced_labels = {0:'None',
                    1: 'ISCED 1',
                    2: 'ISCED 2',
                    3: 'ISCED 3B,C',
                    4: 'ISCED 3A, ISCED 4',
                    5: 'ISCED 5B',
                    6: 'ISCED 5A, 6'}
    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment)}

    per_level = country.groupby([covariate])[treatment].agg(['sum', 'mean'])
    per_level = per_level.rename(index=isced_labels)
    return per_level.reset_index().rename(columns=column_labels)

In [19]:
# Treatment 1 analysis
education_independence_test(country, 'MISCED', "Treatment 1")

Chi square: 6.622465356228217, p-value:0.08495589574337858


Unnamed: 0,MISCED,Treatment 1 No.,Treatment 1 share
0,,7,1.0
1,ISCED 1,28,0.933333
2,ISCED 2,191,0.979487
3,"ISCED 3B,C",75,0.9375
4,"ISCED 3A, ISCED 4",996,0.965116
5,ISCED 5B,617,0.985623
6,"ISCED 5A, 6",1223,0.972178


In [20]:
# Treatment 2 Analysis
education_independence_test(country, 'MISCED', "Treatment 2")

Chi square: 20.34048865413816, p-value:0.0010785645487448423


Unnamed: 0,MISCED,Treatment 2 No.,Treatment 2 share
0,,5,0.714286
1,ISCED 1,17,0.566667
2,ISCED 2,96,0.492308
3,"ISCED 3B,C",47,0.5875
4,"ISCED 3A, ISCED 4",559,0.541667
5,ISCED 5B,383,0.611821
6,"ISCED 5A, 6",770,0.612083


# Father Education (ISCED level)

In [21]:
# Treatment 1 analysis
education_independence_test(country, 'FISCED', "Treatment 1")

Chi square: 3.396711197700789, p-value:0.4937563762917513


Unnamed: 0,FISCED,Treatment 1 No.,Treatment 1 share
0,,9,0.9
1,ISCED 1,74,0.936709
2,ISCED 2,331,0.967836
3,"ISCED 3B,C",65,0.984848
4,"ISCED 3A, ISCED 4",1023,0.969668
5,ISCED 5B,576,0.979592
6,"ISCED 5A, 6",1059,0.973346


In [22]:
# Treatment 2 analysis
education_independence_test(country, 'FISCED', "Treatment 2")

Chi square: 15.025775990437191, p-value:0.010252767028775726


Unnamed: 0,FISCED,Treatment 2 No.,Treatment 2 share
0,,5,0.5
1,ISCED 1,42,0.531646
2,ISCED 2,174,0.508772
3,"ISCED 3B,C",40,0.606061
4,"ISCED 3A, ISCED 4",596,0.564929
5,ISCED 5B,351,0.596939
6,"ISCED 5A, 6",669,0.61489


# Language used at home
This measure is based on the question "What language do you speak at home most of the time?". Possible values:
1. Language of test
2. Other language.

In [23]:
def language_independence_test(country, treatment):
    """Check whether the language used at home (ST022Q01TA) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    answer with the sum and mean of the ``treatment`` column. Codes 1 and 2
    are shown as "Language of test" and "other language".
    """
    statistic, p_value = chi_square_analysis(country['ST022Q01TA'], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    answer_labels = {1:"Language of test",
                     2:"other language"}
    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'ST022Q01TA': 'Language used at home'}

    per_answer = country.groupby(['ST022Q01TA'])[treatment].agg(['sum', 'mean'])
    per_answer = per_answer.rename(index=answer_labels)
    return per_answer.reset_index().rename(columns=column_labels)

In [24]:
# Treatment 1 analysis
language_independence_test(country, 'Treatment 1')

Chi square: 0.0, p-value:1.0


Unnamed: 0,Language used at home,Treatment 1 No.,Treatment 1 share
0,Language of test,2941,0.971589
1,other language,196,0.975124


In [25]:
# Treatment 2 analysis
language_independence_test(country, 'Treatment 2')

Chi square: 3.47407617497129, p-value:0.06233752096782575


Unnamed: 0,Language used at home,Treatment 2 No.,Treatment 2 share
0,Language of test,1747,0.577139
1,other language,130,0.646766


# Socio-economic index (ESCS)

In [26]:
def escs_independence_test(country, treatment):
    """Check whether the socio-economic index (ESCS) is independent of a treatment.

    ESCS is split into four equal-frequency bins (Q1-Q4) with ``pd.qcut``.
    Prints the chi-square statistic and p-value, then returns one row per
    quartile with the sum and mean of the ``treatment`` column.

    The quartiles are held in a local Series, so ``country`` is left
    unmodified, in line with the ISEI and wealth tests in this notebook. The
    Series is named "ESCS_quartiles" so the returned table keeps that column
    name.
    """
    quartiles = pd.qcut(country["ESCS"], 4, labels=['Q1','Q2','Q3', 'Q4']).rename("ESCS_quartiles")
    chi_square, p_value = chi_square_analysis(quartiles, country[treatment])
    print("Chi square: {}, p-value:{}".format(chi_square, p_value))
    escs = country.groupby([quartiles])[treatment].agg(['sum', 'mean'])
    return escs.reset_index().rename(columns={'sum':'{} No.'.format(treatment),
                                               'mean': '{} share'.format(treatment)})



In [27]:
# Treatment 1 analysis
escs_independence_test(country, 'Treatment 1')

Chi square: 5.2807364774212076, p-value:0.1523572671777357


Unnamed: 0,ESCS_quartiles,Treatment 1 No.,Treatment 1 share
0,Q1,778,0.964064
1,Q2,791,0.980173
2,Q3,780,0.966543
3,Q4,788,0.976456


In [28]:
# Treatment 2 analysis
escs_independence_test(country, 'Treatment 2')

Chi square: 21.39968854342193, p-value:8.69545936604138e-05


Unnamed: 0,ESCS_quartiles,Treatment 2 No.,Treatment 2 share
0,Q1,432,0.535316
1,Q2,458,0.567534
2,Q3,466,0.577447
3,Q4,521,0.645601


# Primary start age ( ST126Q01TA)
This measure is based on the question "How old were you when you started <ISCED 1>?" (answer in years). Possible values are as follows: 
1. 3 years old or younger
2. 4 years old
3. 5 years old
4. 6 years old
5. 7 years old
6. 8 years old 
7. 9 years old or older

In [29]:
def primary_start_independence_test(country, treatment):
    """Check whether the primary-school start age (ST126Q01TA) is independent of a treatment.

    Prints the chi-square statistic and p-value, then returns one row per
    start-age category with the sum and mean of the ``treatment`` column.
    Codes 1-7 are replaced by their age labels.
    """
    statistic, p_value = chi_square_analysis(country['ST126Q01TA'], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    age_labels = {1: '3 years or younger',
                  2: '4 years',
                  3: '5 years',
                  4: '6 years',
                  5: '7 years',
                  6: '8 years',
                  7: '9 years or older'}
    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'ST126Q01TA':'Primary start age'}

    per_age = country.groupby(['ST126Q01TA'])[treatment].agg(['sum', 'mean'])
    per_age = per_age.rename(index=age_labels)
    return per_age.reset_index().rename(columns=column_labels)
    

In [30]:
# Treatment 1 Analysis
primary_start_independence_test(country, "Treatment 1")

Chi square: 10.176642344995804, p-value:0.001422309511628427


Unnamed: 0,Primary start age,Treatment 1 No.,Treatment 1 share
0,4 years,1415,0.955436
1,5 years,1336,0.983076
2,6 years,245,0.991903
3,7 years,105,1.0
4,8 years,27,1.0
5,9 years or older,9,1.0


In [31]:
# Treatment 2 Analysis
primary_start_independence_test(country, "Treatment 2")

Chi square: 357.1241828854779, p-value:4.2759214821714205e-77


Unnamed: 0,Primary start age,Treatment 2 No.,Treatment 2 share
0,4 years,614,0.414585
1,5 years,927,0.682119
2,6 years,204,0.825911
3,7 years,96,0.914286
4,8 years,27,1.0
5,9 years or older,9,1.0


# Language used with mother ( ST023Q01TA ) 
This measure is based on the question "Which language do you usually speak with: my mother?". Possible values are as follows: 
1. Mostly heritage language,
2. About equally often heritage and test language
3. Mostly test language
4. Not applicable - heritage language and test language are the same

In [32]:
def language_used_with_parent(country, covariate, treatment):
    """Check whether the language spoken with a parent is independent of a treatment.

    ``covariate`` names the column to analyse. Prints the chi-square statistic
    and p-value, then returns one row per answer with the sum and mean of the
    ``treatment`` column. Answer codes 0-4 are replaced by text labels. The
    grouping column is titled "Language used with mother" when ``covariate``
    is "ST023Q01TA"; otherwise a column named ST023Q02TA is titled
    "Language used with father".
    """
    statistic, p_value = chi_square_analysis(country[covariate], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    answer_labels = {0:'None',
                     1: 'Mostly heritage language',
                     2: 'About equally often heritage and test language',
                     3: 'Mostly test language',
                     4: 'Not applicable'}
    per_answer = country.groupby([covariate])[treatment].agg(['sum', 'mean'])
    per_answer = per_answer.rename(index=answer_labels)

    # Only the title given to the grouping column differs between the parents.
    if covariate == "ST023Q01TA":
        source_column, display_name = 'ST023Q01TA', 'Language used with mother'
    else:
        source_column, display_name = 'ST023Q02TA', 'Language used with father'

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     source_column: display_name}
    return per_answer.reset_index().rename(columns=column_labels)

In [33]:
# Treatment 1 Analysis
language_used_with_parent(country, "ST023Q01TA", "Treatment 1")

Chi square: 7.521094917495105, p-value:0.05701890348818106


Unnamed: 0,Language used with mother,Treatment 1 No.,Treatment 1 share
0,Mostly heritage language,788,0.963325
1,About equally often heritage and test language,220,0.952381
2,Mostly test language,1356,0.976945
3,Not applicable,773,0.977244


In [34]:
# Treatment 2 Analysis
language_used_with_parent(country, "ST023Q01TA", "Treatment 2")

Chi square: 3.2086212182394562, p-value:0.3605646927958255


Unnamed: 0,Language used with mother,Treatment 2 No.,Treatment 2 share
0,Mostly heritage language,494,0.603912
1,About equally often heritage and test language,129,0.558442
2,Mostly test language,808,0.582133
3,Not applicable,446,0.563843


# Language used with father ( ST023Q02TA )
See details in the previous section

In [35]:
# Treatment 1 Analysis
language_used_with_parent(country, "ST023Q02TA", "Treatment 1")

Chi square: 5.680205012859864, p-value:0.1282489625033575


Unnamed: 0,Language used with father,Treatment 1 No.,Treatment 1 share
0,Mostly heritage language,786,0.964417
1,About equally often heritage and test language,209,0.954338
2,Mostly test language,1357,0.976259
3,Not applicable,785,0.976368


In [36]:
# Treatment 2 Analysis
language_used_with_parent(country, "ST023Q02TA", "Treatment 2")

Chi square: 3.428706128084681, p-value:0.33012709174261895


Unnamed: 0,Language used with father,Treatment 2 No.,Treatment 2 share
0,Mostly heritage language,489,0.6
1,About equally often heritage and test language,122,0.557078
2,Mostly test language,816,0.58705
3,Not applicable,450,0.559701


## Occupation Classification - Mother
Occupation is classified by the International Standard Classification of Occupations (https://www.ilo.org/public/english/bureau/stat/isco/). <br> We divided the occupation codes into the following categories:
1. Armed forces occupations & managers
2. Professionals
3. Technicians and associate professionals
4. Clerical support workers
5. Service and sales workers
6. Skilled agricultural, forestry and fishery workers
7. Craft and related trades workers
8. Plant and machine operators, and assemblers
9. Elementary occupations
10. Housewife and social beneficiary 

In [37]:
def occupation_dependency_test(country, covariate, treatment):
    """Check whether a parent's occupation class is independent of a treatment.

    ``covariate`` names the occupation column to analyse (the notebook calls
    this with 'OCOD1' and 'OCOD2'). Prints the chi-square statistic and
    p-value, then returns one row per occupation class with the sum and mean
    of the ``treatment`` column. Class letters 'A'-'J' are replaced by their
    descriptions. When ``covariate`` contains "1" a column named OCOD1 is
    titled as the mother's classification; otherwise a column named OCOD2 is
    titled as the father's.
    """
    statistic, p_value = chi_square_analysis(country[covariate], country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    class_labels = {'A': 'Armed forces occupations & Managers',
                    'B': 'Professionals',
                    'C': 'Technicians and associate professionals',
                    'D': 'Clerical support workers',
                    'E': 'Service and sales workers',
                    'F': 'Skilled agricultural, forestry and fishery workers',
                    'G': 'Craft and related trades workers',
                    'H': 'Plant and machine operators, and assemblers',
                    'I': 'Elementary occupations',
                    'J': 'Housewife and social beneficiary'}
    per_class = country.groupby([covariate])[treatment].agg(['sum', 'mean'])
    per_class = per_class.rename(index=class_labels)

    # Only the title given to the grouping column differs between the parents.
    if "1" in covariate:
        source_column, display_name = 'OCOD1', 'Occupation Classification - Mother'
    else:
        source_column, display_name = 'OCOD2', 'Occupation Classification - Father'

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     source_column: display_name}
    return per_class.reset_index().rename(columns=column_labels)

In [38]:
# Treatment 1 Analysis
occupation_dependency_test(country, 'OCOD1', 'Treatment 1')

Chi square: 3.5130186302338724, p-value:0.6214186653748609


Unnamed: 0,Occupation Classification - Mother,Treatment 1 No.,Treatment 1 share
0,Armed forces occupations & Managers,280,0.962199
1,Professionals,909,0.976369
2,Technicians and associate professionals,344,0.969014
3,Clerical support workers,221,0.965066
4,Service and sales workers,904,0.976242
5,"Skilled agricultural, forestry and fishery wor...",15,0.9375
6,Craft and related trades workers,45,0.957447
7,"Plant and machine operators, and assemblers",37,0.973684
8,Elementary occupations,160,0.941176
9,Housewife and social beneficiary,222,0.986667


In [39]:
# Treatment 2 Analysis
occupation_dependency_test(country, 'OCOD1', 'Treatment 2')

Chi square: 15.26145416765581, p-value:0.08400011580704984


Unnamed: 0,Occupation Classification - Mother,Treatment 2 No.,Treatment 2 share
0,Armed forces occupations & Managers,180,0.618557
1,Professionals,573,0.615467
2,Technicians and associate professionals,212,0.597183
3,Clerical support workers,122,0.532751
4,Service and sales workers,517,0.558315
5,"Skilled agricultural, forestry and fishery wor...",7,0.4375
6,Craft and related trades workers,22,0.468085
7,"Plant and machine operators, and assemblers",22,0.578947
8,Elementary occupations,94,0.552941
9,Housewife and social beneficiary,128,0.568889


## Occupation Classification - Father
See details in the previous section. 

In [40]:
# Treatment 1 Analysis
occupation_dependency_test(country, 'OCOD2', 'Treatment 1')

Chi square: 2.3718955931174652, p-value:0.8825200885777142


Unnamed: 0,Occupation Classification - Father,Treatment 1 No.,Treatment 1 share
0,Armed forces occupations & Managers,505,0.976789
1,Professionals,605,0.975806
2,Technicians and associate professionals,293,0.973422
3,Clerical support workers,46,1.0
4,Service and sales workers,351,0.975
5,"Skilled agricultural, forestry and fishery wor...",198,0.965854
6,Craft and related trades workers,682,0.964639
7,"Plant and machine operators, and assemblers",334,0.97093
8,Elementary occupations,80,0.963855
9,Housewife and social beneficiary,43,0.955556


In [41]:
# Treatment 2 Analysis
occupation_dependency_test(country, 'OCOD2', 'Treatment 2')

Chi square: 27.158070270615276, p-value:0.0013169759609365612


Unnamed: 0,Occupation Classification - Father,Treatment 2 No.,Treatment 2 share
0,Armed forces occupations & Managers,322,0.622824
1,Professionals,367,0.591935
2,Technicians and associate professionals,200,0.664452
3,Clerical support workers,33,0.717391
4,Service and sales workers,200,0.555556
5,"Skilled agricultural, forestry and fishery wor...",106,0.517073
6,Craft and related trades workers,398,0.562942
7,"Plant and machine operators, and assemblers",180,0.523256
8,Elementary occupations,44,0.53012
9,Housewife and social beneficiary,27,0.6


## ISEI of mother (International Socio-Economic Index of Occupational Status)  

In [42]:
def parent_isei_dependency_test(country, covariate, treatment):
    """Check whether a parent's ISEI quartile is independent of a treatment.

    ``covariate`` names the ISEI column to analyse (the notebook calls this
    with 'BMMJ1' and 'BFMJ2'); it is split into four equal-frequency bins
    (Q1-Q4) with ``pd.qcut``. Prints the chi-square statistic and p-value,
    then returns one row per quartile with the sum and mean of the
    ``treatment`` column. When ``covariate`` contains "1" a column named BMMJ1
    is titled "ISEI - Mother"; otherwise a column named BFMJ2 is titled
    "ISEI - Father".
    """
    isei_quartiles = pd.qcut(country[covariate], 4, labels=['Q1','Q2','Q3', 'Q4'])
    statistic, p_value = chi_square_analysis(isei_quartiles, country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))
    per_quartile = country.groupby([isei_quartiles])[treatment].agg(['sum', 'mean'])

    # Only the title given to the grouping column differs between the parents.
    if '1' in covariate:
        source_column, display_name = 'BMMJ1', 'ISEI - Mother'
    else:
        source_column, display_name = 'BFMJ2', 'ISEI - Father'

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     source_column: display_name}
    return per_quartile.reset_index().rename(columns=column_labels)

In [43]:
# Treatment 1 Analysis
parent_isei_dependency_test(country, 'BMMJ1', 'Treatment 1')

Chi square: 1.4246945572487262, p-value:0.6997565359557241


Unnamed: 0,ISEI - Mother,Treatment 1 No.,Treatment 1 share
0,Q1,832,0.969697
1,Q2,758,0.975547
2,Q3,870,0.967742
3,Q4,677,0.975504


In [44]:
# Treatment 2 Analysis
parent_isei_dependency_test(country, 'BMMJ1', 'Treatment 2')

Chi square: 19.131184204462652, p-value:0.0002568432652579091


Unnamed: 0,ISEI - Mother,Treatment 2 No.,Treatment 2 share
0,Q1,473,0.551282
1,Q2,421,0.541828
2,Q3,539,0.599555
3,Q4,444,0.639769


# ISEI of Father (International Socio-Economic Index of Occupational Status) 

In [45]:
# Treatment 1 Analysis
parent_isei_dependency_test(country, 'BFMJ2', 'Treatment 1')

Chi square: 1.545145575975776, p-value:0.6718906268658227


Unnamed: 0,ISEI - Father,Treatment 1 No.,Treatment 1 share
0,Q1,823,0.971665
1,Q2,769,0.967296
2,Q3,765,0.970812
3,Q4,780,0.977444


In [46]:
# Treatment 2 Analysis
parent_isei_dependency_test(country, 'BFMJ2', 'Treatment 2')

Chi square: 13.273912228926513, p-value:0.00408018788264028


Unnamed: 0,ISEI - Father,Treatment 2 No.,Treatment 2 share
0,Q1,470,0.5549
1,Q2,434,0.545912
2,Q3,484,0.614213
3,Q4,489,0.612782


# Wealth Index

In [47]:
def wealth_index_dependency_test(country, treatment):
    """Check whether the wealth-index quartile (WEALTH) is independent of a treatment.

    WEALTH is split into four equal-frequency bins (Q1-Q4) with ``pd.qcut``.
    Prints the chi-square statistic and p-value, then returns one row per
    quartile with the sum and mean of the ``treatment`` column.
    """
    wealth_quartiles = pd.qcut(country["WEALTH"], 4, labels=['Q1','Q2','Q3', 'Q4'])
    statistic, p_value = chi_square_analysis(wealth_quartiles, country[treatment])
    print("Chi square: {}, p-value:{}".format(statistic, p_value))

    column_labels = {'sum':'{} No.'.format(treatment),
                     'mean': '{} share'.format(treatment),
                     'WEALTH':'Wealth Index'}

    per_quartile = country.groupby([wealth_quartiles])[treatment].agg(['sum', 'mean'])
    return per_quartile.reset_index().rename(columns=column_labels)

In [48]:
# Treatment 1 Analysis
wealth_index_dependency_test(country, "Treatment 1")

Chi square: 3.3750764330283847, p-value:0.33732934751347243


Unnamed: 0,Wealth Index,Treatment 1 No.,Treatment 1 share
0,Q1,788,0.976456
1,Q2,780,0.965347
2,Q3,789,0.977695
3,Q4,780,0.967742


In [49]:
# Treatment 2 Analysis
wealth_index_dependency_test(country, "Treatment 2")

Chi square: 3.576527561025356, p-value:0.31097154520324083


Unnamed: 0,Wealth Index,Treatment 2 No.,Treatment 2 share
0,Q1,455,0.563817
1,Q2,461,0.570545
2,Q3,472,0.584882
3,Q4,489,0.6067
