In [83]:
import pandas as pd
import numpy as np
from datetime import date
from statistics import mean, mode
from pandas.api.types import CategoricalDtype
import math
from scipy.stats import norm

In [84]:
today = date.today()
print(today)

2024-02-10


# Tables

In [85]:
path_tables = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 2 tables/"

In [86]:
# tables
filename_careerorglink = "careerorglink.xlsx"
filename_leadercareerlink = "leadercareerlink.xlsx"
filename_orgtree = "orgtree.xlsx"
filename_elected = "positions_elected.xlsx"

In [87]:
# career-org link
# col = pd.read_excel(path_tables + filename_careerorglink,dtype="str")
# col.shape

In [88]:
# col.columns

In [89]:
# leader-career link
# lcl = pd.read_excel(path_tables + filename_leadercareerlink,dtype="str")
# lcl.shape

In [90]:
# lcl.columns

In [91]:
# orgtree
# org = pd.read_excel(path_tables + filename_orgtree,dtype="str")
# org.shape

In [92]:
# org.columns

In [93]:
# elected = pd.read_excel(path_tables + filename_elected,dtype="str")
# elected.shape

In [94]:
# elected.columns

# Queries

In [95]:
path_queries = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 3 queries/"

In [96]:
filename_leaderjob_all = "leaderjob_electUnelect_inOutgov.xlsx"
filename_leaderjob_no_spa = "leaderjob_no_spa.xlsx"
filename_leaderjobtransition_no_spa = "leaderjobtransition_no_spa.xlsx"

In [97]:
# leader jobs
# ljobs_all = pd.read_excel(path_queries + filename_leaderjob_all,dtype="str")
# ljobs_all.shape

In [98]:
# ljobs = pd.read_excel(path_queries + filename_leaderjob_no_spa,dtype="str")
# ljobs.shape

In [99]:
# transitions used for analysis - no SPA - no local-local
trans = pd.read_excel(path_queries + filename_leaderjobtransition_no_spa,dtype="str")
trans.shape

(4306, 31)

# Analysis - Research Note

In [100]:
path_analysis = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/analysis/"

In [101]:
# analysis sub-paths
study0_path = "2023.10.04 Study 0 - research note/"
study1_path = "2023.10.04 Study 1 - political capital/"
study2_path = "2023.10.04 Study 2 - commitment vs control/"
study3_path = "2023.10.04 Study 3 - reds vs experts/"

In [102]:
# ljobs = ljobs.astype({"CareerStartYear":"int","CareerStartDate":"int"})
# ljobs.dtypes

# Functions

In [103]:
def merge_results(m):
    
    print("\nMerge Results...")
    print("")
    print("\tshape     :",m.shape)
    print("\tleft_only :",m[m["_merge"]=="left_only"].shape)
    print("\tboth      :",m[m["_merge"]=="both"].shape)
    print("\tright_only:",m[m["_merge"]=="right_only"].shape)

In [104]:
# using this on (PI,OrgName) will ensure unique & non-null keys
# using this on a larger df will ensure unique rows and non-null keys, but not unique keys

def unique_non_null_rows(olddf):
    
    df = olddf.copy()
    
    
    ### drop duplicates
    df = df.drop_duplicates(keep="first",ignore_index=True)

    ### drop null rows
    df.dropna(how="all",axis=0,inplace=True)
    
    # drop rows with null PI
    df = df[~(df["PrimaryInstitution"].isna())]
        
    ### drop rows where PI contains stop words
    stop_words_lower = ["uncertain","current","deprecated","please_revise"]
    df = df[ ~ df["PrimaryInstitution"].str.lower().isin(stop_words_lower)]
            

    print("\nUnique Non-null Rows...")
    print("")
    print("\tNon-unique rows:",olddf.shape)
    print("\tUnique rows    :",df.shape)

    ### sort
    df = df.sort_values(["PrimaryInstitution","OrgName"])
    
    return df

In [105]:
def create_time_series(series,group_var,count_var):
    
    yeardist = series.groupby(group_var,as_index=False).count().sort_values(group_var)
    
    max_year = max(series[group_var])
    x = pd.DataFrame({"year":pd.Series(range(min(yeardist[group_var].astype(int)),max(yeardist[group_var].astype(int))+1))})
    
    yeardist[group_var] = yeardist[group_var].astype(int,errors="ignore")
    
    ts = x.merge(yeardist,left_on="year",right_on=group_var,how="left")
    ts.loc[ts[count_var].isna(),count_var]=0
    # merge_results(ts)
    
    ts_columns = ["year",count_var]
    ts = ts[ts_columns]
    
    return ts

In [106]:
# create InstitutionCategory (English) variable InstitutionType

# use with apply. e.g., 
# df["InstitutionCategory] = df["InstitutionType"].apply(define_institution_category)

def define_institution_category(PI):
    
    c = "Social"
    
    if PI=="정권기관":
        c = "Government"
    elif PI=="노동당":
        c = "Party"
    elif PI=="인민군":
        c = "Military"
        
    return c

In [107]:
def two_sample_ttest_proportions(x1,x2,n1,n2,alpha):
    
    pstar = (x1+x2)/(n1+n2)
    p1 = x1/n1
    p2 = x2/n2
    z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
    pvalue = 1-norm.cdf(abs(z))
    
    # add significance stars to result
    stars = ""
    if pvalue<.05:
        stars = "*"
    if pvalue<.01:
        stars = "**"
    if pvalue<.001:
        stars = "***"
    result = str(pvalue)+stars
    
    print("p1:\t",p1)
    print("p2:\t",p2)
    print("p1-p2:\t",(p1-p2))
    print("z:\t",z)
    print("pvalue:\t",result)

In [108]:
# apply two_sample_ttest_proportions across categories
# InstitutionCategory_1

def ttest_by_institution_category(ics,stat,between_row,within_row,period1_col,period2_col):
        
    for ic in ics:

        print("\n\n",ic,"\n\n")

        stat_ic = stat[stat.InstitutionCategory_1==ic]

        # two-sample ttest of proportions: p(between PI trans in s1) > p(between PI trans in s2)
        x1 = stat_ic.iloc[period1_col,between_row]
        n1 = stat_ic.iloc[period1_col,between_row] + stat_ic.iloc[period1_col,within_row]
        x2 = stat_ic.iloc[period2_col,between_row]
        n2 = stat_ic.iloc[period2_col,between_row] + stat_ic.iloc[period2_col,within_row]
        alpha = .05
        print(x1,x2,n1,n2,alpha)
        two_sample_ttest_proportions(x1,x2,n1,n2,alpha)

In [177]:
# apply two_sample_ttest_proportions across categories
# InstitutionCategory_2

def ttest_by_institution_category_2(ics,stat,between_row,within_row,period1_col,period2_col):
        
    for ic in ics:

        print("\n\n",ic,"\n\n")

        stat_ic = stat[stat.InstitutionCategory_2==ic]

        # two-sample ttest of proportions: p(between PI trans in s1) > p(between PI trans in s2)
        x1 = stat_ic.iloc[period1_col,between_row]
        n1 = stat_ic.iloc[period1_col,between_row] + stat_ic.iloc[period1_col,within_row]
        x2 = stat_ic.iloc[period2_col,between_row]
        n2 = stat_ic.iloc[period2_col,between_row] + stat_ic.iloc[period2_col,within_row]
        alpha = .05
        print(x1,x2,n1,n2,alpha)
        two_sample_ttest_proportions(x1,x2,n1,n2,alpha)

# Format & Covariates

### 0. change datatypes

In [109]:
trans = trans.astype({"OrgAdvance":"int","PositionAdvance":"int",
                      "CareerStartYear_1":"int","CareerStartYear_2":"int",
                      "CareerStartDate_1":"int","CareerStartDate_2":"int"})
trans.dtypes

LeaderID                   object
CareerString_1             object
CareerDateString_2022_1    object
CareerStartYear_1           int32
CareerStartMonth_1         object
CareerStartDate_1           int32
CareerSubstring_1          object
InstitutionType_1          object
PrimaryInstitution_1       object
OrgName_1                  object
Local_1                    object
Position_1                 object
IsElected_1                object
OrgRank_1                  object
PositionRank_1             object
CareerString_2             object
CareerDateString_2022_2    object
CareerStartYear_2           int32
CareerStartMonth_2         object
CareerStartDate_2           int32
CareerSubstring_2          object
InstitutionType_2          object
PrimaryInstitution_2       object
OrgName_2                  object
Local_2                    object
Position_2                 object
IsElected_2                object
OrgRank_2                  object
PositionRank_2             object
OrgAdvance    

### 1. double-check exlcudes SPA jobs and local-local trans

In [110]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance'],
      dtype='object')

In [111]:
trans.shape

(4306, 31)

In [112]:
trans.Local_1.unique()

array(['False', 'True'], dtype=object)

In [113]:
# remove Local-Local ties
trans = trans[~((trans["Local_1"]=="True") & (trans["Local_2"]=="True"))]
trans.shape

(4222, 31)

### 2. add InstitutionCategory_1, InstitutionCategory_2

In [114]:
trans.InstitutionType_1.unique()

array(['정권기관', '국제친선단체', '노동당', '인민군', '당외곽및사회단체_사회부문(별책)',
       '당외곽및사회단체_사회부문', '당외곽및사회단체_체육부문', '당외곽및사회단체_정치부문', '당외곽및사회단체_대외부문',
       '당외곽및사회단체_근로단체', '당외곽및사회단체_종교부문', '당외곽및사회단체_경제부문(별책)',
       '당외곽및사회단체_경제부문'], dtype=object)

### 3. PISame, OrgSame

In [115]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance'],
      dtype='object')

In [116]:
trans["PISame"] = False
trans.loc[trans["PrimaryInstitution_1"]==trans["PrimaryInstitution_2"],["PISame"]] = True
trans.PISame.unique()

array([False,  True])

In [117]:
trans["OrgSame"] = np.nan
trans.loc[trans["PISame"]==True,"OrgSame"] = False
trans.loc[(trans["PISame"]==True) & (trans["OrgName_1"]==trans["OrgName_2"]),["OrgSame"]] = True
trans.OrgSame.unique()

array([nan, True, False], dtype=object)

### 4. OrgRankChange, PositionRankChange

In [118]:
trans["OrgRankChange"] = np.nan

In [119]:
trans.loc[trans["PISame"] & (trans["OrgRank_1"]>trans["OrgRank_2"]),"OrgRankChange"] = "lower"
trans.loc[trans["PISame"] & (trans["OrgRank_1"]==trans["OrgRank_2"]),"OrgRankChange"] = "same"
trans.loc[trans["PISame"] & (trans["OrgRank_1"]<trans["OrgRank_2"]),"OrgRankChange"] = "higher"

In [120]:
value_order = ["lower","same","higher"]
trans.OrgRankChange = trans.OrgRankChange.astype("category")
trans.OrgRankChange = trans.OrgRankChange.cat.set_categories(value_order)

In [121]:
trans.OrgRankChange.unique()

[NaN, 'same', 'higher', 'lower']
Categories (3, object): ['lower', 'same', 'higher']

In [122]:
trans["PositionRankChange"] = np.nan

In [123]:
# PositionRankChange, narrowly defined, with OrgSame=True
# trans.loc[trans["OrgSame"] & (trans["PositionRank_1"]>trans["PositionRank_2"]),"PositionRankChange"] = "lower"
# trans.loc[trans["OrgSame"] & (trans["PositionRank_1"]==trans["PositionRank_2"]),"PositionRankChange"] = "same"
# trans.loc[trans["OrgSame"] & (trans["PositionRank_1"]<trans["PositionRank_2"]),"PositionRankChange"] = "higher"

In [124]:
# PositionRankChange, broadly defined, with PISame=True & OrgRankChange="same"
trans.loc[trans["PISame"] & trans["OrgRankChange"].isin(["same"]) & (trans["PositionRank_1"]>trans["PositionRank_2"]),"PositionRankChange"] = "lower"
trans.loc[trans["PISame"] & trans["OrgRankChange"].isin(["same"]) & (trans["PositionRank_1"]==trans["PositionRank_2"]),"PositionRankChange"] = "same"
trans.loc[trans["PISame"] & trans["OrgRankChange"].isin(["same"]) & (trans["PositionRank_1"]<trans["PositionRank_2"]),"PositionRankChange"] = "higher"

In [125]:
value_order = ["lower","same","higher"]
trans.PositionRankChange = trans.PositionRankChange.astype("category")
trans.PositionRankChange = trans.PositionRankChange.cat.set_categories(value_order)

In [126]:
trans.PositionRankChange.unique()

[NaN, 'same', 'higher', 'lower']
Categories (3, object): ['lower', 'same', 'higher']

### 5. Succession Period - Broad

* KIS-->KJI: 1974-1993 - less institutionalized (stronger intra)
* KJI--->KJU:  2002-2011 - more institutionalized (stronger inter)

In [127]:
trans["Succession_Broad"] = np.nan

In [128]:
value_order = ["KIS to KJI","KJI to KJU"]
trans.Succession_Broad = trans.Succession_Broad.astype("category")
trans.Succession_Broad = trans.Succession_Broad.cat.set_categories(value_order)

In [129]:
trans.loc[(trans["CareerStartYear_2"]>1973) & (trans["CareerStartYear_2"]<1994),"Succession_Broad"] = "KIS to KJI"
trans.loc[(trans["CareerStartYear_2"]>2001) & (trans["CareerStartYear_2"]<2012),"Succession_Broad"] = "KJI to KJU"

In [130]:
trans[["Succession_Broad","OrgName_2"]].groupby("Succession_Broad",as_index=False).count()

Unnamed: 0,Succession_Broad,OrgName_2
0,KIS to KJI,674
1,KJI to KJU,648


In [131]:
trans.loc[trans["Succession_Broad"]=="KIS to KJI","CareerStartYear_2"]

34      1977
35      1977
36      1979
37      1980
38      1982
        ... 
3805    1980
3934    1993
4285    1977
4286    1980
4287    1990
Name: CareerStartYear_2, Length: 1006, dtype: int32

### 6. Succession Period - Narrow

#### Esther's periodization
* KIS-->KJI: 1987-1994 - less institutionalized (stronger intra)
* KJI--->KJU:  2009-2011 - more institutionalized (stronger inter)

In [132]:
trans["Succession_Narrow"] = np.nan

In [133]:
value_order = ["KIS to KJI","KJI to KJU"]
trans.Succession_Narrow = trans.Succession_Narrow.astype("category")
trans.Succession_Narrow = trans.Succession_Narrow.cat.set_categories(value_order)

In [134]:
### Esther's periodization

trans.loc[(trans["CareerStartYear_2"]>1986) & (trans["CareerStartYear_2"]<1995),"Succession_Narrow"] = "KIS to KJI"
trans.loc[(trans["CareerStartYear_2"]>2008) & (trans["CareerStartYear_2"]<2012),"Succession_Narrow"] = "KJI to KJU"

In [135]:
trans[["Succession_Narrow","OrgName_2"]].groupby("Succession_Narrow",as_index=False).count()

Unnamed: 0,Succession_Narrow,OrgName_2
0,KIS to KJI,323
1,KJI to KJU,383


### 7. OrgAdvanceYes

In [136]:
trans["OrgAdvanceYes"] = np.nan

In [137]:
trans.loc[trans["PISame"]==True,"OrgAdvanceYes"] = False
trans.loc[trans["PISame"]==True & (trans["OrgRank_1"]>trans["OrgRank_2"]),"OrgAdvanceYes"] = True

### 8. PositionAdvanceYes

In [138]:
trans["PositionAdvanceYes"] = np.nan

In [139]:
trans.loc[trans["OrgSame"]==True,"PositionAdvanceYes"] = False
trans.loc[trans["OrgSame"]==True & (trans["PositionRank_1"]>trans["PositionRank_2"]),"PositionAdvanceYes"] = True

### 9. InstitutionCategory

In [140]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance', 'PISame', 'OrgSame',
       'OrgRankChange', 'PositionRankChange', 'Succession_Broad',
       'Succession_Narrow', 'OrgAdvanceYes', 'PositionAdvanceYes'],
      dtype='object')

In [141]:
trans["InstitutionCategory_1"] = trans["InstitutionType_1"].apply(define_institution_category)
trans["InstitutionCategory_2"] = trans["InstitutionType_2"].apply(define_institution_category)

### 10. InstitutionCategorySame

In [142]:
trans["InstitutionCategorySame"] = trans["InstitutionCategory_1"] == trans["InstitutionCategory_2"]

### 11. Succession_Five

#### Jacob's periodization
* KIS-->KJI: 1994-1999 - less institutionalized (stronger intra)
* KJI--->KJU:  2011-2016 - more institutionalized (stronger inter)

In [143]:
trans["Succession_Five"] = np.nan

In [144]:
value_order = ["KIS to KJI","KJI to KJU"]
trans.Succession_Five = trans.Succession_Five.astype("category")
trans.Succession_Five = trans.Succession_Five.cat.set_categories(value_order)

In [145]:
### Jacob's periodization

trans.loc[(trans["CareerStartYear_2"]>1993) & (trans["CareerStartYear_2"]<2000),"Succession_Five"] = "KIS to KJI"
trans.loc[(trans["CareerStartYear_2"]>2010) & (trans["CareerStartYear_2"]<2016),"Succession_Five"] = "KJI to KJU"

In [146]:
trans[["Succession_Five","OrgName_2"]].groupby("Succession_Five",as_index=False).count()

Unnamed: 0,Succession_Five,OrgName_2
0,KIS to KJI,229
1,KJI to KJU,229


# Hypotheses Series 1. s1s2, trans count, within & between PI

### 1. s1s2 trans count (within & between PI), succession broad

In [147]:
stat1_columns = ["PISame","Succession_Broad","OrgName_2"]
stat1_groupby_columns = ["Succession_Broad","PISame"]
# stat1_label_columns = ["Year","Total Transitions","Total Advancements of OrgRank"]

In [148]:
stat1 = trans[stat1_columns].groupby(stat1_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count"})

# pivot PISame from column to rows
stat1 = stat1.pivot(index="Succession_Broad",columns="PISame",values="Total Transitions")
# pivot_column_labels = ["Succession_Broad","Between PI Trans","Within PI Trans"]
pivot_column_labels = ["Between PI Trans","Within PI Trans"]
stat1.columns = pivot_column_labels
stat1 = stat1.reset_index()

stat1

Unnamed: 0,Succession_Broad,Between PI Trans,Within PI Trans
0,KIS to KJI,425,249
1,KJI to KJU,363,285


In [149]:
stat1["Percentage Between PI Trans"] = stat1.apply(lambda x: round(x["Between PI Trans"] / (x["Between PI Trans"] + x["Within PI Trans"]),4) if (x["Between PI Trans"] + x["Within PI Trans"]) > 0 else np.nan,axis=1)
stat1

Unnamed: 0,Succession_Broad,Between PI Trans,Within PI Trans,Percentage Between PI Trans
0,KIS to KJI,425,249,0.6306
1,KJI to KJU,363,285,0.5602


In [150]:
# two-sample ttest of proportions: p(between PI trans in s1) > p(between PI trans in s2)
x1 = stat1.iloc[0,1]
n1 = stat1.iloc[0,1] + stat1.iloc[0,2]
x2 = stat1.iloc[1,1]
n2 = stat1.iloc[1,1] + stat1.iloc[1,2]
alpha = .05
print(x1,x2,n1,n2,alpha)
two_sample_ttest_proportions(x1,x2,n1,n2,alpha)

425 363 674 648 0.05
p1:	 0.6305637982195845
p2:	 0.5601851851851852
p1-p2:	 0.0703786130343993
z:	 2.6069946827170716
pvalue:	 0.004567038935540002**


### 2. s1s2 trans count (within & between PI), succession narrow

In [151]:
stat2_columns = ["PISame","Succession_Narrow","OrgName_2"]
stat2_groupby_columns = ["Succession_Narrow","PISame"]
# stat1_label_columns = ["Year","Total Transitions","Total Advancements of OrgRank"]

In [152]:
stat2 = trans[stat2_columns].groupby(stat2_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count"})

# pivot PISame from column to rows
stat2 = stat2.pivot(index="Succession_Narrow",columns="PISame",values="Total Transitions")
# pivot_column_labels = ["Succession_Broad","Between PI Trans","Within PI Trans"]
pivot_column_labels = ["Between PI Trans","Within PI Trans"]
stat2.columns = pivot_column_labels
stat2 = stat2.reset_index()

stat2

Unnamed: 0,Succession_Narrow,Between PI Trans,Within PI Trans
0,KIS to KJI,196,127
1,KJI to KJU,242,141


In [153]:
stat2["Percentage Between PI Trans"] = stat2.apply(lambda x: round(x["Between PI Trans"] / (x["Between PI Trans"] + x["Within PI Trans"]),4) if (x["Between PI Trans"] + x["Within PI Trans"]) > 0 else np.nan,axis=1)
stat2

Unnamed: 0,Succession_Narrow,Between PI Trans,Within PI Trans,Percentage Between PI Trans
0,KIS to KJI,196,127,0.6068
1,KJI to KJU,242,141,0.6319


In [154]:
# two-sample ttest of proportions: p(between PI trans in s1) > p(between PI trans in s2)
x1 = stat2.iloc[0,1]
n1 = stat2.iloc[0,1] + stat2.iloc[0,2]
x2 = stat2.iloc[1,1]
n2 = stat2.iloc[1,1] + stat2.iloc[1,2]
alpha = .05
print(x1,x2,n1,n2,alpha)
two_sample_ttest_proportions(x1,x2,n1,n2,alpha)

196 242 323 383 0.05
p1:	 0.6068111455108359
p2:	 0.6318537859007833
p1-p2:	 -0.025042640389947413
z:	 -0.6830911575480088
pvalue:	 0.24727462127586397


### 3. s1s2 trans count (within & between PI), succession broad, by sending PI

In [155]:
stat3_columns = ["InstitutionCategory_1","PISame","Succession_Broad","OrgName_2"]
stat3_groupby_columns = ["InstitutionCategory_1","Succession_Broad","PISame"]

In [156]:
stat3 = trans[stat3_columns].groupby(stat3_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count"})

# pivot PISame from column to rows
stat3 = stat3.pivot(index=["InstitutionCategory_1","Succession_Broad",],columns="PISame",values="Total Transitions")
# pivot_column_labels = ["Succession_Broad","Between PI Trans","Within PI Trans"]
pivot_column_labels = ["Between PI Trans","Within PI Trans"]
stat3.columns = pivot_column_labels
stat3 = stat3.reset_index()

stat3

Unnamed: 0,InstitutionCategory_1,Succession_Broad,Between PI Trans,Within PI Trans
0,Government,KIS to KJI,175,90
1,Government,KJI to KJU,120,169
2,Military,KIS to KJI,44,4
3,Military,KJI to KJU,82,7
4,Party,KIS to KJI,74,143
5,Party,KJI to KJU,28,96
6,Social,KIS to KJI,132,12
7,Social,KJI to KJU,133,13


In [157]:
stat3["Percentage Between PI Trans"] = stat3.apply(lambda x: round(x["Between PI Trans"] / (x["Between PI Trans"] + x["Within PI Trans"]),4) if (x["Between PI Trans"] + x["Within PI Trans"]) > 0 else np.nan,axis=1)
stat3

Unnamed: 0,InstitutionCategory_1,Succession_Broad,Between PI Trans,Within PI Trans,Percentage Between PI Trans
0,Government,KIS to KJI,175,90,0.6604
1,Government,KJI to KJU,120,169,0.4152
2,Military,KIS to KJI,44,4,0.9167
3,Military,KJI to KJU,82,7,0.9213
4,Party,KIS to KJI,74,143,0.341
5,Party,KJI to KJU,28,96,0.2258
6,Social,KIS to KJI,132,12,0.9167
7,Social,KJI to KJU,133,13,0.911


In [158]:
ics = list(trans.InstitutionCategory_2.unique())
ics

['Government', 'Social', 'Party', 'Military']

In [159]:
ttest_by_institution_category(ics,stat3,2,3,0,1)



 Government 


175 120 265 289 0.05
p1:	 0.660377358490566
p2:	 0.41522491349480967
p1-p2:	 0.24515244499575634
z:	 5.776996170646588
pvalue:	 3.8023008075427356e-09***


 Social 


132 133 144 146 0.05
p1:	 0.9166666666666666
p2:	 0.910958904109589
p1-p2:	 0.005707762557077611
z:	 0.17315284424354763
pvalue:	 0.4312656442570164


 Party 


74 28 217 124 0.05
p1:	 0.34101382488479265
p2:	 0.22580645161290322
p1-p2:	 0.11520737327188943
z:	 2.235111213838738
pvalue:	 0.012705024181294933*


 Military 


44 82 48 89 0.05
p1:	 0.9166666666666666
p2:	 0.9213483146067416
p1-p2:	 -0.004681647940074973
z:	 -0.09620398060589609
pvalue:	 0.46167928470764663


### 4. s1s2 trans count (within & between PI), succession narrow, by sending PI

In [160]:
stat4_columns = ["InstitutionCategory_1","PISame","Succession_Narrow","OrgName_2"]
stat4_groupby_columns = ["InstitutionCategory_1","Succession_Narrow","PISame"]

In [161]:
stat4 = trans[stat4_columns].groupby(stat4_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count"})

# pivot PISame from column to rows
stat4 = stat4.pivot(index=["InstitutionCategory_1","Succession_Narrow",],columns="PISame",values="Total Transitions")
# pivot_column_labels = ["Succession_Broad","Between PI Trans","Within PI Trans"]
pivot_column_labels = ["Between PI Trans","Within PI Trans"]
stat4.columns = pivot_column_labels
stat4 = stat4.reset_index()

stat4

Unnamed: 0,InstitutionCategory_1,Succession_Narrow,Between PI Trans,Within PI Trans
0,Government,KIS to KJI,77,56
1,Government,KJI to KJU,79,51
2,Military,KIS to KJI,13,2
3,Military,KJI to KJU,74,4
4,Party,KIS to KJI,38,66
5,Party,KJI to KJU,18,85
6,Social,KIS to KJI,68,3
7,Social,KJI to KJU,71,1


In [162]:
stat4["Percentage Between PI Trans"] = stat4.apply(lambda x: round(x["Between PI Trans"] / (x["Between PI Trans"] + x["Within PI Trans"]),4) if (x["Between PI Trans"] + x["Within PI Trans"]) > 0 else np.nan,axis=1)
stat4

Unnamed: 0,InstitutionCategory_1,Succession_Narrow,Between PI Trans,Within PI Trans,Percentage Between PI Trans
0,Government,KIS to KJI,77,56,0.5789
1,Government,KJI to KJU,79,51,0.6077
2,Military,KIS to KJI,13,2,0.8667
3,Military,KJI to KJU,74,4,0.9487
4,Party,KIS to KJI,38,66,0.3654
5,Party,KJI to KJU,18,85,0.1748
6,Social,KIS to KJI,68,3,0.9577
7,Social,KJI to KJU,71,1,0.9861


In [163]:
ics = list(trans.InstitutionCategory_2.unique())
ics

['Government', 'Social', 'Party', 'Military']

In [164]:
ttest_by_institution_category(ics,stat4,2,3,0,1)



 Government 


77 79 133 130 0.05
p1:	 0.5789473684210527
p2:	 0.6076923076923076
p1-p2:	 -0.028744939271254988
z:	 -0.4744413995109552
pvalue:	 0.3175925884139059


 Social 


68 71 71 72 0.05
p1:	 0.9577464788732394
p2:	 0.9861111111111112
p1-p2:	 -0.02836463223787178
z:	 -1.028497410534081
pvalue:	 0.15185795413987369


 Party 


38 18 104 103 0.05
p1:	 0.36538461538461536
p2:	 0.17475728155339806
p1-p2:	 0.1906273338312173
z:	 3.0869041266057793
pvalue:	 0.0010112640850469257**


 Military 


13 74 15 78 0.05
p1:	 0.8666666666666667
p2:	 0.9487179487179487
p1-p2:	 -0.08205128205128198
z:	 -1.1846349051637255
pvalue:	 0.11808091496140638


### 5. s1s2 trans count (within & between PI), succession broad, by receiving PI

In [165]:
stat5_columns = ["InstitutionCategory_2","PISame","Succession_Broad","OrgName_2"]
stat5_groupby_columns = ["InstitutionCategory_2","Succession_Broad","PISame"]

In [166]:
stat5 = trans[stat5_columns].groupby(stat5_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count"})

# pivot PISame from column to rows
stat5 = stat5.pivot(index=["InstitutionCategory_2","Succession_Broad",],columns="PISame",values="Total Transitions")
# pivot_column_labels = ["Succession_Broad","Between PI Trans","Within PI Trans"]
pivot_column_labels = ["Between PI Trans","Within PI Trans"]
stat5.columns = pivot_column_labels
stat5 = stat5.reset_index()

stat5

Unnamed: 0,InstitutionCategory_2,Succession_Broad,Between PI Trans,Within PI Trans
0,Government,KIS to KJI,171,90
1,Government,KJI to KJU,107,169
2,Military,KIS to KJI,16,4
3,Military,KJI to KJU,22,7
4,Party,KIS to KJI,201,143
5,Party,KJI to KJU,195,96
6,Social,KIS to KJI,37,12
7,Social,KJI to KJU,39,13


In [167]:
stat5["Percentage Between PI Trans"] = stat5.apply(lambda x: round(x["Between PI Trans"] / (x["Between PI Trans"] + x["Within PI Trans"]),4) if (x["Between PI Trans"] + x["Within PI Trans"]) > 0 else np.nan,axis=1)
stat5

Unnamed: 0,InstitutionCategory_2,Succession_Broad,Between PI Trans,Within PI Trans,Percentage Between PI Trans
0,Government,KIS to KJI,171,90,0.6552
1,Government,KJI to KJU,107,169,0.3877
2,Military,KIS to KJI,16,4,0.8
3,Military,KJI to KJU,22,7,0.7586
4,Party,KIS to KJI,201,143,0.5843
5,Party,KJI to KJU,195,96,0.6701
6,Social,KIS to KJI,37,12,0.7551
7,Social,KJI to KJU,39,13,0.75


In [168]:
ics = list(trans.InstitutionCategory_2.unique())
ics

['Government', 'Social', 'Party', 'Military']

In [171]:
ttest_by_institution_category_2(ics,stat5,2,3,0,1)



 Government 


171 107 261 276 0.05
p1:	 0.6551724137931034
p2:	 0.38768115942028986
p1-p2:	 0.26749125437281357
z:	 6.200107861806762
pvalue:	 2.821224365234798e-10***


 Social 


37 39 49 52 0.05
p1:	 0.7551020408163265
p2:	 0.75
p1-p2:	 0.005102040816326481
z:	 0.059378268184122555
pvalue:	 0.47632541099482384


 Party 


201 195 344 291 0.05
p1:	 0.5843023255813954
p2:	 0.6701030927835051
p1-p2:	 -0.08580076720210972
z:	 -2.2236027689395153
pvalue:	 0.013087591051327951*


 Military 


16 22 20 29 0.05
p1:	 0.8
p2:	 0.7586206896551724
p1-p2:	 0.04137931034482767
z:	 0.3411985242228634
pvalue:	 0.3664770670999453


### 6. s1s2 trans count (within & between PI), succession narrow, by receving PI

In [172]:
stat6_columns = ["InstitutionCategory_2","PISame","Succession_Narrow","OrgName_2"]
stat6_groupby_columns = ["InstitutionCategory_2","Succession_Narrow","PISame"]

In [173]:
stat6 = trans[stat6_columns].groupby(stat6_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count"})

# pivot PISame from column to rows
stat6 = stat6.pivot(index=["InstitutionCategory_2","Succession_Narrow",],columns="PISame",values="Total Transitions")
# pivot_column_labels = ["Succession_Broad","Between PI Trans","Within PI Trans"]
pivot_column_labels = ["Between PI Trans","Within PI Trans"]
stat6.columns = pivot_column_labels
stat6 = stat6.reset_index()

stat6

Unnamed: 0,InstitutionCategory_2,Succession_Narrow,Between PI Trans,Within PI Trans
0,Government,KIS to KJI,92,56
1,Government,KJI to KJU,46,51
2,Military,KIS to KJI,18,2
3,Military,KJI to KJU,14,4
4,Party,KIS to KJI,67,66
5,Party,KJI to KJU,173,85
6,Social,KIS to KJI,19,3
7,Social,KJI to KJU,9,1


In [174]:
stat6["Percentage Between PI Trans"] = stat6.apply(lambda x: round(x["Between PI Trans"] / (x["Between PI Trans"] + x["Within PI Trans"]),4) if (x["Between PI Trans"] + x["Within PI Trans"]) > 0 else np.nan,axis=1)
stat6

Unnamed: 0,InstitutionCategory_2,Succession_Narrow,Between PI Trans,Within PI Trans,Percentage Between PI Trans
0,Government,KIS to KJI,92,56,0.6216
1,Government,KJI to KJU,46,51,0.4742
2,Military,KIS to KJI,18,2,0.9
3,Military,KJI to KJU,14,4,0.7778
4,Party,KIS to KJI,67,66,0.5038
5,Party,KJI to KJU,173,85,0.6705
6,Social,KIS to KJI,19,3,0.8636
7,Social,KJI to KJU,9,1,0.9


In [175]:
ics = list(trans.InstitutionCategory_2.unique())
ics

['Government', 'Social', 'Party', 'Military']

In [176]:
ttest_by_institution_category_2(ics,stat6,2,3,0,1)



 Government 


92 46 148 97 0.05
p1:	 0.6216216216216216
p2:	 0.4742268041237113
p1-p2:	 0.14739481749791028
z:	 2.274838389522843
pvalue:	 0.011457813185327792*


 Social 


19 9 22 10 0.05
p1:	 0.8636363636363636
p2:	 0.9
p1-p2:	 -0.036363636363636376
z:	 -0.28829998806257895
pvalue:	 0.38655855814547635


 Party 


67 173 133 258 0.05
p1:	 0.5037593984962406
p2:	 0.6705426356589147
p1-p2:	 -0.16678323716267407
z:	 -3.2090933948882228
pvalue:	 0.0006657711611772754***


 Military 


18 14 20 18 0.05
p1:	 0.9
p2:	 0.7777777777777778
p1-p2:	 0.12222222222222223
z:	 1.031674295430419
pvalue:	 0.15111236144659512
