In [1]:
import pandas as pd
import numpy as np
from datetime import date
from statistics import mean, mode
from pandas.api.types import CategoricalDtype
import math
from scipy.stats import norm

In [2]:
today = date.today()
print(today)

2024-02-16


# Tables

In [3]:
path_tables = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 2 tables/"

In [4]:
# tables
filename_careerorglink = "careerorglink.xlsx"
filename_leadercareerlink = "leadercareerlink.xlsx"
filename_orgtree = "orgtree.xlsx"
filename_elected = "positions_elected.xlsx"

In [5]:
# career-org link
# col = pd.read_excel(path_tables + filename_careerorglink,dtype="str")
# col.shape

In [6]:
# col.columns

In [7]:
# leader-career link
# lcl = pd.read_excel(path_tables + filename_leadercareerlink,dtype="str")
# lcl.shape

In [8]:
# lcl.columns

In [9]:
# orgtree
# org = pd.read_excel(path_tables + filename_orgtree,dtype="str")
# org.shape

In [10]:
# org.columns

In [11]:
# elected = pd.read_excel(path_tables + filename_elected,dtype="str")
# elected.shape

In [12]:
# elected.columns

# Queries

In [13]:
path_queries = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 3 queries/"

In [14]:
filename_leaderjob_all = "leaderjob_electUnelect_inOutgov.xlsx"
filename_leaderjob_no_spa = "leaderjob_no_spa.xlsx"
filename_leaderjobtransition_no_spa = "leaderjobtransition_no_spa.xlsx"

In [15]:
# leader jobs
# ljobs_all = pd.read_excel(path_queries + filename_leaderjob_all,dtype="str")
# ljobs_all.shape

In [16]:
# ljobs = pd.read_excel(path_queries + filename_leaderjob_no_spa,dtype="str")
# ljobs.shape

In [17]:
# transitions used for analysis - no SPA - no local-local
trans = pd.read_excel(path_queries + filename_leaderjobtransition_no_spa,dtype="str")
trans.shape

(4306, 31)

# Analysis - Research Note

In [18]:
path_analysis = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/analysis/"

In [19]:
# analysis sub-paths
study0_path = "2023.10.04 Study 0 - research note/"
study1_path = "2023.10.04 Study 1 - political capital/"
study2_path = "2023.10.04 Study 2 - commitment vs control/"
study3_path = "2023.10.04 Study 3 - reds vs experts/"

In [20]:
# ljobs = ljobs.astype({"CareerStartYear":"int","CareerStartDate":"int"})
# ljobs.dtypes

# Functions

In [21]:
def merge_results(m):
    
    print("\nMerge Results...")
    print("")
    print("\tshape     :",m.shape)
    print("\tleft_only :",m[m["_merge"]=="left_only"].shape)
    print("\tboth      :",m[m["_merge"]=="both"].shape)
    print("\tright_only:",m[m["_merge"]=="right_only"].shape)

In [22]:
# using this on (PI,OrgName) will ensure unique & non-null keys
# using this on a larger df will ensure unique rows and non-null keys, but not unique keys

def unique_non_null_rows(olddf):
    
    df = olddf.copy()
    
    
    ### drop duplicates
    df = df.drop_duplicates(keep="first",ignore_index=True)

    ### drop null rows
    df.dropna(how="all",axis=0,inplace=True)
    
    # drop rows with null PI
    df = df[~(df["PrimaryInstitution"].isna())]
        
    ### drop rows where PI contains stop words
    stop_words_lower = ["uncertain","current","deprecated","please_revise"]
    df = df[ ~ df["PrimaryInstitution"].str.lower().isin(stop_words_lower)]
            

    print("\nUnique Non-null Rows...")
    print("")
    print("\tNon-unique rows:",olddf.shape)
    print("\tUnique rows    :",df.shape)

    ### sort
    df = df.sort_values(["PrimaryInstitution","OrgName"])
    
    return df

In [23]:
def create_time_series(series,group_var,count_var):
    
    yeardist = series.groupby(group_var,as_index=False).count().sort_values(group_var)
    
    max_year = max(series[group_var])
    x = pd.DataFrame({"year":pd.Series(range(min(yeardist[group_var].astype(int)),max(yeardist[group_var].astype(int))+1))})
    
    yeardist[group_var] = yeardist[group_var].astype(int,errors="ignore")
    
    ts = x.merge(yeardist,left_on="year",right_on=group_var,how="left")
    ts.loc[ts[count_var].isna(),count_var]=0
    # merge_results(ts)
    
    ts_columns = ["year",count_var]
    ts = ts[ts_columns]
    
    return ts

In [24]:
# create InstitutionCategory (English) variable InstitutionType

# use with apply. e.g., 
# df["InstitutionCategory] = df["InstitutionType"].apply(define_institution_category)

def define_institution_category(PI):
    
    c = "Social"
    
    if PI=="정권기관":
        c = "Government"
    elif PI=="노동당":
        c = "Party"
    elif PI=="인민군":
        c = "Military"
        
    return c

In [25]:
def two_sample_ttest_proportions(x1,x2,n1,n2,alpha):
    
    pstar = (x1+x2)/(n1+n2)
    p1 = x1/n1
    p2 = x2/n2
    z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
    pvalue = 1-norm.cdf(abs(z))
    
    # add significance stars to result
    stars = ""
    if pvalue<.05:
        stars = "*"
    if pvalue<.01:
        stars = "**"
    if pvalue<.001:
        stars = "***"
    result = str(pvalue)+stars
    
    print("x1,x2,n1,n2:",[x1,x2,n1,n2])
    print("p1:\t",p1)
    print("p2:\t",p2)
    print("p1-p2:\t",(p1-p2))
    print("z:\t",z)
    print("pvalue:\t",result)

# Format & Covariates

### 0. change datatypes

In [26]:
trans = trans.astype({"OrgAdvance":"int","PositionAdvance":"int",
                      "CareerStartYear_1":"int","CareerStartYear_2":"int",
                      "CareerStartDate_1":"int","CareerStartDate_2":"int"})
trans.dtypes

LeaderID                   object
CareerString_1             object
CareerDateString_2022_1    object
CareerStartYear_1           int32
CareerStartMonth_1         object
CareerStartDate_1           int32
CareerSubstring_1          object
InstitutionType_1          object
PrimaryInstitution_1       object
OrgName_1                  object
Local_1                    object
Position_1                 object
IsElected_1                object
OrgRank_1                  object
PositionRank_1             object
CareerString_2             object
CareerDateString_2022_2    object
CareerStartYear_2           int32
CareerStartMonth_2         object
CareerStartDate_2           int32
CareerSubstring_2          object
InstitutionType_2          object
PrimaryInstitution_2       object
OrgName_2                  object
Local_2                    object
Position_2                 object
IsElected_2                object
OrgRank_2                  object
PositionRank_2             object
OrgAdvance    

### 1. double-check exlcudes SPA jobs and local-local trans

In [27]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance'],
      dtype='object')

In [28]:
trans.shape

(4306, 31)

In [29]:
trans.Local_1.unique()

array(['False', 'True'], dtype=object)

In [30]:
# remove Local-Local ties
trans = trans[~((trans["Local_1"]=="True") & (trans["Local_2"]=="True"))]
trans.shape

(4222, 31)

### 2. add InstitutionCategory_1, InstitutionCategory_2

In [31]:
trans.InstitutionType_1.unique()

array(['정권기관', '국제친선단체', '노동당', '인민군', '당외곽및사회단체_사회부문(별책)',
       '당외곽및사회단체_사회부문', '당외곽및사회단체_체육부문', '당외곽및사회단체_정치부문', '당외곽및사회단체_대외부문',
       '당외곽및사회단체_근로단체', '당외곽및사회단체_종교부문', '당외곽및사회단체_경제부문(별책)',
       '당외곽및사회단체_경제부문'], dtype=object)

### 3. PISame, OrgSame

In [32]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance'],
      dtype='object')

In [33]:
trans["PISame"] = False
trans.loc[trans["PrimaryInstitution_1"]==trans["PrimaryInstitution_2"],["PISame"]] = True
trans.PISame.unique()

array([False,  True])

In [34]:
trans["OrgSame"] = np.nan
trans.loc[trans["PISame"]==True,"OrgSame"] = False
trans.loc[(trans["PISame"]==True) & (trans["OrgName_1"]==trans["OrgName_2"]),["OrgSame"]] = True
trans.OrgSame.unique()

array([nan, True, False], dtype=object)

### 4. OrgRankChange, PositionRankChange

In [35]:
trans["OrgRankChange"] = np.nan

In [36]:
trans.loc[trans["PISame"] & (trans["OrgRank_1"]>trans["OrgRank_2"]),"OrgRankChange"] = "lower"
trans.loc[trans["PISame"] & (trans["OrgRank_1"]==trans["OrgRank_2"]),"OrgRankChange"] = "same"
trans.loc[trans["PISame"] & (trans["OrgRank_1"]<trans["OrgRank_2"]),"OrgRankChange"] = "higher"

In [37]:
value_order = ["lower","same","higher"]
trans.OrgRankChange = trans.OrgRankChange.astype("category")
trans.OrgRankChange = trans.OrgRankChange.cat.set_categories(value_order)

In [38]:
trans.OrgRankChange.unique()

[NaN, 'same', 'higher', 'lower']
Categories (3, object): ['lower', 'same', 'higher']

In [39]:
trans["PositionRankChange"] = np.nan

In [40]:
# PositionRankChange, narrowly defined, with OrgSame=True
# trans.loc[trans["OrgSame"] & (trans["PositionRank_1"]>trans["PositionRank_2"]),"PositionRankChange"] = "lower"
# trans.loc[trans["OrgSame"] & (trans["PositionRank_1"]==trans["PositionRank_2"]),"PositionRankChange"] = "same"
# trans.loc[trans["OrgSame"] & (trans["PositionRank_1"]<trans["PositionRank_2"]),"PositionRankChange"] = "higher"

In [41]:
# PositionRankChange, broadly defined, with PISame=True & OrgRankChange="same"
trans.loc[trans["PISame"] & trans["OrgRankChange"].isin(["same"]) & (trans["PositionRank_1"]>trans["PositionRank_2"]),"PositionRankChange"] = "lower"
trans.loc[trans["PISame"] & trans["OrgRankChange"].isin(["same"]) & (trans["PositionRank_1"]==trans["PositionRank_2"]),"PositionRankChange"] = "same"
trans.loc[trans["PISame"] & trans["OrgRankChange"].isin(["same"]) & (trans["PositionRank_1"]<trans["PositionRank_2"]),"PositionRankChange"] = "higher"

In [42]:
value_order = ["lower","same","higher"]
trans.PositionRankChange = trans.PositionRankChange.astype("category")
trans.PositionRankChange = trans.PositionRankChange.cat.set_categories(value_order)

In [43]:
trans.PositionRankChange.unique()

[NaN, 'same', 'higher', 'lower']
Categories (3, object): ['lower', 'same', 'higher']

### 5. Succession Period - Broad

* KIS-->KJI: 1974-1993 - less institutionalized (stronger intra)
* KJI--->KJU:  2002-2011 - more institutionalized (stronger inter)

In [44]:
trans["Succession_Broad"] = np.nan

In [45]:
value_order = ["KIS to KJI","KJI to KJU"]
trans.Succession_Broad = trans.Succession_Broad.astype("category")
trans.Succession_Broad = trans.Succession_Broad.cat.set_categories(value_order)

In [46]:
trans.loc[(trans["CareerStartYear_2"]>1973) & (trans["CareerStartYear_2"]<1994),"Succession_Broad"] = "KIS to KJI"
trans.loc[(trans["CareerStartYear_2"]>2001) & (trans["CareerStartYear_2"]<2012),"Succession_Broad"] = "KJI to KJU"

In [47]:
trans[["Succession_Broad","OrgName_2"]].groupby("Succession_Broad",as_index=False).count()

Unnamed: 0,Succession_Broad,OrgName_2
0,KIS to KJI,674
1,KJI to KJU,648


In [48]:
trans.loc[trans["Succession_Broad"]=="KIS to KJI","CareerStartYear_2"]

34      1977
35      1977
36      1979
37      1980
38      1982
        ... 
3805    1980
3934    1993
4285    1977
4286    1980
4287    1990
Name: CareerStartYear_2, Length: 1006, dtype: int32

### 6. Succession Period - Narrow

#### Esther's periodization
* KIS-->KJI: 1987-1994 - less institutionalized (stronger intra)
* KJI--->KJU:  2009-2011 - more institutionalized (stronger inter)

In [49]:
trans["Succession_Narrow"] = np.nan

In [50]:
value_order = ["KIS to KJI","KJI to KJU"]
trans.Succession_Narrow = trans.Succession_Narrow.astype("category")
trans.Succession_Narrow = trans.Succession_Narrow.cat.set_categories(value_order)

In [51]:
### Esther's periodization

trans.loc[(trans["CareerStartYear_2"]>1986) & (trans["CareerStartYear_2"]<1995),"Succession_Narrow"] = "KIS to KJI"
trans.loc[(trans["CareerStartYear_2"]>2008) & (trans["CareerStartYear_2"]<2012),"Succession_Narrow"] = "KJI to KJU"

In [52]:
trans[["Succession_Narrow","OrgName_2"]].groupby("Succession_Narrow",as_index=False).count()

Unnamed: 0,Succession_Narrow,OrgName_2
0,KIS to KJI,323
1,KJI to KJU,383


### 7. OrgAdvanceYes

In [53]:
trans["OrgAdvanceYes"] = np.nan

In [54]:
trans.loc[trans["PISame"]==True,"OrgAdvanceYes"] = False
trans.loc[trans["PISame"]==True & (trans["OrgRank_1"]>trans["OrgRank_2"]),"OrgAdvanceYes"] = True

### 8. PositionAdvanceYes

In [55]:
trans["PositionAdvanceYes"] = np.nan

In [56]:
trans.loc[trans["OrgSame"]==True,"PositionAdvanceYes"] = False
trans.loc[trans["OrgSame"]==True & (trans["PositionRank_1"]>trans["PositionRank_2"]),"PositionAdvanceYes"] = True

### 9. InstitutionCategory

In [57]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance', 'PISame', 'OrgSame',
       'OrgRankChange', 'PositionRankChange', 'Succession_Broad',
       'Succession_Narrow', 'OrgAdvanceYes', 'PositionAdvanceYes'],
      dtype='object')

In [58]:
trans["InstitutionCategory_1"] = trans["InstitutionType_1"].apply(define_institution_category)
trans["InstitutionCategory_2"] = trans["InstitutionType_2"].apply(define_institution_category)

### 10. InstitutionCategorySame

In [59]:
trans["InstitutionCategorySame"] = trans["InstitutionCategory_1"] == trans["InstitutionCategory_2"]

### 11. Succession_Five

#### Jacob's periodization
* KIS-->KJI: 1994-1999 - less institutionalized (stronger intra)
* KJI--->KJU:  2011-2016 - more institutionalized (stronger inter)

In [60]:
trans["Succession_Five"] = np.nan

In [61]:
value_order = ["KIS to KJI","KJI to KJU"]
trans.Succession_Five = trans.Succession_Five.astype("category")
trans.Succession_Five = trans.Succession_Five.cat.set_categories(value_order)

In [62]:
### Jacob's periodization

trans.loc[(trans["CareerStartYear_2"]>1993) & (trans["CareerStartYear_2"]<2000),"Succession_Five"] = "KIS to KJI"
trans.loc[(trans["CareerStartYear_2"]>2010) & (trans["CareerStartYear_2"]<2016),"Succession_Five"] = "KJI to KJU"

In [63]:
trans[["Succession_Five","OrgName_2"]].groupby("Succession_Five",as_index=False).count()

Unnamed: 0,Succession_Five,OrgName_2
0,KIS to KJI,229
1,KJI to KJU,229


### double-check OrgRank

In [64]:
# select_cols = ["InstitutionType_1","PrimaryInstitution_1","OrgName_1","OrgRank_1"]
select_cols = ["InstitutionType_2","PrimaryInstitution_2","OrgName_2","OrgRank_2"]

In [65]:
# pd.set_option('display.max_rows', None)
# trans[select_cols]

### 12. Restrict to Top 2 OrgRanks (OrgRank = {0,1})

In [66]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance', 'PISame', 'OrgSame',
       'OrgRankChange', 'PositionRankChange', 'Succession_Broad',
       'Succession_Narrow', 'OrgAdvanceYes', 'PositionAdvanceYes',
       'InstitutionCategory_1', 'InstitutionCategory_2',
       'InstitutionCategorySame', 'Succession_Five'],
      dtype='object')

In [67]:
### reset trans
# trans_orig = trans
# trans = trans_orig

In [68]:
trans["OrgRank_1"].astype(int).isin([0,1])

0       True
1       True
2       True
3       True
4       True
        ... 
4300    True
4301    True
4302    True
4303    True
4304    True
Name: OrgRank_1, Length: 4222, dtype: bool

In [69]:
trans.shape

(4222, 43)

In [70]:
# Top 1 OrgRank
trans2 = trans[(trans["OrgRank_1"].astype(int).isin([0,1])) & (trans["OrgRank_2"].astype(int).isin([0,1]))]

In [71]:
trans2.shape

(2190, 43)

### double-check OrgRank

In [72]:
select_cols = ["InstitutionType_1","PrimaryInstitution_1","OrgName_1","OrgRank_1"]
# select_cols = ["InstitutionType_2","PrimaryInstitution_2","OrgName_2","OrgRank_2"]

In [73]:
# pd.set_option('display.max_rows', None)
# trans2[select_cols]

In [74]:
# OrgName NA but OrgRank > 0
trans2.loc[(trans2["OrgName_1"].isna()) & (trans2["OrgRank_1"].astype(int)>0),select_cols]

Unnamed: 0,InstitutionType_1,PrimaryInstitution_1,OrgName_1,OrgRank_1


In [75]:
# OrgName not NA but OrgRank == 0
trans2.loc[(~trans2["OrgName_1"].isna()) & (trans2["OrgRank_1"].astype(int)==0),select_cols]

Unnamed: 0,InstitutionType_1,PrimaryInstitution_1,OrgName_1,OrgRank_1


In [76]:
# OrgName contains _ but OrgRank < 2
trans2.loc[(trans2["OrgName_1"].str.contains("_")) & (trans2["OrgRank_1"].astype(int)<2),select_cols]

Unnamed: 0,InstitutionType_1,PrimaryInstitution_1,OrgName_1,OrgRank_1


In [77]:
trans = trans2

# Statistics

### 1. OrgRank increase - percent by year

In [78]:
stat1_columns = ["CareerStartYear_2","OrgAdvanceYes"]
stat1_groupby_columns = ["CareerStartYear_2"]
stat1_label_columns = ["Year","Total Transitions","Total Advancements of OrgRank"]

In [79]:
stat1 = trans[stat1_columns].groupby(stat1_groupby_columns,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements of OrgRank":"sum"})
stat1.columns = stat1_label_columns
stat1["Percentage Advancements of OrgRank"] = stat1.apply(lambda x: round(x["Total Advancements of OrgRank"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat1

Unnamed: 0,Year,Total Transitions,Total Advancements of OrgRank,Percentage Advancements of OrgRank
0,1946,1,True,100.00
1,1947,1,True,100.00
2,1948,1,True,100.00
3,1950,2,2,100.00
4,1951,1,False,0.00
...,...,...,...,...
67,2017,25,21,84.00
68,2018,18,13,72.22
69,2019,77,48,62.34
70,2020,9,6,66.67


In [80]:
stat1_filename = "orgrank_within_between.xlsx"
# stat1.to_excel(path_analysis + study0_path + stat2_filename,index=False)

### 2. OrgRank increase - percent by year, PISame = True

In [81]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance', 'PISame', 'OrgSame',
       'OrgRankChange', 'PositionRankChange', 'Succession_Broad',
       'Succession_Narrow', 'OrgAdvanceYes', 'PositionAdvanceYes',
       'InstitutionCategory_1', 'InstitutionCategory_2',
       'InstitutionCategorySame', 'Succession_Five'],
      dtype='object')

In [82]:
stat2_columns = ["CareerStartYear_2","OrgAdvanceYes"]
stat2_condition = trans["PISame"]==True
stat2_groupby_columns = ["CareerStartYear_2"]
stat2_label_columns = ["Year","Total Transitions","Total Advancements of OrgRank"]

In [83]:
stat2 = trans.loc[stat2_condition,stat2_columns].groupby(stat2_groupby_columns,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements of OrgRank":"sum"})
stat2.columns = stat2_label_columns
stat2["Percentage Advancements of OrgRank"] = stat2.apply(lambda x: round(x["Total Advancements of OrgRank"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat2

Unnamed: 0,Year,Total Transitions,Total Advancements of OrgRank,Percentage Advancements of OrgRank
0,1951,1,False,0.0
1,1956,1,False,0.0
2,1959,2,0,0.0
3,1960,1,False,0.0
4,1961,2,0,0.0
5,1962,1,False,0.0
6,1965,3,0,0.0
7,1966,2,1,50.0
8,1967,2,1,50.0
9,1970,9,1,11.11


In [84]:
# pivot SameInstitution - 

In [85]:
# stat2_filename = "orgrank_within.xlsx"
# stat2.to_excel(path_analysis + study0_path + stat2_filename,index=False)

### 3. OrgRank increase - percent by succesion period (broad)

In [86]:
trans.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Local_1', 'Position_1', 'IsElected_1', 'OrgRank_1',
       'PositionRank_1', 'CareerString_2', 'CareerDateString_2022_2',
       'CareerStartYear_2', 'CareerStartMonth_2', 'CareerStartDate_2',
       'CareerSubstring_2', 'InstitutionType_2', 'PrimaryInstitution_2',
       'OrgName_2', 'Local_2', 'Position_2', 'IsElected_2', 'OrgRank_2',
       'PositionRank_2', 'OrgAdvance', 'PositionAdvance', 'PISame', 'OrgSame',
       'OrgRankChange', 'PositionRankChange', 'Succession_Broad',
       'Succession_Narrow', 'OrgAdvanceYes', 'PositionAdvanceYes',
       'InstitutionCategory_1', 'InstitutionCategory_2',
       'InstitutionCategorySame', 'Succession_Five'],
      dtype='object')

In [87]:
stat3_columns = ["Succession_Broad","OrgAdvanceYes"]
stat3_condition = trans["PISame"]==True
stat3_groupby_columns = ["Succession_Broad"]
stat3_label_columns = ["Succession (Broad)","Total Transitions","Total Advancements of OrgRank"]

In [88]:
stat3 = trans.loc[stat3_condition,stat3_columns].groupby(stat3_groupby_columns,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements of OrgRank":"sum"})
stat3.columns = stat3_label_columns
stat3["Percentage Advancements of OrgRank"] = stat3.apply(lambda x: round(x["Total Advancements of OrgRank"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat3

Unnamed: 0,Succession (Broad),Total Transitions,Total Advancements of OrgRank,Percentage Advancements of OrgRank
0,KIS to KJI,119,10,8.4
1,KJI to KJU,134,14,10.45


### 4. Transitions by period and institution 

In [89]:
stat9_columns = ["Succession_Broad","OrgName_2"]
stat9_condition = trans["PISame"]==True
stat9_groupby_columns = ["Succession_Broad"]
stat9_label_columns = ["Succession (Broad)","Total Transitions","Total Advancements of OrgRank"]

In [90]:
trans.head()

Unnamed: 0,LeaderID,CareerString_1,CareerDateString_2022_1,CareerStartYear_1,CareerStartMonth_1,CareerStartDate_1,CareerSubstring_1,InstitutionType_1,PrimaryInstitution_1,OrgName_1,...,OrgRankChange,PositionRankChange,Succession_Broad,Succession_Narrow,OrgAdvanceYes,PositionAdvanceYes,InstitutionCategory_1,InstitutionCategory_2,InstitutionCategorySame,Succession_Five
0,리원일,1997.11 노동행정부 부부장,,1997,11,199711,,정권기관,정무원,노동행정부,...,,,,,True,,Government,Government,True,KIS to KJI
1,리원일,노동성 상 *후임 : 강영수,1997.11,1997,11,199711,,정권기관,내각,노동성,...,same,same,,,False,False,Government,Government,True,KIS to KJI
2,리원일,1998. 9 노동상,,1998,9,199809,,정권기관,내각,노동성,...,,,,,,,Government,Social,False,KIS to KJI
3,리원일,북-이란 친선협회 위원장,1998.11,1998,11,199811,,국제친선단체,북-이란 친선협회,,...,,,,,True,,Social,Government,False,KIS to KJI
4,리원일,북-이란 친선협회 위원장,1998.11,1998,11,199811,,국제친선단체,북-이란 친선협회,,...,same,same,,,False,True,Social,Social,True,KIS to KJI


In [91]:
# stat9 = trans.loc[stat9_condition,stat9_columns].groupby(stat9_groupby_columns,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements of OrgRank":"sum"})
# stat9 = trans.loc[stat9_condition,stat9_columns].groupby(stat9_groupby_columns,as_index=False)["OrgName_2"].agg({"Total Transitions":"count","Total Advancements of OrgRank":"sum"})


# Hypothesis 1a - Org Advancement, broad succession periods

In [92]:
select_columns = ["Succession_Broad","LeaderID"]
select_rows = trans["PISame"]==True
group_columns = ["Succession_Broad"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Broad,LeaderID
0,KIS to KJI,119
1,KJI to KJU,134


In [93]:
stat4_rows = trans["PISame"]==True
stat4_columns = ["Succession_Broad","OrgAdvanceYes"]
stat4_groupby = ["Succession_Broad"]
stat4_labels = ["Succession Period","Total Transitions","Total Advancements"]

In [94]:
stat4 = trans.loc[stat4_rows,stat4_columns].groupby(stat4_groupby,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})
stat4

Unnamed: 0,Succession_Broad,Total Transitions,Total Advancements
0,KIS to KJI,119,10
1,KJI to KJU,134,14


In [95]:
stat4["Percentage Advancements of OrgRank"] = stat4.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat4

Unnamed: 0,Succession_Broad,Total Transitions,Total Advancements,Percentage Advancements of OrgRank
0,KIS to KJI,119,10,8.4
1,KJI to KJU,134,14,10.45


In [96]:
x1 = stat4.iloc[0,2]
x2 = stat4.iloc[1,2]
n1 = stat4.iloc[0,1]
n2 = stat4.iloc[1,1]

In [97]:
# 2-sample test of proportions
# def two_sample_ttest_proportions(x1,x2,n1,n2,alpha):
two_sample_ttest_proportions(x1,x2,n1,n2,.05)

x1,x2,n1,n2: [10, 14, 119, 134]
p1:	 0.08403361344537816
p2:	 0.1044776119402985
p1-p2:	 -0.020443998494920346
z:	 -0.5538964972533995
pvalue:	 0.2898248412257157


# Hypothesis 1b - Position Advancement, broad succession periods

In [98]:
select_columns = ["Succession_Broad","LeaderID"]
select_rows = (trans["PISame"] & trans["OrgRankChange"].isin(["same"]))
group_columns = ["Succession_Broad"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Broad,LeaderID
0,KIS to KJI,96
1,KJI to KJU,110


In [99]:
stat4b_rows = (trans["PISame"] & trans["OrgRankChange"].isin(["same"]))
stat4b_columns = ["Succession_Broad","PositionAdvanceYes"]
stat4b_groupby = ["Succession_Broad"]
stat4b_labels = ["Succession Period","Total Transitions","Total Advancements"]

In [100]:
stat4b = trans.loc[stat4b_rows,stat4b_columns].groupby(stat4b_groupby,as_index=False)["PositionAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})
stat4b

Unnamed: 0,Succession_Broad,Total Transitions,Total Advancements
0,KIS to KJI,84,48
1,KJI to KJU,98,51


In [101]:
stat4b["Percentage Advancements of Position Rank"] = stat4b.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat4b

Unnamed: 0,Succession_Broad,Total Transitions,Total Advancements,Percentage Advancements of Position Rank
0,KIS to KJI,84,48,57.14
1,KJI to KJU,98,51,52.04


In [102]:
x1 = stat4b.iloc[0,2]
x2 = stat4b.iloc[1,2]
n1 = stat4b.iloc[0,1]
n2 = stat4b.iloc[1,1]

In [103]:
# 2-sample test of proportions
# def two_sample_ttest_proportions(x1,x2,n1,n2,alpha):
two_sample_ttest_proportions(x1,x2,n1,n2,.05)

x1,x2,n1,n2: [48, 51, 84, 98]
p1:	 0.5714285714285714
p2:	 0.5204081632653061
p1-p2:	 0.05102040816326525
z:	 0.6889308933634738
pvalue:	 0.2454333786022065


# Hypothesis 1c - Org Advancement, broad succession, by Institution

In [104]:
select_rows = trans["PISame"]==True
select_columns = ["Succession_Broad","LeaderID"]
group_columns = ["Succession_Broad"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Broad,LeaderID
0,KIS to KJI,119
1,KJI to KJU,134


In [105]:
select_columns = ["InstitutionCategorySame","PISame","LeaderID"]
groupby_columns1 = ["InstitutionCategorySame"]
groupby_columns2 = ["PISame"]
groupby_columns3 = ["InstitutionCategorySame","PISame"]

In [106]:
trans[select_columns].groupby(groupby_columns3).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,LeaderID
InstitutionCategorySame,PISame,Unnamed: 2_level_1
False,False,1058
True,False,630
True,True,502


In [107]:
ics = list(trans.InstitutionCategory_2.unique())
ics

['Government', 'Social', 'Party', 'Military']

In [108]:
for ic in ics:
    print("\n\n",ic,"\n\n")
    stat4c_rows = (trans["InstitutionCategory_2"]==ic) & (trans["PISame"]==True)
    stat4c_columns = ["Succession_Broad","OrgAdvanceYes"]
    stat4c_groupby = ["Succession_Broad"]
    stat4c_labels = ["Succession Period","Total Transitions","Total Advancements"]
    
    stat4c = trans.loc[stat4c_rows,stat4c_columns].groupby(stat4c_groupby,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})    
    stat4c["Percentage Advancements of OrgRank"] = stat4c.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
   
    print(stat4c,"\n")
    
    x1 = stat4c.iloc[0,2]
    x2 = stat4c.iloc[1,2]
    n1 = stat4c.iloc[0,1]
    n2 = stat4c.iloc[1,1]
    
    two_sample_ttest_proportions(x1,x2,n1,n2,.05)



 Government 


  Succession_Broad  Total Transitions Total Advancements  \
0       KIS to KJI                 62                  7   
1       KJI to KJU                 96                 11   

   Percentage Advancements of OrgRank  
0                               11.29  
1                               11.46   

x1,x2,n1,n2: [7, 11, 62, 96]
p1:	 0.11290322580645161
p2:	 0.11458333333333333
p1-p2:	 -0.001680107526881719
z:	 -0.032456135798665166
pvalue:	 0.48705414807304515


 Social 


  Succession_Broad  Total Transitions Total Advancements  \
0       KIS to KJI                 24                  3   
1       KJI to KJU                 16                  1   

   Percentage Advancements of OrgRank  
0                               12.50  
1                                6.25   

x1,x2,n1,n2: [3, 1, 24, 16]
p1:	 0.125
p2:	 0.0625
p1-p2:	 0.0625
z:	 0.6454972243679028
pvalue:	 0.25930250821436274


 Party 


  Succession_Broad  Total Transitions Total Advancements  \
0       KI

  p2 = x2/n2
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))


# Hypothesis 1d - Position Advancement, broad succession, by Institution

In [109]:
ics = list(trans.InstitutionCategory_2.unique())
for ic in ics:
    print("\n\n",ic,"\n\n")
    stat4c_rows = (trans["InstitutionCategory_2"]==ic) & (trans["PISame"]==True) & (trans["OrgRankChange"].isin(["same"]))
    stat4c_columns = ["Succession_Broad","PositionAdvanceYes"]
    stat4c_groupby = ["Succession_Broad"]
    stat4c_labels = ["Succession Period","Total Transitions","Total Advancements"]
    
    stat4c = trans.loc[stat4c_rows,stat4c_columns].groupby(stat4c_groupby,as_index=False)["PositionAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})    
    stat4c["Percentage Advancements"] = stat4c.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
   
    print(stat4c,"\n")
    
    x1 = stat4c.iloc[0,2]
    x2 = stat4c.iloc[1,2]
    n1 = stat4c.iloc[0,1]
    n2 = stat4c.iloc[1,1]
    
    two_sample_ttest_proportions(x1,x2,n1,n2,.05)



 Government 


  Succession_Broad  Total Transitions Total Advancements  \
0       KIS to KJI                 39                 27   
1       KJI to KJU                 75                 29   

   Percentage Advancements  
0                    69.23  
1                    38.67   

x1,x2,n1,n2: [27, 29, 39, 75]
p1:	 0.6923076923076923
p2:	 0.38666666666666666
p1-p2:	 0.30564102564102563
z:	 3.096841647953146
pvalue:	 0.0009779716358979673***


 Social 


  Succession_Broad  Total Transitions Total Advancements  \
0       KIS to KJI                 15                 13   
1       KJI to KJU                  9                  8   

   Percentage Advancements  
0                    86.67  
1                    88.89   

x1,x2,n1,n2: [13, 8, 15, 9]
p1:	 0.8666666666666667
p2:	 0.8888888888888888
p1-p2:	 -0.022222222222222143
z:	 -0.1593638145779186
pvalue:	 0.43669112315483205


 Party 


  Succession_Broad  Total Transitions Total Advancements  \
0       KIS to KJI                 2

  p2 = x2/n2
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))


# Hypothesis 2a - Org Advancement, narrow succession periods

In [110]:
select_columns = ["Succession_Narrow","LeaderID"]
select_rows = trans["PISame"]==True
group_columns = ["Succession_Narrow"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Narrow,LeaderID
0,KIS to KJI,57
1,KJI to KJU,66


In [111]:
stat4_rows = trans["PISame"]==True
stat4_columns = ["Succession_Narrow","OrgAdvanceYes"]
stat4_groupby = ["Succession_Narrow"]
stat4_labels = ["Succession Period","Total Transitions","Total Advancements"]

In [112]:
stat4 = trans.loc[stat4_rows,stat4_columns].groupby(stat4_groupby,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})
stat4

Unnamed: 0,Succession_Narrow,Total Transitions,Total Advancements
0,KIS to KJI,57,5
1,KJI to KJU,66,8


In [113]:
stat4["Percentage Advancements of OrgRank"] = stat4.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat4

Unnamed: 0,Succession_Narrow,Total Transitions,Total Advancements,Percentage Advancements of OrgRank
0,KIS to KJI,57,5,8.77
1,KJI to KJU,66,8,12.12


In [114]:
x1 = stat4.iloc[0,2]
x2 = stat4.iloc[1,2]
n1 = stat4.iloc[0,1]
n2 = stat4.iloc[1,1]

In [115]:
# 2-sample test of proportions
# def two_sample_ttest_proportions(x1,x2,n1,n2,alpha):
two_sample_ttest_proportions(x1,x2,n1,n2,.05)

x1,x2,n1,n2: [5, 8, 57, 66]
p1:	 0.08771929824561403
p2:	 0.12121212121212122
p1-p2:	 -0.033492822966507185
z:	 -0.6024842338040619
pvalue:	 0.27342592741177296


# Hypothesis 2b - Position Advancement, narrow succession periods

In [116]:
select_columns = ["Succession_Narrow","LeaderID"]
select_rows = (trans["PISame"] & trans["OrgRankChange"].isin(["same"]))
group_columns = ["Succession_Narrow"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Narrow,LeaderID
0,KIS to KJI,48
1,KJI to KJU,52


In [117]:
stat4b_rows = (trans["PISame"] & trans["OrgRankChange"].isin(["same"]))
stat4b_columns = ["Succession_Narrow","PositionAdvanceYes"]
stat4b_groupby = ["Succession_Narrow"]
stat4b_labels = ["Succession Period","Total Transitions","Total Advancements"]

In [118]:
stat4b = trans.loc[stat4b_rows,stat4b_columns].groupby(stat4b_groupby,as_index=False)["PositionAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})
stat4b

Unnamed: 0,Succession_Narrow,Total Transitions,Total Advancements
0,KIS to KJI,45,22
1,KJI to KJU,49,20


In [119]:
stat4b["Percentage Advancements of Position Rank"] = stat4b.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat4b

Unnamed: 0,Succession_Narrow,Total Transitions,Total Advancements,Percentage Advancements of Position Rank
0,KIS to KJI,45,22,48.89
1,KJI to KJU,49,20,40.82


In [120]:
x1 = stat4b.iloc[0,2]
x2 = stat4b.iloc[1,2]
n1 = stat4b.iloc[0,1]
n2 = stat4b.iloc[1,1]

In [121]:
# 2-sample test of proportions
# def two_sample_ttest_proportions(x1,x2,n1,n2,alpha):
two_sample_ttest_proportions(x1,x2,n1,n2,.05)

x1,x2,n1,n2: [22, 20, 45, 49]
p1:	 0.4888888888888889
p2:	 0.40816326530612246
p1-p2:	 0.08072562358276641
z:	 0.7864177643501911
pvalue:	 0.21581138977741743


# Hypothesis 2c - Org Advancement, narrow succession, by Institution

In [122]:
ics = list(trans.InstitutionCategory_2.unique())
for ic in ics:
    print("\n\n",ic,"\n\n")
    stat4c_rows = (trans["InstitutionCategory_2"]==ic) & (trans["PISame"]==True)
    stat4c_columns = ["Succession_Narrow","OrgAdvanceYes"]
    stat4c_groupby = ["Succession_Narrow"]
    stat4c_labels = ["Succession Period","Total Transitions","Total Advancements"]
    
    stat4c = trans.loc[stat4c_rows,stat4c_columns].groupby(stat4c_groupby,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})    
    stat4c["Percentage Advancements of OrgRank"] = stat4c.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
   
    print(stat4c,"\n")
    
    x1 = stat4c.iloc[0,2]
    x2 = stat4c.iloc[1,2]
    n1 = stat4c.iloc[0,1]
    n2 = stat4c.iloc[1,1]
    
    two_sample_ttest_proportions(x1,x2,n1,n2,.05)



 Government 


  Succession_Narrow  Total Transitions Total Advancements  \
0        KIS to KJI                 33                  3   
1        KJI to KJU                 53                  7   

   Percentage Advancements of OrgRank  
0                                9.09  
1                               13.21   

x1,x2,n1,n2: [3, 7, 33, 53]
p1:	 0.09090909090909091
p2:	 0.1320754716981132
p1-p2:	 -0.041166380789022294
z:	 -0.5791352785399175
pvalue:	 0.2812489487237354


 Social 


  Succession_Narrow  Total Transitions Total Advancements  \
0        KIS to KJI                  6                  1   
1        KJI to KJU                  1              False   

   Percentage Advancements of OrgRank  
0                               16.67  
1                                0.00   

x1,x2,n1,n2: [1, False, 6, 1]
p1:	 0.16666666666666666
p2:	 0.0
p1-p2:	 0.16666666666666666
z:	 0.44095855184409843
pvalue:	 0.32962150184631533


 Party 


  Succession_Narrow  Total Transitions Tot

  p2 = x2/n2
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))


# Hypothesis 2d - Position Advancement, narrow succession, by Institution

In [123]:
ics = list(trans.InstitutionCategory_2.unique())
for ic in ics:
    print("\n\n",ic,"\n\n")
    stat4c_rows = (trans["InstitutionCategory_2"]==ic) & (trans["PISame"]==True) & (trans["OrgRankChange"].isin(["same"]))
    stat4c_columns = ["Succession_Narrow","PositionAdvanceYes"]
    stat4c_groupby = ["Succession_Narrow"]
    stat4c_labels = ["Succession Period","Total Transitions","Total Advancements"]
    
    stat4c = trans.loc[stat4c_rows,stat4c_columns].groupby(stat4c_groupby,as_index=False)["PositionAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})    
    stat4c["Percentage Advancements"] = stat4c.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
   
    print(stat4c,"\n")
    
    x1 = stat4c.iloc[0,2]
    x2 = stat4c.iloc[1,2]
    n1 = stat4c.iloc[0,1]
    n2 = stat4c.iloc[1,1]
    
    two_sample_ttest_proportions(x1,x2,n1,n2,.05)



 Government 


  Succession_Narrow  Total Transitions Total Advancements  \
0        KIS to KJI                 25                 16   
1        KJI to KJU                 42                 13   

   Percentage Advancements  
0                    64.00  
1                    30.95   

x1,x2,n1,n2: [16, 13, 25, 42]
p1:	 0.64
p2:	 0.30952380952380953
p1-p2:	 0.3304761904761905
z:	 2.640471166016987
pvalue:	 0.0041395418644954995**


 Social 


  Succession_Narrow  Total Transitions Total Advancements  \
0        KIS to KJI                  5                  3   
1        KJI to KJU                  0                  0   

   Percentage Advancements  
0                     60.0  
1                      NaN   

x1,x2,n1,n2: [3, 0, 5, 0]
p1:	 0.6
p2:	 nan
p1-p2:	 nan
z:	 nan
pvalue:	 nan


 Party 


  Succession_Narrow  Total Transitions Total Advancements  \
0        KIS to KJI                 12                  0   
1        KJI to KJU                  0                  0   

   P

  p2 = x2/n2
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  p2 = x2/n2
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))


# Hypothesis 3a - Org Advancement, succession period = Five

In [124]:
select_columns = ["Succession_Five","LeaderID"]
select_rows = trans["PISame"]==True
group_columns = ["Succession_Five"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Five,LeaderID
0,KIS to KJI,29
1,KJI to KJU,53


In [125]:
stat4_rows = trans["PISame"]==True
stat4_columns = ["Succession_Five","OrgAdvanceYes"]
stat4_groupby = ["Succession_Five"]
stat4_labels = ["Succession Period","Total Transitions","Total Advancements"]

In [126]:
stat4 = trans.loc[stat4_rows,stat4_columns].groupby(stat4_groupby,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})
stat4

Unnamed: 0,Succession_Five,Total Transitions,Total Advancements
0,KIS to KJI,29,2
1,KJI to KJU,53,8


In [127]:
stat4["Percentage Advancements of OrgRank"] = stat4.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat4

Unnamed: 0,Succession_Five,Total Transitions,Total Advancements,Percentage Advancements of OrgRank
0,KIS to KJI,29,2,6.9
1,KJI to KJU,53,8,15.09


In [128]:
# 2-sample test of proportions
two_sample_ttest_proportions(25,28,94,135,.05)

x1,x2,n1,n2: [25, 28, 94, 135]
p1:	 0.26595744680851063
p2:	 0.2074074074074074
p1-p2:	 0.05855003940110323
z:	 1.0334309766029885
pvalue:	 0.1507011281797649


# Hypothesis 3b - Position Advancement, succession period = Five

In [129]:
select_columns = ["Succession_Five","LeaderID"]
select_rows = (trans["PISame"] & trans["OrgSame"])==True
group_columns = ["Succession_Five"]
trans.loc[select_rows,select_columns].groupby(group_columns,as_index=False).count()

Unnamed: 0,Succession_Five,LeaderID
0,KIS to KJI,8
1,KJI to KJU,22


In [130]:
stat4b_rows = (trans["PISame"] & trans["OrgSame"])==True
stat4b_columns = ["Succession_Five","PositionAdvanceYes"]
stat4b_groupby = ["Succession_Five"]
stat4b_labels = ["Succession Period","Total Transitions","Total Advancements"]

In [131]:
stat4b = trans.loc[stat4b_rows,stat4b_columns].groupby(stat4b_groupby,as_index=False)["PositionAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})
stat4b

Unnamed: 0,Succession_Five,Total Transitions,Total Advancements
0,KIS to KJI,8,2
1,KJI to KJU,22,7


In [132]:
stat4b["Percentage Advancements of Position Rank"] = stat4b.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
stat4b

Unnamed: 0,Succession_Five,Total Transitions,Total Advancements,Percentage Advancements of Position Rank
0,KIS to KJI,8,2,25.0
1,KJI to KJU,22,7,31.82


In [133]:
# 2-sample test of proportions
two_sample_ttest_proportions(7,11,15,26,.05)

x1,x2,n1,n2: [7, 11, 15, 26]
p1:	 0.4666666666666667
p2:	 0.4230769230769231
p1-p2:	 0.0435897435897436
z:	 0.2708996074187647
pvalue:	 0.39323412348757303


# Hypothesis 3c - Org Advancement, succession = Five, by Institution

In [134]:
ics = list(trans.InstitutionCategory_2.unique())
for ic in ics:
    print("\n\n",ic,"\n\n")
    stat4c_rows = (trans["InstitutionCategory_2"]==ic) & (trans["PISame"]==True)
    stat4c_columns = ["Succession_Five","OrgAdvanceYes"]
    stat4c_groupby = ["Succession_Five"]
    stat4c_labels = ["Succession Period","Total Transitions","Total Advancements"]
    
    stat4c = trans.loc[stat4c_rows,stat4c_columns].groupby(stat4c_groupby,as_index=False)["OrgAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})    
    stat4c["Percentage Advancements of OrgRank"] = stat4c.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
   
    print(stat4c,"\n")
    
    p1 = stat4c.iloc[0,2]
    p2 = stat4c.iloc[1,2]
    n1 = stat4c.iloc[0,1]
    n2 = stat4c.iloc[1,1]
    
    two_sample_ttest_proportions(p1,p2,n1,n2,.05)



 Government 


  Succession_Five  Total Transitions Total Advancements  \
0      KIS to KJI                 16                  0   
1      KJI to KJU                 37                  4   

   Percentage Advancements of OrgRank  
0                                0.00  
1                               10.81   

x1,x2,n1,n2: [0, 4, 16, 37]
p1:	 0.0
p2:	 0.10810810810810811
p1-p2:	 -0.10810810810810811
z:	 -1.367820220879353
pvalue:	 0.08568417831748354


 Social 


  Succession_Five  Total Transitions Total Advancements  \
0      KIS to KJI                  9                  1   
1      KJI to KJU                  5                  0   

   Percentage Advancements of OrgRank  
0                               11.11  
1                                0.00   

x1,x2,n1,n2: [1, 0, 9, 5]
p1:	 0.1111111111111111
p2:	 0.0
p1-p2:	 0.1111111111111111
z:	 0.7734924681537618
pvalue:	 0.2196154922612239


 Party 


  Succession_Five  Total Transitions Total Advancements  \
0      KIS to KJI  

  p1 = x1/n1
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))


# Hypothesis 3d - Position Advancement, Succession = Five, by Institution

In [135]:
ics = list(trans.InstitutionCategory_2.unique())
for ic in ics:
    print("\n\n",ic,"\n\n")
    stat4c_rows = (trans["InstitutionCategory_2"]==ic) & (trans["PISame"]==True) & (trans["OrgSame"]==True)
    stat4c_columns = ["Succession_Five","PositionAdvanceYes"]
    stat4c_groupby = ["Succession_Five"]
    stat4c_labels = ["Succession Period","Total Transitions","Total Advancements"]
    
    stat4c = trans.loc[stat4c_rows,stat4c_columns].groupby(stat4c_groupby,as_index=False)["PositionAdvanceYes"].agg({"Total Transitions":"count","Total Advancements":"sum"})    
    stat4c["Percentage Advancements"] = stat4c.apply(lambda x: round(x["Total Advancements"] / x["Total Transitions"] * 100,2) if x["Total Transitions"] > 0 else np.nan,axis=1)
   
    print(stat4c,"\n")
    
    p1 = stat4c.iloc[0,2]
    p2 = stat4c.iloc[1,2]
    n1 = stat4c.iloc[0,1]
    n2 = stat4c.iloc[1,1]
    
    two_sample_ttest_proportions(p1,p2,n1,n2,.05)



 Government 


  Succession_Five  Total Transitions Total Advancements  \
0      KIS to KJI                  7                  1   
1      KJI to KJU                 19                  5   

   Percentage Advancements  
0                    14.29  
1                    26.32   

x1,x2,n1,n2: [1, 5, 7, 19]
p1:	 0.14285714285714285
p2:	 0.2631578947368421
p1-p2:	 -0.12030075187969924
z:	 -0.6457883604682657
pvalue:	 0.25920821351160406


 Social 


  Succession_Five  Total Transitions Total Advancements  \
0      KIS to KJI                  1               True   
1      KJI to KJU                  1               True   

   Percentage Advancements  
0                    100.0  
1                    100.0   

x1,x2,n1,n2: [True, True, 1, 1]
p1:	 1.0
p2:	 1.0
p1-p2:	 0.0
z:	 nan
pvalue:	 nan


 Party 


  Succession_Five  Total Transitions Total Advancements  \
0      KIS to KJI                  0                  0   
1      KJI to KJU                  1               True   

   Pe

  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  p1 = x1/n1
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  p1 = x1/n1
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
  z = (p1-p2) / math.sqrt(pstar*(1-pstar)*((1/n1) + (1/n2)))
