In [141]:
import pandas as pd
import numpy as np
from datetime import date
from statistics import mean, mode

# Tables

In [142]:
path_tables = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 2 tables/"

In [143]:
# tables
filename_careerorglink = "careerorglink.xlsx"
filename_leadercareerlink = "leadercareerlink.xlsx"
filename_orgtree = "orgtree.xlsx"
filename_elected = "positions_elected.xlsx"

In [144]:
col = pd.read_excel(path_tables + filename_careerorglink,dtype="str")
col.shape

(9002, 12)

In [145]:
col.columns

Index(['CareerString', 'CareerDateString_2022', 'IsJob', 'MultipleSubstrings',
       'CareerStartYear', 'CareerStartMonth', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position',
       'Notes'],
      dtype='object')

In [146]:
lcl = pd.read_excel(path_tables + filename_leadercareerlink,dtype="str")
lcl.shape

(12617, 3)

In [147]:
lcl.columns

Index(['LeaderID', 'CareerString', 'CareerDateString_2022'], dtype='object')

In [148]:
org = pd.read_excel(path_tables + filename_orgtree,dtype="str")
org.shape

(2368, 19)

In [149]:
org.columns

Index(['InstitutionType', 'OrgType', 'PrimaryInstitution', 'OrgName',
       'PI_Index', 'OrgRank', 'P1', 'P2', 'P3', 'Alias_OrgName',
       'LinkToNext_PI', 'LinkToNext_Org', 'LinkToNext_Year', 'Notes',
       'L1_Index', 'L2_Index', 'L3_Index', 'L4_Index', 'L5_Index'],
      dtype='object')

In [150]:
elected = pd.read_excel(path_tables + filename_elected,dtype="str")
elected.shape

(105, 4)

In [151]:
elected.columns

Index(['PrimaryInstitution', 'OrgName', 'Position', 'IsElected'], dtype='object')

# Queries

In [152]:
path_queries = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 3 queries/"

In [153]:
# filename_leaderjobtransition_alljobs = "leaderjobtransition_alljobs.xlsx"
# filename_leaderjobtransition_ingov = "leaderjobtransition_ingov.xlsx"

filename_leaderjob_no_spa = "leaderjob_no_spa.xlsx"
filename_leaderjobtransition_no_spa = "leaderjobtransition_no_spa.xlsx"

In [154]:
# trans = pd.read_excel(path_queries + filename_leaderjobtransition,dtype="str")

In [155]:
today = date.today()
print(today)

2023-10-24


# Analysis

In [156]:
path_analysis = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/analysis/"

In [157]:
# subpaths
current_subpath = "2023.09.01 analysis/"

# Functions

In [158]:
def merge_results(m):
    """Print a size summary of a merge made with ``indicator=True``.

    Parameters
    ----------
    m : pandas.DataFrame
        Merge result; must contain the ``_merge`` indicator column.

    Prints the shape of the whole frame, then the shape of the subset for
    each indicator value (``left_only``, ``both``, ``right_only``).
    Returns ``None``.
    """
    print("\nMerge Results...")
    print("")
    print("\tshape     :", m.shape)

    # (printed label, indicator value) -- labels are pre-padded so the colons line up
    report_lines = (
        ("\tleft_only :", "left_only"),
        ("\tboth      :", "both"),
        ("\tright_only:", "right_only"),
    )
    for label, side in report_lines:
        subset = m[m["_merge"] == side]
        print(label, subset.shape)

In [159]:
# using this on (PI,OrgName) will ensure unique & non-null keys
# using this on a larger df will ensure unique rows and non-null keys, but not unique keys

def unique_non_null_rows(olddf):
    """Return a de-duplicated, filtered and sorted copy of ``olddf``.

    Steps, in order:
      1. drop exact duplicate rows (first occurrence kept, index renumbered);
      2. drop rows in which every column is null;
      3. drop rows whose ``PrimaryInstitution`` is null or, ignoring case,
         equals one of the stop words;
      4. print the shape before and after the filtering;
      5. sort by ``PrimaryInstitution`` then ``OrgName``.

    ``olddf`` itself is not modified. The returned frame keeps the index
    labels assigned in step 1 (it is not renumbered after sorting).
    """
    stop_words_lower = ["uncertain","current","deprecated","please_revise"]

    deduplicated = olddf.drop_duplicates(keep="first", ignore_index=True)
    non_empty = deduplicated.dropna(how="all", axis=0)

    primary = non_empty["PrimaryInstitution"]
    has_primary = primary.notna()
    is_stop_word = primary.str.lower().isin(stop_words_lower)
    df = non_empty[has_primary & ~is_stop_word]

    print("\nUnique Non-null Rows...")
    print("")
    print("\tNon-unique rows:", olddf.shape)
    print("\tUnique rows    :", df.shape)

    return df.sort_values(["PrimaryInstitution","OrgName"])

# Step. Format Jobs

#### merge col with elected - to identify elected positions

In [160]:
position_key_columns = ['PrimaryInstitution', 'OrgName', 'Position']
elected_columns = position_key_columns + ["IsElected"]

In [161]:
col2 = col.merge(elected[elected_columns],on=position_key_columns,how="outer",indicator=True)
merge_results(col2)


Merge Results...

	shape     : (9033, 14)
	left_only : (7261, 14)
	both      : (1741, 14)
	right_only: (31, 14)


In [162]:
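# TODO: address this later -- decide whether the right_only rows (positions listed in `elected` with no match in careerorglink) need to be fixed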
# col2[col2["_merge"]=="right_only"][position_key_columns]

In [163]:
col2.columns

Index(['CareerString', 'CareerDateString_2022', 'IsJob', 'MultipleSubstrings',
       'CareerStartYear', 'CareerStartMonth', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position', 'Notes',
       'IsElected', '_merge'],
      dtype='object')

In [164]:
# update col with elected positions
col = col2.drop(columns=["_merge"])

In [165]:
col.IsElected.unique()

array([nan, '1'], dtype=object)

#### select jobs that satisfy the criteria for transitions

In [166]:
# 1. IsJob = True
# 2. CareerStartYear can't be null; {InstitutionType, PI, OrgName, Position} can't be UNCERTAIN
# 3. PrimaryInstitution is not the SPA (최고인민회의)

# Criteria 1: is a job
select_jobs1 = (col["IsJob"]=="True")

In [167]:
# Criteria 2: has a start year, not UNCERTAIN

select_jobs2 = (col["CareerStartYear"].notna()) & (col["InstitutionType"]!="UNCERTAIN") & (col["PrimaryInstitution"]!="UNCERTAIN") & (col["OrgName"]!="UNCERTAIN") & (col["Position"]!="UNCERTAIN")

In [168]:
# Criteria 3: not in SPA

select_jobs3 = ~(col["PrimaryInstitution"]=="최고인민회의")

In [169]:
col.InstitutionType.unique()

array(['당외곽및사회단체_사회부문(별책)', 'UNCERTAIN', '당외곽및사회단체_경제부문(별책)', '정권기관',
       '인민군', nan, '당외곽및사회단체_체육부문', '당외곽및사회단체_근로단체', '노동당',
       '당외곽및사회단체_정치부문', '국제친선단체', '당외곽및사회단체_대외부문', '당외곽및사회단체_사회부문',
       '당외곽및사회단체_경제부문', '당외곽및사회단체_종교부문'], dtype=object)

In [170]:
job_columns = ['CareerString', 'CareerDateString_2022', 'CareerStartYear', 'CareerStartMonth', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position','IsElected']

In [171]:
# select all CERTAIN jobs that are not in the SPA (Criteria 1, 2 & 3)
jobs_all = col.loc[select_jobs1 & select_jobs2 & select_jobs3,job_columns]
jobs_all.shape

(5221, 10)

#### descriptive statistics

In [172]:
# not jobs, excluded
col[~select_jobs1].shape

(2302, 13)

In [173]:
# jobs, excluded because of unknown/uncertain data 
col[select_jobs1 & ~select_jobs2].shape

(731, 13)

In [174]:
# jobs, excluded because in SPA
col[select_jobs1 & select_jobs2 & ~select_jobs3].shape

(779, 13)

In [175]:
# jobs, certain, not in SPA
col[select_jobs1 & select_jobs2 & select_jobs3].shape

(5221, 13)

In [176]:
2302 + 731 + 779 + 5221

9033

In [177]:
col.shape

(9033, 13)

# Step. add Org and Position metadata from orgtree, such as Rank

In [178]:
org_key_columns = ['PrimaryInstitution', 'OrgName']

In [179]:
orgtree_columns = org_key_columns + ['OrgRank', 'P1', 'P2', 'P3']

In [180]:
jobs_all2 = jobs_all.merge(org[orgtree_columns],on=org_key_columns,how="left",indicator=True)
merge_results(jobs_all2)


Merge Results...

	shape     : (5221, 15)
	left_only : (0, 15)
	both      : (5221, 15)
	right_only: (0, 15)


In [181]:
# Good! No left_only means we are matching on all PI-OrgName keys
# jobs2[jobs2["_merge"]=="left_only"]

In [182]:
jobs_all2.head(2)

Unnamed: 0,CareerString,CareerDateString_2022,CareerStartYear,CareerStartMonth,CareerSubstring,InstitutionType,PrimaryInstitution,OrgName,Position,IsElected,OrgRank,P1,P2,P3,_merge
0,4.15문학창작단 단장,1989.04,1989,4,,당외곽및사회단체_사회부문(별책),4.15문화창작단,,단장,,0,단장,부단장,,both
1,"1989. 4.15문학창작단 단장, 조선작가동맹 통일문학담당 부위원장",,1989,4,4.15 문화창작단 단장,당외곽및사회단체_사회부문(별책),4.15문화창작단,,단장,,0,단장,부단장,,both


In [183]:
# determine PositionRank - i.e., find Position in P1-P3

# row-vectorized formula - meant for use in df.apply()

def determine_Position_Rank(row):
    """Return the tier (1, 2 or 3) of ``row["Position"]`` within P1-P3.

    Row-wise helper meant for ``df.apply(..., axis=1)``.

    ``row["P1"]``, ``row["P2"]`` and ``row["P3"]`` are each read as a
    comma-separated list of position titles; a tier whose value is not a
    string (e.g. NaN) is skipped. Titles are compared exactly -- the pieces
    are not stripped of whitespace.

    Returns
    -------
    int or float
        The number of the tier that lists the position, or ``np.nan`` when no
        tier lists it. If several tiers list the same title, the
        highest-numbered tier wins, because later tiers overwrite earlier
        matches.
    """
    position = row["Position"]
    rank = np.nan

    for tier, column in enumerate(("P1", "P2", "P3"), start=1):
        titles = row[column]
        if not isinstance(titles, str):
            continue
        if position in titles.split(","):
            rank = tier

    return rank

In [184]:
jobs_all2["PositionRank"] = jobs_all2.apply(determine_Position_Rank,axis=1)

In [185]:
jobs_all2.columns

Index(['CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position', 'IsElected', 'OrgRank',
       'P1', 'P2', 'P3', '_merge', 'PositionRank'],
      dtype='object')

In [186]:
new_jobs_columns = ['CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position', 'IsElected', 'OrgRank', 'PositionRank']

In [187]:
jobs_all3 = jobs_all2[new_jobs_columns]

In [188]:
jobs_all3.shape

(5221, 12)

# Step. merge leadercareerlink & jobs

In [189]:
job_key_columns = ['CareerString', 'CareerDateString_2022']

In [190]:
lcl.head(2)

Unnamed: 0,LeaderID,CareerString,CareerDateString_2022
0,리선권,개성공단 남북공동위원회 통행통신통관 분과위원회,2013.09 ~ 2014.01
1,조경철,故 김정일 국가장의위원회 위원,2011.12


In [191]:
# leaderjoblink - outer join for descriptive statistics
ljob_outer = lcl.merge(jobs_all3,on=job_key_columns,how="outer",indicator=True)
merge_results(ljob_outer)


Merge Results...

	shape     : (13257, 14)
	left_only : (7210, 14)
	both      : (6047, 14)
	right_only: (0, 14)


In [192]:
# no right_only confirms no mismatched career keys in careerorglink
# left-only indicates all of the excluded career items

In [193]:
# leaderjoblink - inner join for the table we will use to make transitions
ljob = lcl.merge(jobs_all3,on=job_key_columns,how="inner",indicator=True)
merge_results(ljob)


Merge Results...

	shape     : (6047, 14)
	left_only : (0, 14)
	both      : (6047, 14)
	right_only: (0, 14)


In [194]:
# instantiate CareerStartDate as an all-null *object* column: the following cells
# fill it with "YYYYMM" strings, and writing strings into a float64 NaN column is an
# incompatible-dtype assignment (deprecated in recent pandas versions)
ljob["CareerStartDate"] = pd.Series(np.nan, index=ljob.index, dtype="object")

In [195]:
# CareerStartDate when CareerStartMonth is null: the month part is written as "00"
# (vectorized string concatenation instead of a row-wise apply, which is slow and
#  returns a DataFrame rather than a Series when the selection is empty)
selection = ljob["CareerStartMonth"].isna()
ljob.loc[selection,"CareerStartDate"] = ljob.loc[selection,"CareerStartYear"].astype(str) + "00"

In [196]:
# CareerStartDate when CareerStartMonth has 2 digits: year followed by the month as-is
# (vectorized string operations instead of row-wise apply)
selection = (ljob["CareerStartMonth"].notna()) & (ljob["CareerStartMonth"].astype(str).str.len()==2)
ljob.loc[selection,"CareerStartDate"] = ljob.loc[selection,"CareerStartYear"].astype(str) + ljob.loc[selection,"CareerStartMonth"].astype(str)

In [197]:
# CareerStartDate when CareerStartMonth has 1 digit: year + "0" + month, so every date has 6 characters
# (vectorized string operations instead of row-wise apply)
selection = (ljob["CareerStartMonth"].notna()) & (ljob["CareerStartMonth"].astype(str).str.len()==1)
ljob.loc[selection,"CareerStartDate"] = ljob.loc[selection,"CareerStartYear"].astype(str) + "0" + ljob.loc[selection,"CareerStartMonth"].astype(str)

In [198]:
ljob.columns

Index(['LeaderID', 'CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position', 'IsElected', 'OrgRank',
       'PositionRank', '_merge', 'CareerStartDate'],
      dtype='object')

In [199]:
ljob_columns = ['LeaderID', 'CareerString', 'CareerDateString_2022','CareerStartYear', 'CareerStartMonth','CareerStartDate','CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position','IsElected','OrgRank', 'PositionRank']

In [200]:
ljob = ljob[ljob_columns]

In [201]:
ljob.head(2)

Unnamed: 0,LeaderID,CareerString,CareerDateString_2022,CareerStartYear,CareerStartMonth,CareerStartDate,CareerSubstring,InstitutionType,PrimaryInstitution,OrgName,Position,IsElected,OrgRank,PositionRank
0,리원일,노동성 상(유임),1999.02,1999,2,199902,,정권기관,내각,노동성,상,,1,1
1,조용원,당 정치국 후보위원,2020.01,2020,1,202001,,노동당,노동당,당중앙위원회_정치국,후보위원,1.0,2,3


#### descriptive statistics

In [202]:
# unique leaders, overall
len(lcl.LeaderID.unique())

637

In [203]:
# unique leaders, with jobs included
len(ljob.LeaderID.unique())

584

In [204]:
# leaders not having jobs that satisfy our criteria; excluded from analysis
len(lcl.LeaderID.unique())-len(ljob.LeaderID.unique())

53

In [205]:
# job items, after pairing with leaders (some jobs reported for more than one leader)
ljob.shape

(6047, 14)

In [206]:
# unique positions
position_key_columns = ['PrimaryInstitution', 'OrgName','Position']
ljob[position_key_columns].drop_duplicates().shape

(1507, 3)

In [207]:
# unique orgs
org_key_columns = ['PrimaryInstitution', 'OrgName']
ljob[org_key_columns].drop_duplicates().shape

(967, 2)

# Step. calculate time-adjacent transitions for each leader

In [208]:
def calculate_resume_transitions(lc,leaderid,verbose=True):
    """Build the time-adjacent job transitions of one leader.

    Every job of the leader is paired with each of the leader's jobs that has
    the smallest ``CareerStartDate`` strictly greater than its own. Jobs that
    share a start date therefore each produce (or each receive) a transition,
    so a leader with concurrent jobs yields several rows per step.

    Parameters
    ----------
    lc : pandas.DataFrame
        Leader-job table. Must contain ``LeaderID``, ``CareerStartYear``,
        ``CareerStartDate``, ``PrimaryInstitution`` and ``OrgName``; all other
        columns are carried through.
    leaderid
        Value of ``LeaderID`` to select.
    verbose : bool, default True
        When true, print ``<leaderid> <n> transitions`` (the original
        behaviour). Pass ``False`` to silence it when looping over many leaders.

    Returns
    -------
    pandas.DataFrame
        One row per transition. Every column of ``lc`` except ``LeaderID``
        appears twice, suffixed ``_1`` (earlier job) and ``_2`` (next job),
        plus the helper column ``CareerStartDate_2_min``. Empty when the
        leader has no pair of jobs with different start dates.

    Notes
    -----
    ``CareerStartDate`` values are compared with ``<``/``min`` as they are;
    this assumes they are equal-length "YYYYMM" strings so that string order
    matches chronological order.
    """
    # this leader's dated jobs, at most one per (start year, PI, org)
    t = lc[(lc["LeaderID"]==leaderid) & (lc["CareerStartYear"].notna())]
    t = t.drop_duplicates(["CareerStartYear","PrimaryInstitution","OrgName"],keep="first")
    t = t.sort_values("CareerStartDate",ignore_index=True)

    # pair every job with every job of the same leader (self cross-join)
    tr = t.merge(t,on="LeaderID",how="inner",suffixes=("_1","_2"))
    tr = tr.sort_values(["CareerStartDate_1","CareerStartDate_2"])

    # keep only pairs in which job 2 starts strictly after job 1
    # (a vectorized mask; a row-wise apply returns a DataFrame on an empty frame)
    tr = tr[tr["CareerStartDate_1"] < tr["CareerStartDate_2"]]

    # earliest later start date for each starting date
    dates = tr[["CareerStartDate_1","CareerStartDate_2"]]
    mindate = dates.groupby("CareerStartDate_1",as_index=False).agg({"CareerStartDate_2":"min"})
    mindate.columns = ["CareerStartDate_1","CareerStartDate_2_min"]

    # keep the pairs whose second job starts on that earliest later date
    tr2 = tr.merge(mindate,on="CareerStartDate_1",how="left")
    tr3 = tr2[tr2["CareerStartDate_2"] == tr2["CareerStartDate_2_min"]]

    if verbose:
        print(leaderid,tr3.shape[0],"transitions")

    return tr3

#### a good example of many of the problems we need to address

In [209]:
leaderid = "강관주"
resume = calculate_resume_transitions(ljob,leaderid)

강관주 16 transitions


In [210]:
some_columns = ["LeaderID","CareerStartDate_1","PrimaryInstitution_1","OrgName_1","Position_1","CareerStartDate_2","PrimaryInstitution_2","OrgName_2","Position_2"]
resume[some_columns]

Unnamed: 0,LeaderID,CareerStartDate_1,PrimaryInstitution_1,OrgName_1,Position_1,CareerStartDate_2,PrimaryInstitution_2,OrgName_2,Position_2
0,강관주,196100,체육연구원,체육과학연구소,연구원,197300,노동당,당중앙위원회_문화예술부_X과,과장
1,강관주,196100,체육연구원,체육과학연구소,연구원,197300,평양만경대예술단,섭외부,부부장
14,강관주,197300,노동당,당중앙위원회_문화예술부_X과,과장,198600,노동당,당중앙위원회_조직지도부,부부장
15,강관주,197300,평양만경대예술단,섭외부,부부장,198600,노동당,당중앙위원회_조직지도부,부부장
38,강관주,198600,노동당,당중앙위원회_조직지도부,부부장,198800,노동당,당중앙위원회_통일전선부,부부장
49,강관주,198800,노동당,당중앙위원회_통일전선부,부부장,198803,노동당,당중앙위원회,위원
59,강관주,198803,노동당,당중앙위원회,위원,198903,노동당,당중앙위원회_통일전선부,제1부부장
68,강관주,198903,노동당,당중앙위원회_통일전선부,제1부부장,199008,내각,조국평화통일위원회,부위원장
69,강관주,198903,노동당,당중앙위원회_통일전선부,제1부부장,199008,정무원,조국평화통일위원회,부위원장
76,강관주,199008,내각,조국평화통일위원회,부위원장,199109,보천보전자악단,,고문


In [211]:
leaderids = list(ljob["LeaderID"].unique())

In [212]:
len(leaderids)

584

In [213]:
def compile_leader_transitions(lc):
    """Stack the transition tables of every leader found in ``lc``.

    Calls ``calculate_resume_transitions`` once per distinct ``LeaderID``
    (in order of first appearance) and concatenates the results into a single
    frame with a fresh 0..n-1 index.
    """
    per_leader_tables = [
        calculate_resume_transitions(lc, leaderid)
        for leaderid in lc["LeaderID"].unique()
    ]
    return pd.concat(per_leader_tables, ignore_index=True)

In [214]:
transitions = compile_leader_transitions(ljob)

리원일 8 transitions
조용원 10 transitions
정경택 16 transitions
리하일 9 transitions
최상건 14 transitions
차희림 4 transitions
김기남 30 transitions
김령성 8 transitions
김정숙 24 transitions
박성철a 51 transitions
김창룡 0 transitions
김인식 2 transitions
리영호 5 transitions
최태복 31 transitions
김영일a 18 transitions
주상성 11 transitions
강동윤 4 transitions
김익현 11 transitions
김일성 76 transitions
김일본 40 transitions
박태화 8 transitions
최광 28 transitions
백학림 28 transitions
김중린 33 transitions
리종옥 43 transitions
리을설 20 transitions
박송봉 4 transitions
권희경 8 transitions
김의순 14 transitions
오진우 31 transitions
양형섭 42 transitions
김영주 33 transitions
김원균 4 transitions
김철만 24 transitions
백인준 10 transitions
황순희 14 transitions
최영림 29 transitions
김영남 38 transitions
전병호 22 transitions
계응태 27 transitions
홍성남 30 transitions
강석숭 16 transitions
김복신 19 transitions
채희정 25 transitions
김용순 35 transitions
김영춘 18 transitions
길재경 8 transitions
강관주 16 transitions
박용석 10 transitions
박봉주 23 transitions
전하철 13 transitions
장병규 3 transitions
김국태 19 transitions
김정일 26

권영진 3 transitions
리히용 2 transitions
리선권 4 transitions
김영환 6 transitions
우상철 2 transitions
리태섭 0 transitions
리호림 0 transitions
김능오 7 transitions
김조국 0 transitions
강윤석 7 transitions
박광식 0 transitions
박수일 5 transitions
서창룡 0 transitions
장광봉 0 transitions
장기호 0 transitions
전태수 0 transitions
오동일 0 transitions
정인철 0 transitions
김락겸 1 transitions
장길성 2 transitions
리득남 1 transitions
강봉훈 6 transitions
김세복 0 transitions
장춘성 0 transitions
강표영 3 transitions
윤동현 1 transitions
조경철 2 transitions
고길선 1 transitions
김유일 0 transitions
김정식 0 transitions
리정남 2 transitions
박성철b 0 transitions
박창호 4 transitions
서홍찬 9 transitions
송춘섭 5 transitions
신룡만 2 transitions
안금철 0 transitions
김명남 0 transitions
렴철성 1 transitions
리용주 2 transitions
박영호 1 transitions
장창하 1 transitions
장혁 2 transitions
전일호 1 transitions
최영호a 1 transitions
장길룡 3 transitions
최선희 9 transitions
김광혁 1 transitions
김명길b 2 transitions
전학철 2 transitions
김충일 1 transitions
손철주 3 transitions
마종선 0 transitions
유진 2 transitions
김만성 2 transitions
장금철 0 tra

#### add OrgAdvance, PositionAdvance variables

In [215]:
# OrgAdvance = OrgRank_1 - OrgRank_2 (rank of the earlier job minus rank of the next job).
# Vectorized: a missing rank gives NaN instead of raising inside int(), and an empty
# `transitions` frame no longer breaks the column assignment.
transitions["OrgAdvance"] = pd.to_numeric(transitions["OrgRank_1"]) - pd.to_numeric(transitions["OrgRank_2"])

In [216]:
# PositionAdvance = PositionRank_1 - PositionRank_2 (earlier job minus next job).
# PositionRank is NaN when the position is not listed in P1-P3; int(NaN) raises, so
# subtract vectorized and let unmatched positions propagate as NaN.
transitions["PositionAdvance"] = pd.to_numeric(transitions["PositionRank_1"]) - pd.to_numeric(transitions["PositionRank_2"])

#### descriptive statistics

In [217]:
# leaders with 1 or more job transition
len(transitions.LeaderID.unique())

505

In [218]:
# leaders with no transitions (e.g., only 1 job, or all jobs sharing one start date); excluded from analysis
len(ljob.LeaderID.unique())-len(transitions.LeaderID.unique())

79

In [219]:
# number of transitions
transitions.shape

(4306, 30)

In [220]:
transition_counts = transitions.groupby("LeaderID")["Position_1"].count()

In [221]:
min(transition_counts)

1

In [222]:
max(transition_counts)

76

In [223]:
mean(transition_counts)

8.526732673267327

In [224]:
mode(transition_counts)

1

#### frequency of leaders by # of job transitions

In [225]:
# x_axis: every transition count from 0 up to the observed maximum
# transition_count_distribution: number of leaders having exactly that many transitions
x_axis = list(range(0,max(transition_counts)+1))
transition_count_distribution = [
    sum(1 for count in transition_counts if count == i)
    for i in x_axis
]

In [226]:
transition_count_freq = pd.DataFrame({"Count":x_axis,"Frequency":transition_count_distribution})

In [227]:
# filename_transition_count_freq_alljobs = "transition_count_freq_alljobs.xlsx"
# filename_transition_count_freq_ingov = "transition_count_freq_ingov.xlsx"
filename_transition_count_freq_ingov = "transition_count_freq_no_spa.xlsx"

transition_count_freq.to_excel(path_analysis + current_subpath + filename_transition_count_freq_ingov,index=False)

# Export Query

#### format leaderjob query for export

In [232]:
ljob.columns

Index(['LeaderID', 'CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerStartDate', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position',
       'IsElected', 'OrgRank', 'PositionRank'],
      dtype='object')

#### export leaderjob query

In [233]:
# refresh leaderjob query
ljob.to_excel(path_queries + filename_leaderjob_no_spa,index=False)

#### format transitions query for export

In [234]:
transitions.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Position_1', 'IsElected_1', 'OrgRank_1', 'PositionRank_1',
       'CareerString_2', 'CareerDateString_2022_2', 'CareerStartYear_2',
       'CareerStartMonth_2', 'CareerStartDate_2', 'CareerSubstring_2',
       'InstitutionType_2', 'PrimaryInstitution_2', 'OrgName_2', 'Position_2',
       'IsElected_2', 'OrgRank_2', 'PositionRank_2', 'OrgAdvance',
       'PositionAdvance'],
      dtype='object')

In [235]:
# omit CareerStartDate_2_min and any other fields we don't need

transitions_columns = ['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
        'OrgName_1', 'Position_1', 'IsElected_1', 'OrgRank_1', 'PositionRank_1',
        'CareerString_2', 'CareerDateString_2022_2', 'CareerStartYear_2',
        'CareerStartMonth_2', 'CareerStartDate_2', 'CareerSubstring_2',
        'InstitutionType_2', 'PrimaryInstitution_2', 'OrgName_2', 'Position_2', 'IsElected_2',
        'OrgRank_2', 'PositionRank_2','OrgAdvance','PositionAdvance']

In [236]:
transitions = transitions[transitions_columns]

#### export transition query

In [237]:
# refresh leaderjobtransition query
transitions.to_excel(path_queries + filename_leaderjobtransition_no_spa,index=False)