In [1]:
import pandas as pd
import numpy as np
from datetime import date
from statistics import mean, mode

# Tables

In [2]:
path_tables = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 2 tables/"

In [3]:
# tables
filename_careerorglink = "careerorglink.xlsx"
filename_leadercareerlink = "leadercareerlink.xlsx"
filename_orgtree = "orgtree.xlsx"
filename_elected = "positions_elected.xlsx"

In [4]:
col = pd.read_excel(path_tables + filename_careerorglink,dtype="str")
col.shape

(9002, 12)

In [5]:
col.columns

Index(['CareerString', 'CareerDateString_2022', 'IsJob', 'MultipleSubstrings',
       'CareerStartYear', 'CareerStartMonth', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position',
       'Notes'],
      dtype='object')

In [6]:
lcl = pd.read_excel(path_tables + filename_leadercareerlink,dtype="str")
lcl.shape

(12617, 3)

In [7]:
lcl.columns

Index(['LeaderID', 'CareerString', 'CareerDateString_2022'], dtype='object')

In [8]:
org = pd.read_excel(path_tables + filename_orgtree,dtype="str")
org.shape

(2367, 19)

In [9]:
org.columns

Index(['InstitutionType', 'OrgType', 'PrimaryInstitution', 'OrgName',
       'PI_Index', 'OrgRank', 'P1', 'P2', 'P3', 'Alias_OrgName',
       'LinkToNext_PI', 'LinkToNext_Org', 'LinkToNext_Year', 'Notes',
       'L1_Index', 'L2_Index', 'L3_Index', 'L4_Index', 'L5_Index'],
      dtype='object')

In [10]:
elected = pd.read_excel(path_tables + filename_elected,dtype="str")
elected.shape

(105, 4)

In [11]:
elected.columns

Index(['PrimaryInstitution', 'OrgName', 'Position', 'IsElected'], dtype='object')

# Queries

In [12]:
path_queries = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/data/combined data/combined data - 3 queries/"

In [13]:
# filename_leaderjobtransition_alljobs = "leaderjobtransition_alljobs.xlsx"
filename_leaderjobtransition_ingov = "leaderjobtransition_ingov.xlsx"

In [14]:
# trans = pd.read_excel(path_queries + filename_leaderjobtransition,dtype="str")

In [15]:
today = date.today()
print(today)

2024-02-10


# Analysis

In [16]:
path_analysis = "C:/Users/seoul/Dropbox/00 technical/github/nkelites/analysis/"

In [17]:
# subpaths
current_subpath = "2023.09.01 analysis/"

# Functions

In [18]:
def merge_results(m):
    """Print a row-count report for a merge that was run with ``indicator=True``.

    Parameters
    ----------
    m : pandas.DataFrame
        Merge result; must contain the ``_merge`` indicator column.

    Prints the shape of ``m`` followed by the shape of the rows belonging to
    each indicator value (``left_only``, ``both``, ``right_only``).
    Returns ``None``.
    """
    print("\nMerge Results...")
    print("")
    print("\tshape     :", m.shape)

    # labels are right-padded so the colons line up in the printed report
    for padded_label in ("left_only ", "both      ", "right_only"):
        matching_rows = m[m["_merge"] == padded_label.strip()]
        print("\t" + padded_label + ":", matching_rows.shape)

In [19]:
# using this on (PI,OrgName) will ensure unique keys with a non-null PrimaryInstitution (OrgName may still be null)
# using this on a larger df will ensure unique rows with a non-null PrimaryInstitution, but not unique keys

def unique_non_null_rows(olddf):
    """Return a de-duplicated, cleaned and sorted copy of ``olddf``.

    Steps, in order:
      1. drop exact duplicate rows (first occurrence kept, index renumbered);
      2. drop rows in which every column is null;
      3. drop rows whose ``PrimaryInstitution`` is null;
      4. drop rows whose ``PrimaryInstitution`` is, case-insensitively, one of
         the placeholder words ``uncertain``, ``current``, ``deprecated``,
         ``please_revise``;
      5. sort by ``PrimaryInstitution`` then ``OrgName``.

    The before/after shapes are printed (measured before the final sort, which
    does not change the shape).  ``olddf`` itself is not modified.

    Parameters
    ----------
    olddf : pandas.DataFrame
        Must contain ``PrimaryInstitution`` and ``OrgName`` columns.

    Returns
    -------
    pandas.DataFrame
    """
    stop_words_lower = ["uncertain", "current", "deprecated", "please_revise"]

    # steps 1-2: exact duplicates, then rows that are null in every column
    cleaned = (
        olddf
        .drop_duplicates(keep="first", ignore_index=True)
        .dropna(how="all", axis=0)
    )

    # step 3: a usable key needs a PrimaryInstitution
    cleaned = cleaned[cleaned["PrimaryInstitution"].notna()]

    # step 4: placeholder values are not real institutions
    is_placeholder = cleaned["PrimaryInstitution"].str.lower().isin(stop_words_lower)
    cleaned = cleaned[~is_placeholder]

    print("\nUnique Non-null Rows...")
    print("")
    print("\tNon-unique rows:", olddf.shape)
    print("\tUnique rows    :", cleaned.shape)

    # step 5
    return cleaned.sort_values(["PrimaryInstitution", "OrgName"])

# Step. Format Jobs

#### merge col with elected - to identify elected positions

In [20]:
position_key_columns = ['PrimaryInstitution', 'OrgName', 'Position']
elected_columns = position_key_columns + ["IsElected"]

In [21]:
col2 = col.merge(elected[elected_columns],on=position_key_columns,how="outer",indicator=True)
merge_results(col2)


Merge Results...

	shape     : (9033, 14)
	left_only : (7261, 14)
	both      : (1741, 14)
	right_only: (31, 14)


In [22]:
# TODO: address this later... decide what to do with the right_only rows (elected positions with no matching row in careerorglink)
# col2[col2["_merge"]=="right_only"][position_key_columns]

In [23]:
col2.columns

Index(['CareerString', 'CareerDateString_2022', 'IsJob', 'MultipleSubstrings',
       'CareerStartYear', 'CareerStartMonth', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position', 'Notes',
       'IsElected', '_merge'],
      dtype='object')

In [24]:
# update col with elected positions
col = col2.drop(columns=["_merge"])

In [25]:
col.IsElected.unique()

array([nan, '1'], dtype=object)

#### select jobs that satisfy the criteria for transitions

In [26]:
# IsJob = True
# CareerStartYear can't be null
# IsElected = False
# PI, OrgName, Position can't be UNCERTAIN

# Criteria 1: is a job
select_jobs1 = (col["IsJob"]=="True")

In [27]:
# Criteria 2: has a start year, not elected, not UNCERTAIN
# (a missing value is not the literal "UNCERTAIN", so null fields pass the last test)

select_jobs2 = (
    col["CareerStartYear"].notna()
    & col["IsElected"].isna()
    & col[["InstitutionType", "PrimaryInstitution", "OrgName", "Position"]].ne("UNCERTAIN").all(axis=1)
)

In [28]:
# Criteria 3: is InGov
# A job counts as "in government" when its InstitutionType is NOT one of the
# types listed below.
# NOTE(review): the Korean labels appear to be party-peripheral / social
# organisation categories and international friendship organisations - confirm.
# NOTE: the variable name is misspelled ("instituiton"); it is kept as-is here
# because other cells may reference it.

outside_instituiton_types = ['당외곽및사회단체_사회부문(별책)',
                             '당외곽및사회단체_경제부문(별책)',
                             '당외곽및사회단체_사회부문',
                             '당외곽및사회단체_경제부문',
                             '당외곽및사회단체_체육부문',
                             '당외곽및사회단체_근로단체',
                             '당외곽및사회단체_정치부문',
                             '당외곽및사회단체_대외부문',
                             '당외곽및사회단체_종교부문',
                             '국제친선단체',
                             # 'UNCERTAIN' is also rejected by select_jobs2; listed here so
                             # that select_jobs3 is correct when used on its own
                             'UNCERTAIN',
                             # isin() matches missing values against np.nan, so rows with
                             # no InstitutionType are treated as outside government too
                             np.nan]

# True for rows whose InstitutionType is not in the list above
select_jobs3 = ~(col["InstitutionType"].isin(outside_instituiton_types))

In [29]:
col.InstitutionType.unique()

array(['당외곽및사회단체_사회부문(별책)', 'UNCERTAIN', '당외곽및사회단체_경제부문(별책)', '정권기관',
       '인민군', nan, '당외곽및사회단체_체육부문', '당외곽및사회단체_근로단체', '노동당',
       '당외곽및사회단체_정치부문', '국제친선단체', '당외곽및사회단체_대외부문', '당외곽및사회단체_사회부문',
       '당외곽및사회단체_경제부문', '당외곽및사회단체_종교부문'], dtype=object)

In [30]:
job_columns = ['CareerString', 'CareerDateString_2022', 'CareerStartYear', 'CareerStartMonth', 'CareerSubstring',
       'InstitutionType', 'PrimaryInstitution', 'OrgName', 'Position']

In [31]:
# for all CERTAIN, non-elected jobs, select on Criteria 1 & 2
jobs_all = col.loc[select_jobs1 & select_jobs2,job_columns]
jobs_all.shape

(4321, 9)

In [32]:
# for all CERTAIN, non-elected, and ingov jobs, select on Criteria 1 & 2 & 3
jobs_ingov = col.loc[select_jobs1 & select_jobs2 & select_jobs3,job_columns]
jobs_ingov.shape

(3142, 9)

#### descriptive statistics

In [33]:
# not jobs, excluded
col[~select_jobs1].shape

(2302, 13)

In [34]:
# jobs, excluded, because elected, because CareerStartYear is unknown, or because InstitutionType/PI/OrgName/Position is UNCERTAIN
col[select_jobs1 & ~select_jobs2].shape

(2410, 13)

In [35]:
# jobs, excluded, because not in gov
col[select_jobs1 & select_jobs2 & ~select_jobs3].shape

(1179, 13)

In [36]:
# out of gov + ingov
col[select_jobs1 & select_jobs2 & ~select_jobs3].shape[0] + col[select_jobs1 & select_jobs2 & select_jobs3].shape[0]

4321

In [37]:
# jobs, out of gov + ingov 
col[select_jobs1 & select_jobs2].shape

(4321, 13)

# Step. add Org and Position metadata from orgtree, such as Rank

In [38]:
org_key_columns = ['PrimaryInstitution', 'OrgName']

In [39]:
orgtree_columns = org_key_columns + ['OrgRank', 'P1', 'P2', 'P3']

In [40]:
jobs_ingov2 = jobs_ingov.merge(org[orgtree_columns],on=org_key_columns,how="left",indicator=True)
merge_results(jobs_ingov2)


Merge Results...

	shape     : (3142, 14)
	left_only : (0, 14)
	both      : (3142, 14)
	right_only: (0, 14)


In [41]:
# Good! No left_only means we are matching on all PI-OrgName keys
# jobs_ingov2[jobs_ingov2["_merge"]=="left_only"]

In [42]:
jobs_ingov2.head(2)

Unnamed: 0,CareerString,CareerDateString_2022,CareerStartYear,CareerStartMonth,CareerSubstring,InstitutionType,PrimaryInstitution,OrgName,Position,OrgRank,P1,P2,P3,_merge
0,1984. 9 개성시 농촌경리위원장,,1984,9,,정권기관,개성시농촌경리위원회,,위원장,0,위원장,,,both
1,개성시 농촌경리위원장,1984.09,1984,9,,정권기관,개성시농촌경리위원회,,위원장,0,위원장,,,both


In [43]:
# determine PositionRank - i.e., find Position in P1-P3

# row-vectorized formula - meant for use in df.apply()

def determine_Position_Rank(row):
    """Return the tier (1, 2 or 3) whose title list contains ``row["Position"]``.

    Written for row-wise use: ``df.apply(determine_Position_Rank, axis=1)``.

    Parameters
    ----------
    row : mapping / pandas.Series
        Must provide ``Position``, ``P1``, ``P2`` and ``P3``.  Each ``P*`` value
        is either a comma-separated string of position titles or a non-string
        (e.g. NaN), which is skipped.

    Returns
    -------
    int or float
        1, 2 or 3 for a match in ``P1``, ``P2`` or ``P3``; ``np.nan`` when the
        position is in none of them.

    Notes
    -----
    * Whitespace around the position and around each listed title is ignored,
      so ``"위원장, 부위원장"`` matches ``부위원장`` (previously the leading space
      after the comma prevented the match).
    * If a title appears in more than one tier, the highest-numbered tier wins.
      This is the original behaviour and is kept unchanged.
    """
    position = row["Position"]
    if isinstance(position, str):
        position = position.strip()

    position_rank = np.nan
    for rank, column in enumerate(("P1", "P2", "P3"), start=1):
        listed = row[column]
        if not isinstance(listed, str):
            # empty tier (NaN / None): nothing to match against
            continue
        titles = [title.strip() for title in listed.split(",")]
        if position in titles:
            # no break: a later tier deliberately overrides an earlier match
            position_rank = rank

    return position_rank

In [44]:
jobs_ingov2["PositionRank"] = jobs_ingov2.apply(determine_Position_Rank,axis=1)

In [45]:
jobs_ingov2.columns

Index(['CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position', 'OrgRank', 'P1', 'P2',
       'P3', '_merge', 'PositionRank'],
      dtype='object')

In [46]:
new_jobs_columns = ['CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position', 'OrgRank', 'PositionRank']

In [47]:
jobs_ingov = jobs_ingov2[new_jobs_columns]

In [48]:
jobs_ingov.shape

(3142, 11)

# Step. merge leadercareerlink & jobs

In [49]:
job_key_columns = ['CareerString', 'CareerDateString_2022']

In [50]:
lcl.head(2)

Unnamed: 0,LeaderID,CareerString,CareerDateString_2022
0,리선권,개성공단 남북공동위원회 통행통신통관 분과위원회,2013.09 ~ 2014.01
1,조경철,故 김정일 국가장의위원회 위원,2011.12


In [51]:
# leaderjoblink - outer join for descriptive statistics
ljob_outer = lcl.merge(jobs_ingov,on=job_key_columns,how="outer",indicator=True)
merge_results(ljob_outer)


Merge Results...

	shape     : (12795, 13)
	left_only : (9569, 13)
	both      : (3226, 13)
	right_only: (0, 13)


In [52]:
# no right_only confirms no mismatched career keys in careerorglink
# left-only indicates all of the excluded career items

In [53]:
# leaderjoblink - inner join for the table we will use to make transitions
ljob = lcl.merge(jobs_ingov,on=job_key_columns,how="inner",indicator=True)
merge_results(ljob)


Merge Results...

	shape     : (3226, 13)
	left_only : (0, 13)
	both      : (3226, 13)
	right_only: (0, 13)


In [54]:
# instantiate CareerStartDate as an all-missing *object* column: the cells below
# write "YYYYMM" strings into it, and assigning strings into a float NaN column
# relies on silent upcasting, which pandas has deprecated
ljob["CareerStartDate"] = pd.Series(np.nan, index=ljob.index, dtype="object")

In [55]:
# CareerStartDate when CareerStartMonth is null: year followed by the placeholder month "00"
# (vectorized string concatenation; also well-defined when no row is selected)
selection = ljob["CareerStartMonth"].isna()
ljob.loc[selection,"CareerStartDate"] = ljob.loc[selection,"CareerStartYear"].astype(str) + "00"

In [56]:
# CareerStartDate when CareerStartMonth has 2 digits: year + month
# (vectorized; astype(str) mirrors the str() call of the row-wise version)
selection = (ljob["CareerStartMonth"].notna()) & (ljob["CareerStartMonth"].astype(str).str.len()==2)
ljob.loc[selection,"CareerStartDate"] = ljob.loc[selection,"CareerStartYear"].astype(str) + ljob.loc[selection,"CareerStartMonth"].astype(str)

In [57]:
# CareerStartDate when CareerStartMonth has 1 digit: year + "0" + month, so every date is 6 characters
# (vectorized; astype(str) mirrors the str() call of the row-wise version)
selection = (ljob["CareerStartMonth"].notna()) & (ljob["CareerStartMonth"].astype(str).str.len()==1)
ljob.loc[selection,"CareerStartDate"] = ljob.loc[selection,"CareerStartYear"].astype(str) + "0" + ljob.loc[selection,"CareerStartMonth"].astype(str)

In [58]:
ljob.columns

Index(['LeaderID', 'CareerString', 'CareerDateString_2022', 'CareerStartYear',
       'CareerStartMonth', 'CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position', 'OrgRank', 'PositionRank',
       '_merge', 'CareerStartDate'],
      dtype='object')

In [59]:
ljob_columns = ['LeaderID', 'CareerString', 'CareerDateString_2022','CareerStartYear', 'CareerStartMonth','CareerStartDate','CareerSubstring', 'InstitutionType',
       'PrimaryInstitution', 'OrgName', 'Position','OrgRank', 'PositionRank']

In [60]:
ljob = ljob[ljob_columns]

In [61]:
ljob.head(2)

Unnamed: 0,LeaderID,CareerString,CareerDateString_2022,CareerStartYear,CareerStartMonth,CareerStartDate,CareerSubstring,InstitutionType,PrimaryInstitution,OrgName,Position,OrgRank,PositionRank
0,리원일,노동성 상(유임),1999.02,1999,2.0,199902,,정권기관,내각,노동성,상,1,1
1,리하일,인민무력부 작전국 국장,1975.0,1975,,197500,,정권기관,정무원,인민무력부A_작전국,국장,2,1


#### descriptive statistics

In [62]:
# unique leaders, overall
len(lcl.LeaderID.unique())

637

In [63]:
# unique leaders, with jobs included
len(ljob.LeaderID.unique())

505

In [64]:
# leaders not having jobs that satisfy our criteria; excluded from analysis
len(lcl.LeaderID.unique())-len(ljob.LeaderID.unique())

132

In [65]:
# job items, after pairing with leaders (some jobs reported for more than one leader)
ljob.shape

(3226, 13)

In [66]:
# unique positions
position_key_columns = ['PrimaryInstitution', 'OrgName','Position']
ljob[position_key_columns].drop_duplicates().shape

(955, 3)

In [67]:
# unique orgs
org_key_columns = ['PrimaryInstitution', 'OrgName']
ljob[org_key_columns].drop_duplicates().shape

(622, 2)

# Step. calculate time-adjacent transitions for each leader

In [68]:
def calculate_resume_transitions(lc, leaderid, verbose=True):
    """Pair each job of one leader with the job(s) that start next in time.

    Parameters
    ----------
    lc : pandas.DataFrame
        Leader-job table.  Must contain ``LeaderID``, ``CareerStartYear``,
        ``CareerStartDate``, ``PrimaryInstitution`` and ``OrgName``; every
        other column is carried along.
    leaderid : str
        Value of ``LeaderID`` to process.
    verbose : bool, default True
        Print ``<leaderid> <n> transitions`` when True (the original
        behaviour).  Pass False to silence the line, e.g. inside large loops.

    Returns
    -------
    pandas.DataFrame
        One row per transition.  Columns of the earlier job carry the suffix
        ``_1``, columns of the later job ``_2``; ``CareerStartDate_2_min``
        holds the earliest later start date found for the ``_1`` job.  Empty
        when the leader has no pair of jobs with different start dates.

    Notes
    -----
    * Start dates are compared with ``<`` / ``==`` on the ``CareerStartDate``
      values as stored (the notebook builds them as ``YYYYMM`` strings).
    * Jobs are de-duplicated on (``CareerStartYear``, ``PrimaryInstitution``,
      ``OrgName``) before pairing, keeping the first occurrence.
    * Jobs that share a start date are not collapsed: each of them appears as
      its own origin and as its own destination, so ties multiply transitions.
    """
    # this leader's dated jobs, one per (year, institution, org), oldest first
    resume = lc[(lc["LeaderID"] == leaderid) & (lc["CareerStartYear"].notna())]
    resume = resume.drop_duplicates(["CareerStartYear", "PrimaryInstitution", "OrgName"], keep="first")
    resume = resume.sort_values("CareerStartDate", ignore_index=True)

    # all ordered pairs of jobs (self-join on the single leader id)
    pairs = resume.merge(resume, on="LeaderID", how="inner", suffixes=("_1", "_2"))
    pairs = pairs.sort_values(["CareerStartDate_1", "CareerStartDate_2"])

    # keep only the pairs in which job 1 starts strictly before job 2
    pairs = pairs[pairs["CareerStartDate_1"] < pairs["CareerStartDate_2"]]

    # earliest later start date for every origin start date
    next_start = (
        pairs.groupby("CareerStartDate_1")["CareerStartDate_2"]
        .min()
        .rename("CareerStartDate_2_min")
        .reset_index()
    )

    # a pair is a transition when its destination starts on that earliest date
    pairs = pairs.merge(next_start, on="CareerStartDate_1", how="left")
    adjacent = pairs[pairs["CareerStartDate_2"] == pairs["CareerStartDate_2_min"]]

    if verbose:
        print(leaderid, adjacent.shape[0], "transitions")

    return adjacent

#### a good example of many of the problems we need to address

In [69]:
leaderid = "강관주"
resume = calculate_resume_transitions(ljob,leaderid)

강관주 9 transitions


In [70]:
some_columns = ["LeaderID","CareerStartDate_1","PrimaryInstitution_1","OrgName_1","Position_1","CareerStartDate_2","PrimaryInstitution_2","OrgName_2","Position_2"]
resume[some_columns]

Unnamed: 0,LeaderID,CareerStartDate_1,PrimaryInstitution_1,OrgName_1,Position_1,CareerStartDate_2,PrimaryInstitution_2,OrgName_2,Position_2
0,강관주,197300,노동당,당중앙위원회_문화예술부_X과,과장,198600,노동당,당중앙위원회_조직지도부,부부장
8,강관주,198600,노동당,당중앙위원회_조직지도부,부부장,198800,노동당,당중앙위원회_통일전선부,부부장
15,강관주,198800,노동당,당중앙위원회_통일전선부,부부장,198903,노동당,당중앙위원회_통일전선부,제1부부장
21,강관주,198903,노동당,당중앙위원회_통일전선부,제1부부장,199008,내각,조국평화통일위원회,부위원장
22,강관주,198903,노동당,당중앙위원회_통일전선부,제1부부장,199008,정무원,조국평화통일위원회,부위원장
26,강관주,199008,내각,조국평화통일위원회,부위원장,199301,노동당,당중앙위원회_통일전선부,부장
27,강관주,199008,정무원,조국평화통일위원회,부위원장,199301,노동당,당중앙위원회_통일전선부,부장
32,강관주,199301,노동당,당중앙위원회_통일전선부,부장,199702,노동당,당중앙위원회_대외연락부,부장
34,강관주,199702,노동당,당중앙위원회_대외연락부,부장,200900,내각,225국,국장


In [71]:
leaderids = list(ljob["LeaderID"].unique())

In [72]:
len(leaderids)

505

In [73]:
def compile_leader_transitions(lc):
    """Stack the transitions of every leader in ``lc`` into a single frame.

    Calls ``calculate_resume_transitions(lc, leaderid)`` once for each distinct
    ``LeaderID`` (in order of first appearance) and concatenates the results
    with a fresh 0..n-1 index.

    Parameters
    ----------
    lc : pandas.DataFrame
        Leader-job table containing a ``LeaderID`` column.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        From ``pd.concat`` when ``lc`` contains no leaders at all.
    """
    per_leader = [
        calculate_resume_transitions(lc, leaderid)
        for leaderid in lc["LeaderID"].unique()
    ]
    return pd.concat(per_leader, ignore_index=True)

In [74]:
transitions = compile_leader_transitions(ljob)

리원일 6 transitions
리하일 4 transitions
최희태 0 transitions
김창룡 0 transitions
김인식 0 transitions
리영호 2 transitions
최태복 13 transitions
김영일a 14 transitions
주상성 5 transitions
강동윤 2 transitions
김일성 21 transitions
김일본 20 transitions
최광 15 transitions
백학림 21 transitions
박성철a 36 transitions
김중린 9 transitions
리종옥 28 transitions
리을설 13 transitions
김익현 6 transitions
김의순 10 transitions
오진우 10 transitions
김영주 13 transitions
김철만 10 transitions
백인준 0 transitions
최영림 17 transitions
김영남 17 transitions
전병호 9 transitions
계응태 16 transitions
강석숭 7 transitions
김복신 11 transitions
김원균 0 transitions
김영춘 6 transitions
김기남 14 transitions
채희정 16 transitions
길재경 5 transitions
양형섭 6 transitions
박용석 8 transitions
전하철 7 transitions
장병규 0 transitions
김국태 11 transitions
김정일 10 transitions
리용무 6 transitions
리종혁 5 transitions
오극렬 6 transitions
리종산 2 transitions
송호경 11 transitions
김락희 9 transitions
권희경 5 transitions
황순희 0 transitions
김룡연 1 transitions
김풍기 3 transitions
전희정 11 transitions
리길송 19 transitions
김일철a 6 transitions
현철

한룡국 0 transitions
김춘섭 0 transitions
리형근 0 transitions
김충성 0 transitions
기광호 2 transitions
고정범 0 transitions
김유일 0 transitions
김재성 2 transitions
손철주 1 transitions
김용연 1 transitions
오춘복 0 transitions
김천균 1 transitions
채성학 0 transitions
리학철 1 transitions
장혁 1 transitions
장춘성 0 transitions
김윤혁b 0 transitions
주용일 0 transitions
김영훈b 0 transitions
김일국 2 transitions
원길우 1 transitions
손광호 0 transitions
렴철성 0 transitions
박수일 2 transitions
장윤곤 0 transitions
김두일 1 transitions
안금철 0 transitions
주영길 0 transitions
리정남 1 transitions
김성일 0 transitions
리히용 0 transitions
박만호 0 transitions
오경석 0 transitions
김충일 0 transitions
김광혁 0 transitions
최영호a 0 transitions
리용주 0 transitions
김철규 0 transitions
장길룡 1 transitions
마종선 0 transitions
박영호 0 transitions
최정룡 0 transitions
박창호 1 transitions
량정훈 0 transitions


#### add OrgAdvance, PositionAdvance variables

In [75]:
# OrgRank of the earlier job (_1) minus OrgRank of the later job (_2).
# Vectorized: a missing rank gives NaN for that row instead of raising inside int().
transitions["OrgAdvance"] = pd.to_numeric(transitions["OrgRank_1"]) - pd.to_numeric(transitions["OrgRank_2"])

In [76]:
# PositionRank of the earlier job (_1) minus PositionRank of the later job (_2).
# determine_Position_Rank returns NaN for positions it cannot find in P1-P3, and
# int(NaN) raises; the vectorized subtraction yields NaN for such rows instead.
transitions["PositionAdvance"] = pd.to_numeric(transitions["PositionRank_1"]) - pd.to_numeric(transitions["PositionRank_2"])

#### descriptive statistics

In [77]:
# leaders with 1 or more job transition
len(transitions.LeaderID.unique())

381

In [78]:
# leaders with no job transitions (e.g. only 1 job, or all jobs sharing one start date); excluded from analysis
len(ljob.LeaderID.unique())-len(transitions.LeaderID.unique())

124

In [79]:
# number of transitions
transitions.shape

(1813, 28)

In [80]:
transition_counts = transitions.groupby("LeaderID")["Position_1"].count()

In [81]:
min(transition_counts)

1

In [82]:
max(transition_counts)

36

In [83]:
mean(transition_counts)

4.758530183727034

In [84]:
mode(transition_counts)

1

#### frequency of leaders by # of job transitions

In [85]:
# number of leaders for every transition count from 0 up to the observed maximum
# (counts that no leader has get a frequency of 0)
x_axis = list(range(0, max(transition_counts) + 1))
transition_count_distribution = [
    int((transition_counts == count).sum())
    for count in x_axis
]

In [86]:
transition_count_freq = pd.DataFrame({"Count":x_axis,"Frequency":transition_count_distribution})

In [87]:
# filename_transition_count_freq_alljobs = "transition_count_freq_alljobs.xlsx"
filename_transition_count_freq_ingov = "transition_count_freq_ingov.xlsx"

transition_count_freq.to_excel(path_analysis + current_subpath + filename_transition_count_freq_ingov,index=False)

# Export Query

#### format transitions query for export

In [88]:
transitions.columns

Index(['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
       'OrgName_1', 'Position_1', 'OrgRank_1', 'PositionRank_1',
       'CareerString_2', 'CareerDateString_2022_2', 'CareerStartYear_2',
       'CareerStartMonth_2', 'CareerStartDate_2', 'CareerSubstring_2',
       'InstitutionType_2', 'PrimaryInstitution_2', 'OrgName_2', 'Position_2',
       'OrgRank_2', 'PositionRank_2', 'CareerStartDate_2_min', 'OrgAdvance',
       'PositionAdvance'],
      dtype='object')

In [89]:
# omit CareerStartDate_2_min and any other fields we don't need

transitions_columns = ['LeaderID', 'CareerString_1', 'CareerDateString_2022_1',
       'CareerStartYear_1', 'CareerStartMonth_1', 'CareerStartDate_1',
       'CareerSubstring_1', 'InstitutionType_1', 'PrimaryInstitution_1',
        'OrgName_1', 'Position_1', 'OrgRank_1', 'PositionRank_1',
        'CareerString_2', 'CareerDateString_2022_2', 'CareerStartYear_2',
        'CareerStartMonth_2', 'CareerStartDate_2', 'CareerSubstring_2',
        'InstitutionType_2', 'PrimaryInstitution_2', 'OrgName_2', 'Position_2',
        'OrgRank_2', 'PositionRank_2','OrgAdvance','PositionAdvance']

In [90]:
transitions = transitions[transitions_columns]

#### export query

In [91]:
# refresh leaderjobtransition query
transitions.to_excel(path_queries + filename_leaderjobtransition_ingov,index=False)