In [1]:
# Import dependencies

import pandas as pd
import numpy as np
import matplotlib as matplot
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Clean data

In [2]:
# Import data

df = pd.read_csv('Resources/staff_assignments_24-25.csv')
df.head()

Unnamed: 0,Employee Number,Employee,Position Type Code,Position Distributions Assignment Type Descriptions,Entered FTE,Hourly Pay,Position Distributions Building Descriptions,Assignment Total Pay,Start Date,End Date,Active,Hire Date Original,Termination Date,Termination Description,Calendar Code,Ethnicity and Race,Gender,Birth Date
0,20337,"AAKER, DANIEL J",LICENSED,INSTR EBD,1.0,61.71,NORTH EDUCATION CENTER,89926.75,07/01/2024,06/09/2025,True,08/21/2012,,,2209 184,"Non-Hispanic/Latino, White",M - Male,04/15/1981
1,20337,"AAKER, DANIEL J",SUMMERTH,SUMMER PROJECT,0.1,60.27,NORTH EDUCATION CENTER,1880.42,07/01/2024,08/23/2024,True,08/21/2012,,,SUMMER24,"Non-Hispanic/Latino, White",M - Male,04/15/1981
2,20337,"AAKER, DANIEL J",TEMPHIRE,INSTR EBD,0.1,61.71,DISTRICT SERVICE CENTER,438.14,02/20/2025,05/30/2025,True,08/21/2012,,,TEMPHIRE,"Non-Hispanic/Latino, White",M - Male,04/15/1981
3,21200,"AARON, ERIK L",ESP,EDUCATIONAL SUPPORT PROFESSION,0.9375,26.37,WEST EDUCATION CENTER,35797.28,07/01/2024,06/06/2025,True,12/12/2016,,,2209 181,"Non-Hispanic/Latino, Black",M - Male,12/13/1970
4,19657,"ABDALLA, AHMED E",ESP,EDUCATIONAL SUPPORT PROFESSION,1.0,29.41,SOUTH EDUCATION CENTER,42585.68,07/01/2024,06/06/2025,True,06/17/2008,,,2209 181,"Non-Hispanic/Latino, White",M - Male,11/21/1982


Determining the two types of promotions:

Mid-year promotions: Promotions occuring between school_year_start and fiscal_year_end
End-year promotions: Promotions occuring between fiscal_year_end and the NEXT school_year_start

The decision to use school_year_start as the cutoff date as opposed to fiscal_year_start is based on how assignments are entered into Skyward. 
If an employee:
1) Is promoted at the end of the school year AND
2) Does not start their new position until AFTER the start of the fiscal year AND
3) There was a gap in working dates between old position and new position

Their promotion will not be identified as a "End-year promotion" using the fiscal_year_start as the cutoff date since their new position will not be in effect yet.

Their promotion will not be identified as a "Mid-year promotion" for the next fiscal year because *most likely* their old assignment will not be entered in the next year and a change in position won't be identified using our code.

For example:

An ESP was promoted to an Admin Support position at the end of the school year. They don't start the Admin Support position until 8/1. Using the fiscal year as the promotions period, from 6/30 to 7/1 they do not show any promotions. They do not have an assignment entered from 7/1-8/1. On 8/1 they will start a new assignment as an Admin Support and the change from ESP to Admin Support will not be identified using our code.

In comparison, using school_year_start as the cutoff date, their promotion will be documented as an End-year promotion which compares their ending position on 6/30 to their position on the first day of the NEXT school year (first day of Welcome Week).

In [3]:
# Define school year dates
# School year is defined as Welcome Week to Welcome Week

school_year='23-24'

# First day of welcome week
school_year_start='2024-08-26'

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2268 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   Employee Number                                      2268 non-null   int64  
 1   Employee                                             2268 non-null   object 
 2   Position Type Code                                   2268 non-null   object 
 3   Position Distributions Assignment Type Descriptions  2268 non-null   object 
 4   Entered FTE                                          2268 non-null   float64
 5   Hourly Pay                                           2268 non-null   float64
 6   Position Distributions Building Descriptions         2268 non-null   object 
 7   Assignment Total Pay                                 2268 non-null   object 
 8   Start Date                                           2268 non-null  

In [5]:
df=df.rename(columns={'Position Type Code':'Position Type', 'Position Distributions Assignment Type Descriptions':'Position',\
          'Position Distributions Building Descriptions':'Site/Location', 'Hire Date Original':'Hire Date'})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2268 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Employee Number          2268 non-null   int64  
 1   Employee                 2268 non-null   object 
 2   Position Type            2268 non-null   object 
 3   Position                 2268 non-null   object 
 4   Entered FTE              2268 non-null   float64
 5   Hourly Pay               2268 non-null   float64
 6   Site/Location            2268 non-null   object 
 7   Assignment Total Pay     2268 non-null   object 
 8   Start Date               2268 non-null   object 
 9   End Date                 2268 non-null   object 
 10  Active                   2268 non-null   bool   
 11  Hire Date                2268 non-null   object 
 12  Termination Date         215 non-null    object 
 13  Termination Description  203 non-null    object 
 14  Calendar Code           

In [6]:
df['Position Type'].value_counts()

Position Type
ESP         453
LICENSED    399
EXTENDED    364
TEMP NSO    308
SUMMERTH    200
TEMPHIRE    133
WSSS        110
284          85
UNAFF        56
ADMIN        28
TEMPCONT     22
COCURRIC     17
INTERPRE     17
OVERAGE      16
BOARD        16
ASSTPRIN     15
SSC          10
PRINCIPL      7
CAREER        6
HCSPECIA      3
EMPLOAN       3
Name: count, dtype: int64

In [7]:
# Omit summer and some temp positions; most if not all of these staff have a primary assignment during the regular school year 

exclude=['EXTENDED', 'SUMMERTH', 'WSSS', 'OVERAGE', 'EMPLOAN', 'TEMP NSO', '24TEMPCO', 'COCURRIC', 'CAREER', 'TEMP CR']
df=df.loc[~df['Position Type'].isin(exclude)]
df['Position Type'].value_counts()

Position Type
ESP         453
LICENSED    399
TEMPHIRE    133
284          85
UNAFF        56
ADMIN        28
TEMPCONT     22
INTERPRE     17
BOARD        16
ASSTPRIN     15
SSC          10
PRINCIPL      7
HCSPECIA      3
Name: count, dtype: int64

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1244 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Employee Number          1244 non-null   int64  
 1   Employee                 1244 non-null   object 
 2   Position Type            1244 non-null   object 
 3   Position                 1244 non-null   object 
 4   Entered FTE              1244 non-null   float64
 5   Hourly Pay               1244 non-null   float64
 6   Site/Location            1244 non-null   object 
 7   Assignment Total Pay     1244 non-null   object 
 8   Start Date               1244 non-null   object 
 9   End Date                 1244 non-null   object 
 10  Active                   1244 non-null   bool   
 11  Hire Date                1244 non-null   object 
 12  Termination Date         136 non-null    object 
 13  Termination Description  130 non-null    object 
 14  Calendar Code            1244

In [9]:
# Convert Assignment Total Pay to float
df['Assignment Total Pay'] = df['Assignment Total Pay'].str.replace(',', '').astype(float)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1244 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Employee Number          1244 non-null   int64  
 1   Employee                 1244 non-null   object 
 2   Position Type            1244 non-null   object 
 3   Position                 1244 non-null   object 
 4   Entered FTE              1244 non-null   float64
 5   Hourly Pay               1244 non-null   float64
 6   Site/Location            1244 non-null   object 
 7   Assignment Total Pay     1244 non-null   float64
 8   Start Date               1244 non-null   object 
 9   End Date                 1244 non-null   object 
 10  Active                   1244 non-null   bool   
 11  Hire Date                1244 non-null   object 
 12  Termination Date         136 non-null    object 
 13  Termination Description  130 non-null    object 
 14  Calendar Code            1244

In [10]:
# Drop assignments where Assignment Total Pay is zero - staff did not work in this role during this period
df=df.loc[~(df['Assignment Total Pay']==0)]
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1204 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Employee Number          1204 non-null   int64  
 1   Employee                 1204 non-null   object 
 2   Position Type            1204 non-null   object 
 3   Position                 1204 non-null   object 
 4   Entered FTE              1204 non-null   float64
 5   Hourly Pay               1204 non-null   float64
 6   Site/Location            1204 non-null   object 
 7   Assignment Total Pay     1204 non-null   float64
 8   Start Date               1204 non-null   object 
 9   End Date                 1204 non-null   object 
 10  Active                   1204 non-null   bool   
 11  Hire Date                1204 non-null   object 
 12  Termination Date         131 non-null    object 
 13  Termination Description  126 non-null    object 
 14  Calendar Code            1204

In [11]:
df['Start Date']=pd.to_datetime(df['Start Date'])
df['Hire Date']=pd.to_datetime(df['Hire Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1204 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          1204 non-null   int64         
 1   Employee                 1204 non-null   object        
 2   Position Type            1204 non-null   object        
 3   Position                 1204 non-null   object        
 4   Entered FTE              1204 non-null   float64       
 5   Hourly Pay               1204 non-null   float64       
 6   Site/Location            1204 non-null   object        
 7   Assignment Total Pay     1204 non-null   float64       
 8   Start Date               1204 non-null   datetime64[ns]
 9   End Date                 1204 non-null   object        
 10  Active                   1204 non-null   bool          
 11  Hire Date                1204 non-null   datetime64[ns]
 12  Termination Date         131 non-null  

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1204 entries, 0 to 2267
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          1204 non-null   int64         
 1   Employee                 1204 non-null   object        
 2   Position Type            1204 non-null   object        
 3   Position                 1204 non-null   object        
 4   Entered FTE              1204 non-null   float64       
 5   Hourly Pay               1204 non-null   float64       
 6   Site/Location            1204 non-null   object        
 7   Assignment Total Pay     1204 non-null   float64       
 8   Start Date               1204 non-null   datetime64[ns]
 9   End Date                 1204 non-null   object        
 10  Active                   1204 non-null   bool          
 11  Hire Date                1204 non-null   datetime64[ns]
 12  Termination Date         131 non-null  

In [13]:
# Output dataframe to be used in End-year promotion analysis

df.to_csv('Resources/cleaned_staff_assignments_24-25.csv', index=False)

## Identify promotions

In [14]:
# Identify employees that have more than one assignment to parse through for potential promotions.

duplicates_df = df[df.duplicated(subset='Employee Number', keep=False)].sort_values(by=['Employee Number', 'Start Date'])
duplicates_df.head()

Unnamed: 0,Employee Number,Employee,Position Type,Position,Entered FTE,Hourly Pay,Site/Location,Assignment Total Pay,Start Date,End Date,Active,Hire Date,Termination Date,Termination Description,Calendar Code,Ethnicity and Race,Gender,Birth Date
1385,14516,"MITCHELL, ROSE M",284,ADMINISTRATIVE SUPPORT,1.0,30.0,DISTRICT SERVICE CENTER,62640.0,2024-07-01,06/30/2025,True,1991-07-29,,,12m24pd,"Non-Hispanic/Latino, Black",F - Female,06/24/1959
1386,14516,"MITCHELL, ROSE M",TEMPHIRE,ADMINISTRATIVE SUPPORT,1.0,45.0,DISTRICT SERVICE CENTER,7920.0,2024-07-01,10/31/2024,True,1991-07-29,,,SUMMER24,"Non-Hispanic/Latino, Black",F - Female,06/24/1959
827,14563,"HAWLEY, STEPHANIE A",LICENSED,INSTRUCTOR ASD,1.0,64.08,SOUTH EDUCATION CENTER,94329.0,2024-07-01,06/09/2025,True,1994-08-25,,,2209 184,"Non-Hispanic/Latino, White",F - Female,07/01/1970
830,14563,"HAWLEY, STEPHANIE A",TEMPHIRE,ITRAC JOURNEY WORKER,0.1,64.08,SOUTH EDUCATION CENTER,25.63,2024-08-19,08/22/2024,True,1994-08-25,,,SUMMER24,"Non-Hispanic/Latino, White",F - Female,07/01/1970
829,14563,"HAWLEY, STEPHANIE A",TEMPCONT,ITRAC JOURNEY WORKER,0.1,20.08,SOUTH EDUCATION CENTER,5000.0,2024-08-26,06/06/2025,True,1994-08-25,,,TEMPHIRE,"Non-Hispanic/Latino, White",F - Female,07/01/1970


In [15]:
duplicates_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 290 entries, 1385 to 689
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          290 non-null    int64         
 1   Employee                 290 non-null    object        
 2   Position Type            290 non-null    object        
 3   Position                 290 non-null    object        
 4   Entered FTE              290 non-null    float64       
 5   Hourly Pay               290 non-null    float64       
 6   Site/Location            290 non-null    object        
 7   Assignment Total Pay     290 non-null    float64       
 8   Start Date               290 non-null    datetime64[ns]
 9   End Date                 290 non-null    object        
 10  Active                   290 non-null    bool          
 11  Hire Date                290 non-null    datetime64[ns]
 12  Termination Date         12 non-null  

In [273]:
# # Test code 
# positions_df=duplicates_df.loc[duplicates_df['Employee Number']==20970].sort_values('Start Date')
# positions = positions_df[['Position', 'Position Type']]
# # tuple(positions.iloc[0])
# if tuple(positions.iloc[0]) != tuple(positions.iloc[1]):
#     print('Possible promo')

In [17]:
# Get list of duplicate employee numbers
duplicate_numbers=duplicates_df['Employee Number'].unique()

In [18]:
df['Position Type'].value_counts()

Position Type
ESP         419
LICENSED    396
TEMPHIRE    133
284          85
UNAFF        56
ADMIN        25
TEMPCONT     22
INTERPRE     17
BOARD        16
ASSTPRIN     15
SSC          10
PRINCIPL      7
HCSPECIA      3
Name: count, dtype: int64

Next, compare positions/position types for each employee with more than one assignment to find promotions.

Loop through the data to find:

a) Cases where an employee changed to a unique Position/Position Type combination

b) For all employees in step a, identify possible promos involving a temp staff where the following criteria are met:
-Staff must have started off working as a temp staff (first position type in the period must be 'TEMPHIRE') AND
-Temp staff has to be hired into a different position (being hired on as a permanent staff for the same position doesn't count)

** The purpose of step b is to determine any promotions involving temp staff that occurred before dropping all temp assignments, drastically decreasing the number of staff assignments that have to be manually analyzed for promotions **

In [20]:
# Step 2: Compare positions/position types for each employee with more than one assignment to find promotions.

temp_types=['TEMPHIRE', 'TEMPCONT']

# List to hold potential promo employee numbers
possible_promos = []

# Create list to hold possible temp promos
possible_temp_promos=[]

# Step a: Loop through duplicates dataframe and identify unique role changes
for number in duplicate_numbers:
    # Filter rows for the current employee and sort them by start date
    positions_df=duplicates_df.loc[duplicates_df['Employee Number']==number].sort_values('Start Date')

    # Select only the columns related to position information
    positions = positions_df[['Position', 'Position Type']]

    # Drop duplicate position entries (consecutive duplicates are treated as the same role)
    unique_roles = positions.drop_duplicates()

    # If there is more than one unique role, it's a possible promotion
    if len(unique_roles) > 1:
        possible_promos.append(number)
        
        # Step b: Check if the first role was a temporary one and changed to a different position
        first_type = positions_df['Position Type'].iloc[0]
        first_position = positions_df['Position'].iloc[0]
        second_position = positions_df['Position'].iloc[1]

        if (first_type in temp_types) and (first_position != second_position): 
            possible_temp_promos.append(number)
            
# Number of possible promoted employees identified
print(f'Possible promos: {len(possible_promos)}')

print(f'Possible temp promos: {len(possible_temp_promos)}')

Possible promos: 121
Possible temp promos: 3


In [21]:
# Create dataframe with employees with possible promotions

possible_promos_df=df.loc[df['Employee Number'].isin(possible_promos)]
possible_promos_df=possible_promos_df.sort_values(by=['Employee Number', 'Start Date'])
possible_promos_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 269 entries, 1385 to 689
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          269 non-null    int64         
 1   Employee                 269 non-null    object        
 2   Position Type            269 non-null    object        
 3   Position                 269 non-null    object        
 4   Entered FTE              269 non-null    float64       
 5   Hourly Pay               269 non-null    float64       
 6   Site/Location            269 non-null    object        
 7   Assignment Total Pay     269 non-null    float64       
 8   Start Date               269 non-null    datetime64[ns]
 9   End Date                 269 non-null    object        
 10  Active                   269 non-null    bool          
 11  Hire Date                269 non-null    datetime64[ns]
 12  Termination Date         10 non-null  

In [22]:
possible_promos_df['Position Type'].value_counts()

Position Type
TEMPHIRE    104
ESP          57
LICENSED     54
284          18
TEMPCONT     18
UNAFF        11
ADMIN         3
INTERPRE      1
ASSTPRIN      1
PRINCIPL      1
HCSPECIA      1
Name: count, dtype: int64

In [23]:
# Create Promotion column to mark identified promotions
possible_promos_df['Promotion']=np.nan
possible_promos_df['Old/New']=np.nan
possible_promos_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 269 entries, 1385 to 689
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          269 non-null    int64         
 1   Employee                 269 non-null    object        
 2   Position Type            269 non-null    object        
 3   Position                 269 non-null    object        
 4   Entered FTE              269 non-null    float64       
 5   Hourly Pay               269 non-null    float64       
 6   Site/Location            269 non-null    object        
 7   Assignment Total Pay     269 non-null    float64       
 8   Start Date               269 non-null    datetime64[ns]
 9   End Date                 269 non-null    object        
 10  Active                   269 non-null    bool          
 11  Hire Date                269 non-null    datetime64[ns]
 12  Termination Date         10 non-null  

In [24]:
# Analyze temp staff identified in Step 2 for promotions before dropping all temp staff from possible promotions dataframe. 
# Consult HR if necessary. 

possible_promos_df[possible_promos_df['Employee Number'].isin(possible_temp_promos)]

Unnamed: 0,Employee Number,Employee,Position Type,Position,Entered FTE,Hourly Pay,Site/Location,Assignment Total Pay,Start Date,End Date,Active,Hire Date,Termination Date,Termination Description,Calendar Code,Ethnicity and Race,Gender,Birth Date,Promotion,Old/New
512,22526,"DRESSEL, ALLYSON A",TEMPHIRE,SUMMER PROJECT,0.1,44.89,CAREER & TECH CENTER - E.P. CAMPUS,103.25,2024-08-14,09/15/2024,False,2024-06-03,07/15/2025,RESIGNATION,12m24pd,"Non-Hispanic/Latino, White",F - Female,08/21/1976,,
511,22526,"DRESSEL, ALLYSON A",LICENSED,INSTR MEDICAL CAREERS,1.0,44.89,CAREER & TECH CENTER - E.P. CAMPUS,66072.0,2024-08-26,06/09/2025,False,2024-06-03,07/15/2025,RESIGNATION,2209 184,"Non-Hispanic/Latino, White",F - Female,08/21/1976,,
1840,22577,"SCHWAHN, BRIAN R",TEMPHIRE,SUMMER PROJECT,0.1,44.52,CAREER & TECH CENTER - E.P. CAMPUS,102.4,2024-08-14,09/15/2024,True,2024-07-08,,,12m24pd,"Non-Hispanic/Latino, White",M - Male,12/25/1966,,
1839,22577,"SCHWAHN, BRIAN R",LICENSED,INSTRU COMM TECH CAREER,1.0,44.52,CAREER & TECH CENTER - E.P. CAMPUS,65530.0,2024-08-26,06/09/2025,True,2024-07-08,,,2209 184,"Non-Hispanic/Latino, White",M - Male,12/25/1966,,
39,22624,"ALLBEE, NICOLE L",TEMPHIRE,ADMINISTRATIVE SUPPORT,1.0,21.44,District Office,20582.4,2024-07-15,06/30/2025,True,2024-07-08,,,TEMPHIRE,"Non-Hispanic/Latino, White",F - Female,12/30/1987,,
40,22624,"ALLBEE, NICOLE L",TEMPHIRE,HR TEMP HIRE,0.1,21.44,DISTRICT SERVICE CENTER,180.1,2024-08-27,12/20/2024,True,2024-07-08,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,12/30/1987,,


In [281]:
# In the Promotion column, mark all employees that have an identified promotion with a "1" 
# possible_promos_df.loc[possible_promos_df['Employee Number']==EMP NUMBER HERE, 'Promotion']=1

In [25]:
possible_promos_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 269 entries, 1385 to 689
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          269 non-null    int64         
 1   Employee                 269 non-null    object        
 2   Position Type            269 non-null    object        
 3   Position                 269 non-null    object        
 4   Entered FTE              269 non-null    float64       
 5   Hourly Pay               269 non-null    float64       
 6   Site/Location            269 non-null    object        
 7   Assignment Total Pay     269 non-null    float64       
 8   Start Date               269 non-null    datetime64[ns]
 9   End Date                 269 non-null    object        
 10  Active                   269 non-null    bool          
 11  Hire Date                269 non-null    datetime64[ns]
 12  Termination Date         10 non-null  

In [26]:
# Drop all temp positions that do not have a promotion.
possible_promos_df=possible_promos_df.loc[~((possible_promos_df['Position Type'].isin(temp_types)) & (possible_promos_df['Promotion'].isna()))]
possible_promos_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 147 entries, 1385 to 688
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          147 non-null    int64         
 1   Employee                 147 non-null    object        
 2   Position Type            147 non-null    object        
 3   Position                 147 non-null    object        
 4   Entered FTE              147 non-null    float64       
 5   Hourly Pay               147 non-null    float64       
 6   Site/Location            147 non-null    object        
 7   Assignment Total Pay     147 non-null    float64       
 8   Start Date               147 non-null    datetime64[ns]
 9   End Date                 147 non-null    object        
 10  Active                   147 non-null    bool          
 11  Hire Date                147 non-null    datetime64[ns]
 12  Termination Date         6 non-null   

In [27]:
# Step 3: Re-filter remaining possible promos to only keep employees with more than one assignment (after dropping temp assignments)
possible_promos_df=possible_promos_df[possible_promos_df.duplicated(subset='Employee Number', keep=False)]
possible_promos_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 53 entries, 1158 to 2084
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          53 non-null     int64         
 1   Employee                 53 non-null     object        
 2   Position Type            53 non-null     object        
 3   Position                 53 non-null     object        
 4   Entered FTE              53 non-null     float64       
 5   Hourly Pay               53 non-null     float64       
 6   Site/Location            53 non-null     object        
 7   Assignment Total Pay     53 non-null     float64       
 8   Start Date               53 non-null     datetime64[ns]
 9   End Date                 53 non-null     object        
 10  Active                   53 non-null     bool          
 11  Hire Date                53 non-null     datetime64[ns]
 12  Termination Date         2 non-null   

In [28]:
# Step 4: Manually sift through possible promos to identify true promotions

# 1. Output dataframe. 
# 2. Make a copy of the file and store in the Resources folder so you don't overwrite the file.
# 3. Open the COPY of the file and go through each employee one-by-one to identify promotions.
# 4. Mark promotions with a 1 in Promotion column (both old/new positions) and fill in either "Old' or "New" in the Old/New column
# 5. Remember to IGNORE any promotions that happened between 7/1 and school_year_start (these promotions should be included with the 
# previous year promotions, consistent with our defined promotions period).

possible_promos_df.to_csv('Outputs/possible_mid_year_promos_24-25.csv', index=False)

## Load Promotion Data

In [2]:
# After identifying promotions manually, load dataset 
promos_df = pd.read_csv('Resources/possible_mid_year_promos_24-25_cleaned.csv')
promos_df.head()

Unnamed: 0,Employee Number,Employee,Position Type,Position,Entered FTE,Hourly Pay,Site/Location,Assignment Total Pay,Start Date,End Date,Active,Hire Date,Termination Date,Termination Description,Calendar Code,Ethnicity and Race,Gender,Birth Date,Promotion,Old/New
0,18038,"LAM, ELIZABETH A",ADMIN,"SENIOR MANAGER, SPECIAL ED",1.0,54.81,DISTRICT SERVICE CENTER,35078.62,7/1/2024,10/20/2024,True,8/24/2006,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,6/30/1982,1.0,Old
1,18038,"LAM, ELIZABETH A",ADMIN,ASST DIRECTOR OF SPEC ED,1.0,61.3,DISTRICT SERVICE CENTER,88766.28,10/21/2024,6/30/2025,True,8/24/2006,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,6/30/1982,1.0,New
2,18377,"MOEN, KENDRA M",284,ADMINISTRATIVE SUPPORT,1.0,27.8,NORTH EDUCATION CENTER,18904.0,7/1/2024,10/27/2024,True,9/24/2001,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,4/20/1982,1.0,Old
3,18377,"MOEN, KENDRA M",UNAFF,PROGRAM COORDINATOR (SS),1.0,39.08,DISTRICT SERVICE CENTER,55025.29,10/28/2024,6/30/2025,True,9/24/2001,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,4/20/1982,1.0,New
4,20812,"ARBOGAST, BRYAN D",UNAFF,FACILITI OPERATIONS SUPERVISOR,1.0,40.69,DISTRICT SERVICE CENTER,84312.89,7/1/2024,6/26/2025,True,6/1/2015,,,12m24pd,"Non-Hispanic/Latino, White",M - Male,12/14/1987,1.0,Old


In [3]:
# Isolate old positions

old_position = promos_df.loc[promos_df['Old/New']=='Old']
old_position.head()

Unnamed: 0,Employee Number,Employee,Position Type,Position,Entered FTE,Hourly Pay,Site/Location,Assignment Total Pay,Start Date,End Date,Active,Hire Date,Termination Date,Termination Description,Calendar Code,Ethnicity and Race,Gender,Birth Date,Promotion,Old/New
0,18038,"LAM, ELIZABETH A",ADMIN,"SENIOR MANAGER, SPECIAL ED",1.0,54.81,DISTRICT SERVICE CENTER,35078.62,7/1/2024,10/20/2024,True,8/24/2006,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,6/30/1982,1.0,Old
2,18377,"MOEN, KENDRA M",284,ADMINISTRATIVE SUPPORT,1.0,27.8,NORTH EDUCATION CENTER,18904.0,7/1/2024,10/27/2024,True,9/24/2001,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,4/20/1982,1.0,Old
4,20812,"ARBOGAST, BRYAN D",UNAFF,FACILITI OPERATIONS SUPERVISOR,1.0,40.69,DISTRICT SERVICE CENTER,84312.89,7/1/2024,6/26/2025,True,6/1/2015,,,12m24pd,"Non-Hispanic/Latino, White",M - Male,12/14/1987,1.0,Old
6,21002,"DAWSON, ANGEL A",UNAFF,RESTORATIVE PRACTICES COORDINA,1.0,35.78,DISTRICT SERVICE CENTER,27190.59,7/1/2024,11/10/2024,True,4/5/2016,,,12m24pd,"Non-Hispanic/Latino, Black",F - Female,11/15/1977,1.0,Old
8,21003,"SENDOLO, VICTORIA K",ESP,EDUCATIONAL SUPPORT PROFESSION,0.9375,28.58,NORTH EDUCATION CENTER,4930.06,7/1/2024,9/26/2024,True,4/13/2016,,,2209 181,"Non-Hispanic/Latino, Black",F - Female,7/31/1986,1.0,Old


In [4]:
# Rename columns for old positions
old_position.rename(columns={'Position':'Old Position',\
                            'Site/Location':'Old Site/Location',\
                            'Hourly Pay':'Old Hourly Pay', 'Calendar Code':'Old Calendar Code',\
                            'Position Type':'Old Position Type'}, inplace=True)
old_position = old_position[['Employee Number', 'Employee', 'Old Position Type', 'Old Position', 'Old Site/Location', 'Old Hourly Pay', 'Old Calendar Code']]
old_position.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_position.rename(columns={'Position':'Old Position',\


Unnamed: 0,Employee Number,Employee,Old Position Type,Old Position,Old Site/Location,Old Hourly Pay,Old Calendar Code
0,18038,"LAM, ELIZABETH A",ADMIN,"SENIOR MANAGER, SPECIAL ED",DISTRICT SERVICE CENTER,54.81,12m24pd
2,18377,"MOEN, KENDRA M",284,ADMINISTRATIVE SUPPORT,NORTH EDUCATION CENTER,27.8,12m24pd
4,20812,"ARBOGAST, BRYAN D",UNAFF,FACILITI OPERATIONS SUPERVISOR,DISTRICT SERVICE CENTER,40.69,12m24pd
6,21002,"DAWSON, ANGEL A",UNAFF,RESTORATIVE PRACTICES COORDINA,DISTRICT SERVICE CENTER,35.78,12m24pd
8,21003,"SENDOLO, VICTORIA K",ESP,EDUCATIONAL SUPPORT PROFESSION,NORTH EDUCATION CENTER,28.58,2209 181


In [5]:
# Isolate new positions

new_position = promos_df.query('`Old/New` == "New"')
new_position.head()

Unnamed: 0,Employee Number,Employee,Position Type,Position,Entered FTE,Hourly Pay,Site/Location,Assignment Total Pay,Start Date,End Date,Active,Hire Date,Termination Date,Termination Description,Calendar Code,Ethnicity and Race,Gender,Birth Date,Promotion,Old/New
1,18038,"LAM, ELIZABETH A",ADMIN,ASST DIRECTOR OF SPEC ED,1.0,61.3,DISTRICT SERVICE CENTER,88766.28,10/21/2024,6/30/2025,True,8/24/2006,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,6/30/1982,1.0,New
3,18377,"MOEN, KENDRA M",UNAFF,PROGRAM COORDINATOR (SS),1.0,39.08,DISTRICT SERVICE CENTER,55025.29,10/28/2024,6/30/2025,True,9/24/2001,,,12m24pd,"Non-Hispanic/Latino, White",F - Female,4/20/1982,1.0,New
5,20812,"ARBOGAST, BRYAN D",UNAFF,FAC OPERATIONS MANAGER,1.0,45.26,DISTRICT SERVICE CENTER,724.14,6/27/2025,6/30/2025,True,6/1/2015,,,12m24pd,"Non-Hispanic/Latino, White",M - Male,12/14/1987,1.0,New
7,21002,"DAWSON, ANGEL A",UNAFF,COMMUNITY SCHOOL COORDINATOR,1.0,41.67,DISTRICT SERVICE CENTER,55333.33,11/11/2024,6/30/2025,True,4/5/2016,,,12m24pd,"Non-Hispanic/Latino, Black",F - Female,11/15/1977,1.0,New
10,21003,"SENDOLO, VICTORIA K",LICENSED,INSTRUCTOR ASD,1.0,34.49,ANN BREMER EDUCATION CENTER,33112.17,12/2/2024,6/9/2025,True,4/13/2016,,,2209 184,"Non-Hispanic/Latino, Black",F - Female,7/31/1986,1.0,New


In [7]:
# Rename columns for new positions

new_position.rename(columns={'Position':'New Position',\
                               'Site/Location':'New Site/Location',\
                               'Hourly Pay':'New Hourly Pay', 'Calendar Code':'New Calendar Code',\
                            'Start Date':'New Position Start Date',
                            'Position Type':'New Position Type'}, inplace=True)
new_position = new_position[['Employee Number', 'Employee', 'New Position Type', 'New Position', 'New Site/Location', 'New Hourly Pay', 'New Calendar Code',\
                             'New Position Start Date', 'Hire Date', 'Termination Date', 'Ethnicity and Race', 'Gender', 'Birth Date']]
new_position.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_position.rename(columns={'Position':'New Position',\


Unnamed: 0,Employee Number,Employee,New Position Type,New Position,New Site/Location,New Hourly Pay,New Calendar Code,New Position Start Date,Hire Date,Termination Date,Ethnicity and Race,Gender,Birth Date
1,18038,"LAM, ELIZABETH A",ADMIN,ASST DIRECTOR OF SPEC ED,DISTRICT SERVICE CENTER,61.3,12m24pd,10/21/2024,8/24/2006,,"Non-Hispanic/Latino, White",F - Female,6/30/1982
3,18377,"MOEN, KENDRA M",UNAFF,PROGRAM COORDINATOR (SS),DISTRICT SERVICE CENTER,39.08,12m24pd,10/28/2024,9/24/2001,,"Non-Hispanic/Latino, White",F - Female,4/20/1982
5,20812,"ARBOGAST, BRYAN D",UNAFF,FAC OPERATIONS MANAGER,DISTRICT SERVICE CENTER,45.26,12m24pd,6/27/2025,6/1/2015,,"Non-Hispanic/Latino, White",M - Male,12/14/1987
7,21002,"DAWSON, ANGEL A",UNAFF,COMMUNITY SCHOOL COORDINATOR,DISTRICT SERVICE CENTER,41.67,12m24pd,11/11/2024,4/5/2016,,"Non-Hispanic/Latino, Black",F - Female,11/15/1977
10,21003,"SENDOLO, VICTORIA K",LICENSED,INSTRUCTOR ASD,ANN BREMER EDUCATION CENTER,34.49,2209 184,12/2/2024,4/13/2016,,"Non-Hispanic/Latino, Black",F - Female,7/31/1986


In [8]:
# Merge old and new positions
mid_year_promos = pd.merge(old_position, new_position, how='left', on=['Employee Number', 'Employee'])
mid_year_promos.head()

Unnamed: 0,Employee Number,Employee,Old Position Type,Old Position,Old Site/Location,Old Hourly Pay,Old Calendar Code,New Position Type,New Position,New Site/Location,New Hourly Pay,New Calendar Code,New Position Start Date,Hire Date,Termination Date,Ethnicity and Race,Gender,Birth Date
0,18038,"LAM, ELIZABETH A",ADMIN,"SENIOR MANAGER, SPECIAL ED",DISTRICT SERVICE CENTER,54.81,12m24pd,ADMIN,ASST DIRECTOR OF SPEC ED,DISTRICT SERVICE CENTER,61.3,12m24pd,10/21/2024,8/24/2006,,"Non-Hispanic/Latino, White",F - Female,6/30/1982
1,18377,"MOEN, KENDRA M",284,ADMINISTRATIVE SUPPORT,NORTH EDUCATION CENTER,27.8,12m24pd,UNAFF,PROGRAM COORDINATOR (SS),DISTRICT SERVICE CENTER,39.08,12m24pd,10/28/2024,9/24/2001,,"Non-Hispanic/Latino, White",F - Female,4/20/1982
2,20812,"ARBOGAST, BRYAN D",UNAFF,FACILITI OPERATIONS SUPERVISOR,DISTRICT SERVICE CENTER,40.69,12m24pd,UNAFF,FAC OPERATIONS MANAGER,DISTRICT SERVICE CENTER,45.26,12m24pd,6/27/2025,6/1/2015,,"Non-Hispanic/Latino, White",M - Male,12/14/1987
3,21002,"DAWSON, ANGEL A",UNAFF,RESTORATIVE PRACTICES COORDINA,DISTRICT SERVICE CENTER,35.78,12m24pd,UNAFF,COMMUNITY SCHOOL COORDINATOR,DISTRICT SERVICE CENTER,41.67,12m24pd,11/11/2024,4/5/2016,,"Non-Hispanic/Latino, Black",F - Female,11/15/1977
4,21003,"SENDOLO, VICTORIA K",ESP,EDUCATIONAL SUPPORT PROFESSION,NORTH EDUCATION CENTER,28.58,2209 181,LICENSED,INSTRUCTOR ASD,ANN BREMER EDUCATION CENTER,34.49,2209 184,12/2/2024,4/13/2016,,"Non-Hispanic/Latino, Black",F - Female,7/31/1986


In [9]:
mid_year_promos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Employee Number          22 non-null     int64  
 1   Employee                 22 non-null     object 
 2   Old Position Type        22 non-null     object 
 3   Old Position             22 non-null     object 
 4   Old Site/Location        22 non-null     object 
 5   Old Hourly Pay           22 non-null     float64
 6   Old Calendar Code        22 non-null     object 
 7   New Position Type        22 non-null     object 
 8   New Position             22 non-null     object 
 9   New Site/Location        22 non-null     object 
 10  New Hourly Pay           22 non-null     float64
 11  New Calendar Code        22 non-null     object 
 12  New Position Start Date  22 non-null     object 
 13  Hire Date                22 non-null     object 
 14  Termination Date         0 n

In [10]:
mid_year_promos['New Position Start Date']=pd.to_datetime(mid_year_promos['New Position Start Date'])

In [12]:
# Recall that our promotions period is school year (Welcome Week to Welcome Week). 
# We must omit any promotions that occurred before or on the first day of the school year
# These promotions should be included with the previous promotions analysis period

school_year_start='2024-08-26'

mid_year_promos=mid_year_promos.loc[~(mid_year_promos['New Position Start Date']<=school_year_start)]
mid_year_promos.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22 entries, 0 to 21
Data columns (total 18 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Employee Number          22 non-null     int64         
 1   Employee                 22 non-null     object        
 2   Old Position Type        22 non-null     object        
 3   Old Position             22 non-null     object        
 4   Old Site/Location        22 non-null     object        
 5   Old Hourly Pay           22 non-null     float64       
 6   Old Calendar Code        22 non-null     object        
 7   New Position Type        22 non-null     object        
 8   New Position             22 non-null     object        
 9   New Site/Location        22 non-null     object        
 10  New Hourly Pay           22 non-null     float64       
 11  New Calendar Code        22 non-null     object        
 12  New Position Start Date  22 non-null     da

In [13]:
# Export mid-year promos
mid_year_promos.to_csv('Resources/FINAL_mid_year_promos_24.csv', index=False)