### Import libraries

In [63]:
import os
import pandas as pd
import numpy as np


### Define functions and file pathway for collecting excel files

In [64]:
# define the directory for the files
directory = r"C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24"

In [65]:
# function to combine data from Excel files
def combine_excel_files(input_directory, sheet_name, skiprows, nrows, usecols):
    all_data = []
    for file in os.listdir(input_directory):
        if file.endswith('.xlsx'):
            try:
                file_path = os.path.join(input_directory, file)
                print(f"Processing file: {file_path}")
                data = pd.read_excel(file_path, sheet_name=sheet_name, 
                                     skiprows=skiprows, nrows=nrows, usecols=usecols).assign(**{'File Name': file})
                all_data.append(data)
            except ValueError as e:
                print(f"ValueError in file: {file} - {e}")
            except Exception as e:
                print(f"Error in file: {file} - {e}")
    return pd.concat(all_data, ignore_index=True)



# function to extract the health baord  from the file name
def extract_hb(data):
    try:
        parts = data.split(' ')
        health_board = parts[1]
        return health_board
    except IndexError:
        # Handle cases where the format is not as expected
        return 'Unknown'


### Ingest post data from 3. Non-Medical Staffing

#### Healthcare Scientists

In [66]:
# assign the data to dataframe using the defined function
healthcare_histo_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=8, nrows=28, usecols=[1,2,3])
healthcare_noncerv_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=8, nrows=28, usecols=[1,4,5])
healthcare_neuro_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=8, nrows=28, usecols=[1,6,7])
healthcare_paed_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=8, nrows=28, usecols=[1,8,9])
healthcare_cerv_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=8, nrows=28, usecols=[1,10,11])

Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS A&A SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS D&G SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Fife SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Forth Valley SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS GG&C SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Grampian SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Highland SPAN 2023-24.xlsx
Processing fi

In [67]:
healthcare_histo_posts_df = healthcare_histo_posts_df.rename(columns={'Unnamed: 1': 'Role & Band'})
healthcare_noncerv_posts_df = healthcare_noncerv_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                          'Headcount.1': 'Headcount',
                                                                          'WTE.1': 'WTE'})
healthcare_neuro_posts_df = healthcare_neuro_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.2': 'Headcount',
                                                                     'WTE.2': 'WTE'})
healthcare_paed_posts_df = healthcare_paed_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                    'Headcount.3': 'Headcount',
                                                                     'WTE.3': 'WTE'})
healthcare_cerv_posts_df  = healthcare_cerv_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.4': 'Headcount',
                                                                     'WTE.4': 'WTE'})

In [68]:
healthcare_histo_posts_df['Area'] = 'General Histopathology'
healthcare_noncerv_posts_df['Area'] = 'Non-Cervical/Diagnostic Cytopathology'
healthcare_neuro_posts_df['Area'] = 'Neuropathology'
healthcare_paed_posts_df['Area'] = 'Paediatric Pathology'
healthcare_cerv_posts_df['Area'] = 'Cervical Screening'

In [69]:
healthcare_posts_df = pd.concat([healthcare_histo_posts_df,
                                 healthcare_noncerv_posts_df,
                                 healthcare_neuro_posts_df,
                                 healthcare_paed_posts_df,
                                 healthcare_cerv_posts_df])


In [70]:
healthcare_posts_df = healthcare_posts_df.dropna(subset=['Headcount', 'WTE'])

In [71]:
sum = healthcare_posts_df['Headcount'].sum()
sum

459.0

In [72]:
healthcare_posts_df['Source Sheet'] = 'Sheet3_1_ScientistStaffing'

#### Admin & Clerical

In [73]:
# assign the data to dataframe using the defined function
admin_histo_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=41, nrows=8, usecols=[1,2,3])
admin_noncerv_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=41, nrows=8, usecols=[1,4,5])
admin_neuro_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=41, nrows=8, usecols=[1,6,7])
admin_paed_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=41, nrows=8, usecols=[1,8,9])
admin_cerv_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=41, nrows=8, usecols=[1,10,11])
admin_mo_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=41, nrows=8, usecols=[1,12,13])

Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS A&A SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS D&G SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Fife SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Forth Valley SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS GG&C SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Grampian SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Highland SPAN 2023-24.xlsx
Processing fi

In [74]:
admin_histo_posts_df = admin_histo_posts_df.rename(columns={'Unnamed: 1': 'Role & Band'})
admin_noncerv_posts_df = admin_noncerv_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                          'Headcount.1': 'Headcount',
                                                                          'WTE.1': 'WTE'})
admin_neuro_posts_df = admin_neuro_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.2': 'Headcount',
                                                                     'WTE.2': 'WTE'})
admin_paed_posts_df = admin_paed_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                    'Headcount.3': 'Headcount',
                                                                     'WTE.3': 'WTE'})
admin_cerv_posts_df  = admin_cerv_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.4': 'Headcount',
                                                                     'WTE.4': 'WTE'})
admin_mo_posts_df  = admin_mo_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.5': 'Headcount',
                                                                     'WTE.5': 'WTE'})

In [75]:
admin_histo_posts_df['Area'] = 'General Histopathology'
admin_noncerv_posts_df['Area'] = 'Non-Cervical/Diagnostic Cytopathology'
admin_neuro_posts_df['Area'] = 'Neuropathology'
admin_paed_posts_df['Area'] = 'Paediatric Pathology'
admin_cerv_posts_df['Area'] = 'Cervical Screening'
admin_mo_posts_df['Area'] = 'Mortuary & Autopsy'

In [76]:
admin_posts_df = pd.concat([admin_histo_posts_df,
                                 admin_noncerv_posts_df,
                                 admin_neuro_posts_df,
                                 admin_paed_posts_df,
                                 admin_cerv_posts_df,
                                 admin_mo_posts_df])


In [77]:
admin_posts_df = admin_posts_df.dropna(subset=['Headcount', 'WTE'])

In [78]:
sum = admin_posts_df['Headcount'].sum()
sum

86.0

In [79]:
admin_posts_df['Source Sheet'] = 'Sheet3_2_AdminClericalStaffing'

#### Anatomical Pathology Technologists

In [80]:
# assign the data to dataframe using the defined function
apt_neuro_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=54, nrows=12, usecols=[1,2,3])
apt_paed_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=54, nrows=12, usecols=[1,4,5])
apt_mo_posts_df = combine_excel_files(directory, '3. Non-Medical Staffing', skiprows=54, nrows=12, usecols=[1,6,7])

Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS A&A SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS D&G SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Fife SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Forth Valley SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS GG&C SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Grampian SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Highland SPAN 2023-24.xlsx
Processing fi

In [81]:
apt_neuro_posts_df = apt_neuro_posts_df.rename(columns={'Unnamed: 1': 'Role & Band'})
apt_paed_posts_df = apt_paed_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                      'Headcount.1': 'Headcount',
                                                       'WTE.1': 'WTE'})
apt_mo_posts_df = apt_mo_posts_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                  'Headcount.2': 'Headcount',
                                                  'WTE.2': 'WTE'})

In [82]:
apt_neuro_posts_df['Area'] = 'Neuropathology'
apt_paed_posts_df['Area'] = 'Paediatric Pathology'
apt_mo_posts_df['Area'] = 'Mortuary & Autopsy'

In [83]:
apt_posts_df = pd.concat([apt_neuro_posts_df,
                         apt_paed_posts_df,
                         apt_mo_posts_df])

In [84]:
apt_posts_df = apt_posts_df.dropna(subset=['Headcount', 'WTE'])

In [85]:
apt_posts_df

Unnamed: 0,Role & Band,Headcount,WTE,File Name,Area
12,APT Band 9,0.0,0.0,NHS D&G SPAN 2023-24.xlsx,Neuropathology
13,APT Band 8d,0.0,0.0,NHS D&G SPAN 2023-24.xlsx,Neuropathology
14,APT Band 8c,0.0,0.0,NHS D&G SPAN 2023-24.xlsx,Neuropathology
15,APT Band 8b,0.0,0.0,NHS D&G SPAN 2023-24.xlsx,Neuropathology
16,APT Band 8a,0.0,0.0,NHS D&G SPAN 2023-24.xlsx,Neuropathology
...,...,...,...,...,...
81,ATP Band 3,1.0,1.0,NHS Highland SPAN 2023-24.xlsx,Mortuary & Autopsy
83,ATP Trainee,1.0,1.0,NHS Highland SPAN 2023-24.xlsx,Mortuary & Autopsy
103,ATP Band 5,2.0,2.0,NHS Lothian SPAN 2023-24.xlsx,Mortuary & Autopsy
104,ATP Band 4,4.0,3.6,NHS Lothian SPAN 2023-24.xlsx,Mortuary & Autopsy


In [86]:
sum = apt_posts_df['Headcount'].sum()
sum

57.0

In [87]:
apt_posts_df['Source Sheet'] = 'Sheet3_3_ATPStaffing'

#### Merge the posts dataframes together

In [88]:
merged_posts_df = pd.concat([healthcare_posts_df,
                             admin_posts_df,
                             apt_posts_df])

In [89]:
merged_posts_df['Post/Vacancy'] = 'Post'

In [90]:
sum = merged_posts_df['WTE'].sum()

In [91]:
sum

530.16

### Ingest vacancy data from 4. Non-Medical Vacancies sheet

#### Healthcare Scientists

In [92]:
# assign the data to dataframe using the defined function
healthcare_histo_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=8, nrows=28, usecols=[1,2,3])
healthcare_noncerv_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=8, nrows=28, usecols=[1,4,5])
healthcare_neuro_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=8, nrows=28, usecols=[1,6,7])
healthcare_paed_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=8, nrows=28, usecols=[1,8,9])
healthcare_cerv_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=8, nrows=28, usecols=[1,10,11])

Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS A&A SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS D&G SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Fife SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Forth Valley SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS GG&C SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Grampian SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Highland SPAN 2023-24.xlsx
Processing fi

In [93]:
healthcare_histo_vac_df = healthcare_histo_vac_df.rename(columns={'Unnamed: 1': 'Role & Band'})
healthcare_noncerv_vac_df = healthcare_noncerv_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                          'Headcount.1': 'Headcount',
                                                                          'WTE.1': 'WTE'})
healthcare_neuro_vac_df = healthcare_neuro_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.2': 'Headcount',
                                                                     'WTE.2': 'WTE'})
healthcare_paed_vac_df = healthcare_paed_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                    'Headcount.3': 'Headcount',
                                                                     'WTE.3': 'WTE'})
healthcare_cerv_vac_df  = healthcare_cerv_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.4': 'Headcount',
                                                                     'WTE.4': 'WTE'})

In [94]:
healthcare_histo_vac_df['Area'] = 'General Histopathology'
healthcare_noncerv_vac_df['Area'] = 'Non-Cervical/Diagnostic Cytopathology'
healthcare_neuro_vac_df['Area'] = 'Neuropathology'
healthcare_paed_vac_df['Area'] = 'Paediatric Pathology'
healthcare_cerv_vac_df['Area'] = 'Cervical Screening'

In [95]:
healthcare_vac_df = pd.concat([healthcare_histo_vac_df,
                                 healthcare_noncerv_vac_df,
                                 healthcare_neuro_vac_df,
                                 healthcare_paed_vac_df,
                                 healthcare_cerv_vac_df])


In [96]:
healthcare_vac_df = healthcare_vac_df.dropna(subset=['Headcount', 'WTE'])

In [97]:
sum = healthcare_vac_df['WTE'].sum()
sum

25.590000000000003

In [98]:
healthcare_vac_df['Source Sheet'] = 'Sheet4_1_ScientistVacancy'

#### Admin & Clerical

In [99]:
# assign the data to dataframe using the defined function
admin_histo_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=41, nrows=8, usecols=[1,2,3])
admin_noncerv_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=41, nrows=8, usecols=[1,4,5])
admin_neuro_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=41, nrows=8, usecols=[1,6,7])
admin_paed_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=41, nrows=8, usecols=[1,8,9])
admin_cerv_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=41, nrows=8, usecols=[1,10,11])
admin_mo_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=41, nrows=8, usecols=[1,12,13])

Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS A&A SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS D&G SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Fife SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Forth Valley SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS GG&C SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Grampian SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Highland SPAN 2023-24.xlsx
Processing fi

In [100]:
admin_histo_vac_df = admin_histo_vac_df.rename(columns={'Unnamed: 1': 'Role & Band'})
admin_noncerv_vac_df = admin_noncerv_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                          'Headcount.1': 'Headcount',
                                                                          'WTE.1': 'WTE'})
admin_neuro_vac_df = admin_neuro_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.2': 'Headcount',
                                                                     'WTE.2': 'WTE'})
admin_paed_vac_df = admin_paed_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                    'Headcount.3': 'Headcount',
                                                                     'WTE.3': 'WTE'})
admin_cerv_vac_df  = admin_cerv_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.4': 'Headcount',
                                                                     'WTE.4': 'WTE'})
admin_mo_vac_df  = admin_mo_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                                     'Headcount.5': 'Headcount',
                                                                     'WTE.5': 'WTE'})

In [101]:
admin_histo_vac_df['Area'] = 'General Histopathology'
admin_noncerv_vac_df['Area'] = 'Non-Cervical/Diagnostic Cytopathology'
admin_neuro_vac_df['Area'] = 'Neuropathology'
admin_paed_vac_df['Area'] = 'Paediatric Pathology'
admin_cerv_vac_df['Area'] = 'Cervical Screening'
admin_mo_vac_df['Area'] = 'Mortuary & Autopsy'

In [102]:
admin_vac_df = pd.concat([admin_histo_vac_df,
                                 admin_noncerv_vac_df,
                                 admin_neuro_vac_df,
                                 admin_paed_vac_df,
                                 admin_cerv_vac_df,
                                 admin_mo_vac_df])


In [103]:
admin_vac_df['Source Sheet'] = 'Sheet4_2_AdminClericalVacancy'

#### Anatomical Pathology Technologists

In [104]:
# assign the data to dataframe using the defined function
apt_neuro_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=54, nrows=12, usecols=[1,2,3])
apt_paed_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=54, nrows=12, usecols=[1,4,5])
apt_mo_vac_df = combine_excel_files(directory, '4. Non-Medical Vacancies', skiprows=54, nrows=12, usecols=[1,6,7])

Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS A&A SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS D&G SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Fife SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Forth Valley SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS GG&C SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Grampian SPAN 2023-24.xlsx
Processing file: C:\Users\scotth07\OneDrive - NHS Scotland\Documents\Networks & Projects\SPAN UNI\Returns\23_24\NHS Highland SPAN 2023-24.xlsx
Processing fi

In [105]:
apt_neuro_vac_df = apt_neuro_vac_df.rename(columns={'Unnamed: 1': 'Role & Band'})
apt_paed_vac_df = apt_paed_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                      'Headcount.1': 'Headcount',
                                                       'WTE.1': 'WTE'})
apt_mo_vac_df = apt_mo_vac_df.rename(columns={'Unnamed: 1': 'Role & Band',
                                                  'Headcount.2': 'Headcount',
                                                  'WTE.2': 'WTE'})

In [106]:
apt_neuro_vac_df['Area'] = 'Neuropathology'
apt_paed_vac_df['Area'] = 'Paediatric Pathology'
apt_mo_vac_df['Area'] = 'Mortuary & Autopsy'

In [107]:
apt_vac_df = pd.concat([apt_neuro_vac_df,
                         apt_paed_vac_df,
                         apt_mo_vac_df])

In [108]:
apt_vac_df = apt_vac_df.dropna(subset=['Headcount', 'WTE'])

In [109]:
apt_vac_df['Source Sheet'] = 'Sheet4_3_ATPVacancy'

#### Merge the posts dataframes together

In [110]:
merged_vac_df = pd.concat([healthcare_vac_df,
                             admin_vac_df,
                             apt_vac_df])

In [111]:
merged_vac_df['Post/Vacancy'] = 'Vacancy'

#### Merge all staffing together

In [112]:
non_med_staffing_df = pd.concat([merged_posts_df,
                                merged_vac_df])

In [113]:
non_med_staffing_df[['Role', 'Band']] = non_med_staffing_df['Role & Band'].str.split(' Band ', expand=True)

In [114]:
def update_salary_band(row):
    if 'Trainee' in row['Role']:
        row['Band'] = 'Trainee'
    elif 'Other' in row['Role']:
        row['Band'] = 'Other'
    return row

In [115]:
non_med_staffing_df = non_med_staffing_df.apply(update_salary_band, axis=1)

In [116]:
non_med_staffing_df['Role'] = non_med_staffing_df['Role'].str.replace(' Trainee', '', regex=False)

In [117]:
non_med_staffing_df['Band'].unique()

array(['8a', '7', '6', 'Trainee', '4', '3', '9 ', '8d', '8c', '8b', '5',
       '2', '1', 'Other', '9'], dtype=object)

In [118]:
non_med_staffing_df.columns

Index(['Role & Band', 'Headcount', 'WTE', 'File Name', 'Area', 'Source Sheet',
       'Post/Vacancy', 'Role', 'Band'],
      dtype='object')

In [119]:
non_med_staffing_df['Year'] = '23/24'

In [120]:
non_med_staffing_df = non_med_staffing_df.rename(columns={'File Name': 'Source File'})

In [121]:
# extract health board from file name
non_med_staffing_df['Health Board'] = non_med_staffing_df['Source File'].apply(extract_hb)

In [122]:
# rename health boards with short code
non_med_staffing_df = non_med_staffing_df.replace({'Fife': 'FIFE',
                               'Forth': 'FV',
                               'Grampian': 'GRAM',
                               'Highland': 'HIGH',
                               'Lanarkshire': 'LAN',
                               'Lothian': 'LOTH',
                               'Tayside': 'TAY'})

In [123]:
# define new order for columns
column_order = ['Source File',
               'Source Sheet',
               'Health Board',
               'Year',
               'Area',
               'Role',
               'Band',
               'Post/Vacancy',
               'WTE',
               'Headcount']


# apply new column order
non_med_staffing_df = non_med_staffing_df[column_order]

In [124]:
non_med_staffing_df.to_csv('cleaned_data/non_medical_staffing.csv', index=False)