In [2]:
import pandas as pd

# Define the file paths
file_path_ohs = '/home/dragon/Git/Data/ohs-hc-template_v11.xlsx'
file_path_nte = '/home/dragon/Git/Data/nte-report-41724.xlsx'

# Read the specific sheet from the OHS HC Template file
positions_df = pd.read_excel(file_path_ohs, sheet_name='Positions Data Template', engine='openpyxl')

# Clean up column names by stripping any leading/trailing whitespace or newline characters
positions_df.columns = positions_df.columns.str.strip().str.replace('\n', '')

# Read the NTE REPORT file starting from the second row
nte_df = pd.read_excel(file_path_nte, skiprows=1, engine='openpyxl')

# Create a new DataFrame with specific columns from positions_df
position = positions_df[[
    'OHS PIN', 'FY Position Authorization', 'Supervisor PIN',
     'Division', 'Branch/Program', 'Position Type',
    'Encumbered Position', 'Position Status', 'Employee Status',
    'Employee ID', 'Employee Name', 'Preferred Name', 'Position Title',
    'Position Description Title', 'Pay Plan', 'Minimum Grade',
    'Maximum Grade', 'Career Ladder Position','Hiring Type','Lapse in Appropriations Status',
    'Official Workplace Flexibility (Position)', 'Position Clearance','Position DOE Clearance', 'Notes'
]].copy()



# Add a new column 'Supervisor Role'       
position['Supervisor Role'] = position['OHS PIN'].map(position['Supervisor PIN'].value_counts())

# Fill NaN values with 0
position['Supervisor Role'].fillna(0, inplace=True)

# Rearrange columns so 'Supervisor Role' comes immediately after 'Supervisor PIN'                


# Function to check if Pay Plan is within position grade range
def check_grade_range(row):
    try:
        pay_plan = float(row['Pay Plan'])
        min_grade = float(row['Minimum Grade'])
        max_grade = float(row['Maximum Grade'])
        
        if pay_plan >= min_grade and pay_plan <= max_grade:
            return 'Within Position Grade Range'
        else:
            return 'Outside of Position Grade Range'
    except ValueError:
        return 'Error: Non-numeric value'


# Apply the function to create a new column 'Grade Range Status'
position['Grade Range Status'] = position.apply(check_grade_range, axis=1)

position = position[['OHS PIN', 'FY Position Authorization', 'Supervisor PIN','Supervisor Role',
                     'Division', 'Branch/Program', 'Position Type',
                     'Encumbered Position', 'Position Status', 'Employee Status',
                     'Employee ID', 'Employee Name', 'Preferred Name', 'Position Title',
                     'Position Description Title', 'Pay Plan', 'Minimum Grade',
                     'Maximum Grade', 'Career Ladder Position','Grade Range Status','Hiring Type',
                     'Lapse in Appropriations Status','Official Workplace Flexibility (Position)', 
                     'Position Clearance','Position DOE Clearance', 'Notes'
                     ]]

# Set pandas options to display all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


# Display the new DataFrame
print("Position DataFrame with rearranged columns and Supervisor Role:")
position


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  position['Supervisor Role'].fillna(0, inplace=True)


Position DataFrame with rearranged columns and Supervisor Role:


Unnamed: 0,OHS PIN,FY Position Authorization,Supervisor PIN,Supervisor Role,Division,Branch/Program,Position Type,Encumbered Position,Position Status,Employee Status,Employee ID,Employee Name,Preferred Name,Position Title,Position Description Title,Pay Plan,Minimum Grade,Maximum Grade,Career Ladder Position,Grade Range Status,Hiring Type,Lapse in Appropriations Status,Official Workplace Flexibility (Position),Position Clearance,Position DOE Clearance,Notes
0,I23610102,2023.0,I23418351,0.0,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Regional Planner,TBD,GS,13,,No,Error: Non-numeric value,,,,,,
1,I23418351,2023.0,A22703317,6.0,Border_Health,DHS Child Well-Being,Incumbent,No,Filled,Permanent,P100008,,Rachel Burton,Program Manager,Social Worker,GS,14,,No,Error: Non-numeric value,,Potentially Excepted in the Event of a Lapse i...,Field-Based,,,
2,I23401414,2023.0,I23418351,0.0,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Liaison to CBP,TBD,GS,13,,No,Error: Non-numeric value,,,,,,
3,I23488814,2023.0,I23418351,0.0,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Program Analyst,TBD,GS,12,,No,Error: Non-numeric value,,,,,,
4,I23883271,2023.0,I23418351,0.0,Border_Health,DHS Child Well-Being,Incumbent,No,Filled,Permanent,P100037,,,Medical Officer,Physician,GS,12,,No,Error: Non-numeric value,,Potentially Excepted in the Event of a Lapse i...,Field-Based,,,
5,I23896480,2023.0,I23418351,0.0,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Program Analyst,TBD,GS,13,,No,Error: Non-numeric value,,,,,,
6,I23286402,2023.0,I23418351,0.0,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Program Analyst,TBD,GS,13,,No,Error: Non-numeric value,,,,,,
7,A2291015216,2024.0,A22703317,2.0,Health_and_Medical_Readiness,Psychological Readiness,Authorized,No,Vacant,Vacancy,,,,Clinical Psychologist,,GS,15,,No,Error: Non-numeric value,,,,,,
8,I22296439,2024.0,A2291015216,0.0,Health_and_Medical_Readiness,Psychological Readiness,Incumbent,No,Filled,Permanent,,"Brakefield- Allen, Tiffany",Tiffany Brakefield- Allen,Clinical Psychologist,Clinical Psychologist,GS,14,,Yes,Error: Non-numeric value,,,,,,
9,N/A - DTL18,2023.0,A2291015216,0.0,Health_and_Medical_Readiness,Psychological Readiness,Authorized,No,Filled,Detailee,D100029,,Frances Poleto,SMARRT Program Manager,SMARRT Implementation Program Manager,GS,15,,No,Error: Non-numeric value,,Non-Exempt (Federal),Field-Based,,,


In [3]:
positions_df.columns

Index(['OHS PIN', 'FY Position Authorization', 'Supervisor PIN', '',
       'Division', 'Branch/Program', 'Position Type', 'Encumbered Position',
       'Position Status', 'Employee Status', 'Employee ID', 'Employee Name',
       'Preferred Name', 'Position Title', 'Position Description Title',
       'Pay Plan', 'Minimum Grade', 'Maximum Grade', 'Career Ladder Position',
       'Hiring Type', 'Lapse in Appropriations Status',
       'Official Workplace Flexibility (Position)', 'Position Clearance',
       'Position DOE Clearance', 'Notes'],
      dtype='object')

# Chapter 2

In [4]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# Define the file path for the OHS HC Template
file_path_ohs = '/home/dragon/Git/Data/ohs-hc-template_v11.xlsx'

# Read the 'Vacancy Data' sheet from the Excel file
vacancy_df = pd.read_excel(file_path_ohs, sheet_name='Vacancy Data', engine='openpyxl')

# Extract specific columns from vacancy_df
columns_to_add = [
    'Hire Manager', 'HC Servicing Specialists', 'FedHR Navigation Number', 
    'Nature of Action', 'Current Status', 'Action Owner', 'USA Jobs', 
    '1. PND PRF Submission', '2. PRF Approved', '3. Budget Certification Complete', 
    '4. Recruitment Request Submitted to OCHCO', '5. PD Classification Complete', 
    '6. Recruitment Package Routed to HRMS Staffing POC', '7. Draft Job Analysis Received', 
    '8. Job Analysis Returned', '9. Draft Vacancy Announcement Documents Received', 
    '10. Vacancy Announcement Documents Returned', '11. Vacancy Announcement Open', 
    '12. Vacancy Announcement Closed', '13. Certificate Issued', '14. Certificate Returned', 
    '15. TJO Issued', '16. Security', '17. FJO Issued', '18. EOD Set or Cancellation Date', 
    'EOD Set or Cancellation', 'Certificate Expiration Date','Honorific Title', 'Selectee Legal Last Name',
    'Selectee Legal First Name', 'Suffix', 'Notes'
]

# Select only the required columns from vacancy_df
additional_data = vacancy_df[columns_to_add].copy()

# Concatenate vacancy_df and additional_data horizontally (side by side)
vacancy = pd.concat([vacancy_df, additional_data], axis=1)

# Convert 'Vacant Date' to datetime format
vacancy['Vacant Date'] = pd.to_datetime(vacancy['Vacant Date'], errors='coerce')

# Calculate 'Length of Vacancy (Days)'
vacancy['Length of Vacancy (Days)'] = (datetime.now() - vacancy['Vacant Date']).dt.days

# Extract columns of interest for date calculation
date_columns = vacancy.columns[vacancy.columns.str.startswith('1.') & vacancy.columns.str.endswith('Date')]

# Convert date columns to datetime format
vacancy[date_columns] = vacancy[date_columns].apply(pd.to_datetime, errors='coerce')

# Find the furthest date recorded to the right
vacancy['LastDate'] = vacancy[date_columns].max(axis=1)

# Calculate the number of workdays between 'LastDate' and today's date, excluding weekends
valid_dates = vacancy['LastDate'].notna()
vacancy.loc[valid_dates, 'Days in Stage'] = np.busday_count(vacancy.loc[valid_dates, 'LastDate'].values.astype('datetime64[D]'), np.datetime64('today'))

vacancy = vacancy[[
    'Hire Manager', 'HC Servicing Specialists', 'FedHR Navigation Number', 
    'Nature of Action', 'Current Status', 'Action Owner', 'USA Jobs', 
    '1. PND PRF Submission', '2. PRF Approved', '3. Budget Certification Complete', 
    '4. Recruitment Request Submitted to OCHCO', '5. PD Classification Complete', 
    '6. Recruitment Package Routed to HRMS Staffing POC', '7. Draft Job Analysis Received', 
    '8. Job Analysis Returned', '9. Draft Vacancy Announcement Documents Received', 
    '10. Vacancy Announcement Documents Returned', '11. Vacancy Announcement Open', 
    '12. Vacancy Announcement Closed', '13. Certificate Issued', '14. Certificate Returned', 
    '15. TJO Issued', '16. Security', '17. FJO Issued', '18. EOD Set or Cancellation Date', 
    'EOD Set or Cancellation', 'Certificate Expiration Date','Vacant Date', 'Length of Vacancy (Days)',
    'LastDate','Days in Stage','Honorific Title', 'Selectee Legal Last Name','Selectee Legal First Name', 
    'Suffix', 'Notes'
]]

# Display the updated DataFrame with the new columns
vacancy.head()


Unnamed: 0,Hire Manager,Hire Manager.1,HC Servicing Specialists,HC Servicing Specialists.1,FedHR Navigation Number,FedHR Navigation Number.1,Nature of Action,Nature of Action.1,Current Status,Current Status.1,...,Honorific Title,Honorific Title.1,Selectee Legal Last Name,Selectee Legal Last Name.1,Selectee Legal First Name,Selectee Legal First Name.1,Suffix,Suffix.1,Notes,Notes.1
0,,,,,,,,,18. EOD Set or Cancellation Date,18. EOD Set or Cancellation Date,...,,,,,,,,,Notes,Notes
1,,,,,,,,,18. EOD Set or Cancellation Date,18. EOD Set or Cancellation Date,...,,,,,,,,,Shifted to FY22 as position # F14,Shifted to FY22 as position # F14
2,,,,,,,,,18. EOD Set or Cancellation Date,18. EOD Set or Cancellation Date,...,,,Sanders,Sanders,John,John,,,Shifted to FY22 as position # F1,Shifted to FY22 as position # F1
3,,,,,,,,,18. EOD Set or Cancellation Date,18. EOD Set or Cancellation Date,...,,,,,,,,,,
4,,,,,,,,,18. EOD Set or Cancellation Date,18. EOD Set or Cancellation Date,...,,,,,,,,,No candidates after reposting #2.,No candidates after reposting #2.


# chapter 3

In [5]:
import pandas as pd

# Define the file path
file_path_ohs = '/home/dragon/Git/Data/ohs-hc-template_v11.xlsx'

# Read specific sheets into DataFrames
individual_data_template = pd.read_excel(file_path_ohs, sheet_name='Individual Data Template')
external_detailee_data_template = pd.read_excel(file_path_ohs, sheet_name='External Detailee Data Template')
direct_support_ctr_template = pd.read_excel(file_path_ohs, sheet_name='Direct Support CTR Template')
positions_data_template = pd.read_excel(file_path_ohs, sheet_name='Positions Data Template')

# Display the first few rows of each DataFrame to confirm successful loading
print("Individual Data Template:")
print(individual_data_template.head())

print("\nExternal Detailee Data Template:")
print(external_detailee_data_template.head())

print("\nDirect Support CTR Template:")
print(direct_support_ctr_template.head())

print("\nPositions Data Template:")
print(positions_data_template.head())


Individual Data Template:
  Employee ID Honorific Title Employee Legal Last Name  \
0     P100001             NaN                 Anderson   
1     P100002             NaN            Backer-Krisel   
2     P100003             NaN                Ballering   
3     P100004             NaN                    Biles   
4     P100005             NaN                   Blount   

  Employee Legal First Name  Suffix  Preferred Name  OHS EOD Date  \
0                      Karl     NaN             NaN           NaN   
1                 Stephanie     NaN             NaN           NaN   
2                     Katie     NaN             NaN           NaN   
3                     Amber     NaN             NaN           NaN   
4                     Brett     NaN             NaN           NaN   

   Separation Date  Reason for Separation              Email Address Username  \
0              NaN                    NaN            Karl.Anderson@hq.dhs.gov   
1              NaN                    NaN  Steph