In [35]:
import pandas as pd

# Define the file paths
file_path_ohs = '/home/dragon/Git/Data/ohs-hc-template_v11.xlsx'
file_path_nte = '/home/dragon/Git/Data/nte-report-41724.xlsx'

# Read the specific sheet from the OHS HC Template file
positions_df = pd.read_excel(file_path_ohs, sheet_name='Positions Data Template', engine='openpyxl')

# Clean up column names by stripping any leading/trailing whitespace or newline characters
positions_df.columns = positions_df.columns.str.strip().str.replace('\n', '')

# Read the NTE REPORT file starting from the second row
nte_df = pd.read_excel(file_path_nte, skiprows=1, engine='openpyxl')

# Create a new DataFrame with specific columns from positions_df
position = positions_df[[
    'OHS PIN', 'FY Position Authorization', 'Supervisor PIN',
    'Directorate/Unit', 'Division', 'Branch/Program', 'Position Type',
    'Encumbered Position', 'Position Status', 'Employee Status',
    'Employee ID', 'Employee Name', 'Preferred Name', 'Position Title',
    'Position Description Title', 'Pay Plan', 'Minimum Grade',
    'Maximum Grade', 'Career Ladder Position','Hiring Type','Lapse in Appropriations Status',
    'Official Workplace Flexibility (Position)', 'Position Clearance','Position DOE Clearance', 'Notes'
]].copy()



# Add a new column 'Supervisor Role'       
position['Supervisor Role'] = position['OHS PIN'].map(position['Supervisor PIN'].value_counts())

# Fill NaN values with 0
position['Supervisor Role'].fillna(0, inplace=True)

# Rearrange columns so 'Supervisor Role' comes immediately after 'Supervisor PIN'                


# Function to check if Pay Plan is within position grade range
def check_grade_range(row):
    try:
        pay_plan = float(row['Pay Plan'])
        min_grade = float(row['Minimum Grade'])
        max_grade = float(row['Maximum Grade'])
        
        if pay_plan >= min_grade and pay_plan <= max_grade:
            return 'Within Position Grade Range'
        else:
            return 'Outside of Position Grade Range'
    except ValueError:
        return 'Error: Non-numeric value'


# Apply the function to create a new column 'Grade Range Status'
position['Grade Range Status'] = position.apply(check_grade_range, axis=1)

position = position[['OHS PIN', 'FY Position Authorization', 'Supervisor PIN','Supervisor Role',
                     'Directorate/Unit', 'Division', 'Branch/Program', 'Position Type',
                     'Encumbered Position', 'Position Status', 'Employee Status',
                     'Employee ID', 'Employee Name', 'Preferred Name', 'Position Title',
                     'Position Description Title', 'Pay Plan', 'Minimum Grade',
                     'Maximum Grade', 'Career Ladder Position','Grade Range Status','Hiring Type',
                     'Lapse in Appropriations Status','Official Workplace Flexibility (Position)', 
                     'Position Clearance','Position DOE Clearance', 'Notes'
                     ]]

# Set pandas options to display all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Display the updated DataFrame with the new column
print("Position DataFrame with Grade Range Status:")
print(position)

# Display the new DataFrame
print("Position DataFrame with rearranged columns and Supervisor Role:")
position.head()


Position DataFrame with Grade Range Status:
                       OHS PIN  FY Position Authorization Supervisor PIN  \
0                    I23610102                     2023.0      I23418351   
1                    I23418351                     2023.0      A22703317   
2                    I23401414                     2023.0      I23418351   
3                    I23488814                     2023.0      I23418351   
4                    I23883271                     2023.0      I23418351   
5                    I23896480                     2023.0      I23418351   
6                    I23286402                     2023.0      I23418351   
7                  A2291015216                     2024.0      A22703317   
8                    I22296439                     2024.0    A2291015216   
9                  N/A - DTL18                     2023.0    A2291015216   
10                 A2290950036                     2024.0      A22703317   
11                   A22799485              

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  position['Supervisor Role'].fillna(0, inplace=True)


Unnamed: 0,OHS PIN,FY Position Authorization,Supervisor PIN,Supervisor Role,Directorate/Unit,Division,Branch/Program,Position Type,Encumbered Position,Position Status,Employee Status,Employee ID,Employee Name,Preferred Name,Position Title,Position Description Title,Pay Plan,Minimum Grade,Maximum Grade,Career Ladder Position,Grade Range Status,Hiring Type,Lapse in Appropriations Status,Official Workplace Flexibility (Position),Position Clearance,Position DOE Clearance,Notes
0,I23610102,2023.0,I23418351,0.0,HCSOD,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Regional Planner,TBD,GS,13,,,Error: Non-numeric value,,,,,,
1,I23418351,2023.0,A22703317,6.0,HCSOD,Border_Health,DHS Child Well-Being,Incumbent,No,Filled,Permanent,P100008,,Rachel Burton,Program Manager,Social Worker,GS,14,,,Error: Non-numeric value,,Potentially Excepted in the Event of a Lapse i...,Field-Based,,,
2,I23401414,2023.0,I23418351,0.0,HCSOD,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Liaison to CBP,TBD,GS,13,,,Error: Non-numeric value,,,,,,
3,I23488814,2023.0,I23418351,0.0,HCSOD,Border_Health,DHS Child Well-Being,Incumbent,No,Vacant,Vacancy,,,,Program Analyst,TBD,GS,12,,,Error: Non-numeric value,,,,,,
4,I23883271,2023.0,I23418351,0.0,HCSOD,Border_Health,DHS Child Well-Being,Incumbent,No,Filled,Permanent,P100037,,,Medical Officer,Physician,GS,12,,,Error: Non-numeric value,,Potentially Excepted in the Event of a Lapse i...,Field-Based,,,


In [34]:
positions_df.columns

Index(['OHS PIN', 'FY Position Authorization', 'Supervisor PIN',
       'Directorate/Unit', 'Division', 'Branch/Program', 'Position Type',
       'Encumbered Position', 'Position Status', 'Employee Status',
       'Employee ID', 'Employee Name', 'Preferred Name', 'Position Title',
       'Position Description Title', 'Pay Plan', 'Minimum Grade',
       'Maximum Grade', 'Career Ladder Position', 'Hiring Type',
       'Lapse in Appropriations Status',
       'Official Workplace Flexibility (Position)', 'Position Clearance',
       'Position DOE Clearance', 'Notes'],
      dtype='object')