In [1]:
import pandas as pd

# Load the staff data from 'staff.xlsx'
staff_data = pd.read_excel('staff.xlsx')

# Load the types data from 'types.xlsx'
types_data = pd.read_excel('types.xlsx')

# Display the first few rows to ensure the data is loaded correctly
print(staff_data.head())
print(types_data.head())

                         Job    Project      Month
0                  سائق لودر  Beni Suef 2015-10-01
1              Site Engineer        Tb2 2015-10-01
2          Project Manager .        Tb2 2015-10-01
3              Site Engineer        Tb2 2015-10-01
4  Technical Office Engineer        Tb2 2015-10-01
            Project        Type
0         Beni Suef  Industrial
1               Tb2  Commercial
2          New Giza  Commercial
3   Hyper Solimanya  Commercial
4  New Giza phase 2  Commercial


In [2]:
# Group the data by 'Project' and 'Job', and create a temporary DataFrame
grouped_data_with_types = staff_data.groupby(['Project', 'Job']).agg(
    Peak_Number=('Job', 'count')
).reset_index()

# Display the peak number of each job title in each project
print("Peak number of each job title in each project:")
print(grouped_data_with_types)

# Group the data by 'Job' and create another temporary DataFrame
grouped_data_with_averages = staff_data.groupby('Job').agg(
    Average_Start_Month=('Month', 'min'),
    Average_End_Month=('Month', 'max')
).reset_index()

# Merge the two DataFrames to create a single DataFrame with all necessary information
merged_grouped_data = pd.merge(grouped_data_with_types, grouped_data_with_averages, on='Job')

# Convert 'Month' columns to datetime for accurate calculations
merged_grouped_data['Average_Start_Month'] = pd.to_datetime(merged_grouped_data['Average_Start_Month'])
merged_grouped_data['Average_End_Month'] = pd.to_datetime(merged_grouped_data['Average_End_Month'])

# Calculate average start and end months relative to project duration (month 0 and last month)
merged_grouped_data['Relative_Start_Month'] = merged_grouped_data['Average_Start_Month'].dt.month - 1
merged_grouped_data['Relative_End_Month'] = merged_grouped_data['Average_End_Month'].dt.month - 1

# Display the results
print("Average start and end months for each job (relative to project duration):")
print(merged_grouped_data)

Peak number of each job title in each project:
                            Project                        Job  Peak_Number
0            10th of Ramadan Bridge      Blacksmith Supervisor            3
1            10th of Ramadan Bridge       Carpenter Supervisor            2
2            10th of Ramadan Bridge       Construction Foreman            3
3            10th of Ramadan Bridge       Construction Manager            4
4            10th of Ramadan Bridge        Document Controller            2
...                             ...                        ...          ...
5203            Zed Towers Phase II       Store Keeper Officer           37
5204            Zed Towers Phase II                   Surveyor           38
5205            Zed Towers Phase II    Talent Program Engineer            6
5206            Zed Towers Phase II  Technical Office Engineer            9
5207  koumassi cote d'ivoire Branch            Project Manager            1

[5208 rows x 3 columns]
Average start an

In [3]:
from datetime import timedelta

# Define the function to create the deployment plan
def create_deployment_plan(project_type, duration_months):

  # Filter data based on project type (using the merged DataFrame)
  filtered_data = merged_grouped_data[merged_grouped_data['Type'] == project_type]

  # Calculate average number of jobs
  average_job_count = filtered_data['Peak_Number'].mean()

  # Adjust start and end months based on user input
  start_month = filtered_data['Relative_Start_Month'].mean() + 1
  end_month = start_month + duration_months - 1

  # Ensure end month doesn't exceed dataset's max
  max_end_month = filtered_data['Relative_End_Month'].max()
  if end_month > max_end_month:
    end_month = max_end_month

  # Calculate actual start and end dates
  start_date = datetime.datetime(2024, 1, 1) + timedelta(months=start_month - 1)
  end_date = start_date + timedelta(months=duration_months)

  # Create and return the deployment plan dictionary
  deployment_plan = {
      'Project_Type': project_type,
      'Start_Month': start_month,
      'End_Month': end_month,
      'Number_of_Jobs': int(average_job_count),
      'Start_Date': start_date.strftime("%Y-%m-%d"),
      'End_Date': end_date.strftime("%Y-%m-%d"),
  }

  return deployment_plan

# Get user input
project_type = input("Enter the project type: ")
duration_months = int(input("Enter the project duration in months: "))

# Create and print the deployment plan
deployment_plan = create_deployment_plan(project_type, duration_months)
print("Deployment Plan for", project_type, "Project:")
print(deployment_plan)

ValueError: invalid literal for int() with base 10: ''