In [None]:
import pandas as pd
from powerbiclient.dataset import dataset

# Pull the three source tables out of the `dataset` mapping imported above.
# NOTE(review): assumes `dataset` is dict-like and yields pandas DataFrames — confirm.
staff_df = dataset["staff"]
types_df = dataset["types"]
departments_df = dataset["departments"]

# Enrich every staff row with its project's type and its job's department.
# Left joins keep all staff rows even when no matching type/department exists.
merged_df = staff_df.merge(types_df, on="Project", how="left")
merged_df = merged_df.merge(departments_df, on="Job", how="left")

# A bare `return` is a SyntaxError at cell/script level (there is no enclosing
# function), so the cell could never run. `merged_df` stays in the namespace for
# the cells below; the last expression just previews it.
merged_df.head()

In [36]:
# Tally how many rows each (Project, Job, Type) combination has per month, then
# pivot the months into columns; months with no rows for a combination become 0.
monthly_tally = merged_df.groupby(["Project", "Job", "Type"])["Month"].value_counts()
grouped_df = monthly_tally.unstack(fill_value=0)

# The peak for a combination is its largest monthly tally.
peak_counts = grouped_df.max(axis=1)

# Turn Project/Job/Type back into ordinary columns. The values column of an
# unnamed Series comes out labelled 0, so it is renamed to something readable.
column_labels = {0: "Peak Count"}
result = peak_counts.reset_index().rename(columns=column_labels)

# Keep a descriptive alias for the finished table.
# NOTE(review): the original notes say Power BI shows `df_with_peak_count` as a
# table visual automatically — confirm for the target environment.
df_with_peak_count = result

In [37]:
# Read the per-project peak counts.
df = pd.read_excel("unique_job_titles_peak_counts_with_types.xlsx")

# Average the peak count for every (Type, Job) pair, rounded to a whole number.
grouped_df = df.groupby(["Type", "Job"])["Peak Count"].mean().round().astype(int).reset_index()

# Build one Job-indexed block per type. The loop variable is deliberately not
# called `type`: at notebook scope that name would shadow the builtin `type()`
# for every cell that runs afterwards.
type_blocks = [
    grouped_df[grouped_df["Type"] == type_name].set_index("Job").rename_axis(type_name)
    for type_name in grouped_df["Type"].unique()
]

# Place the blocks side by side with a single concat instead of growing the
# frame inside a loop. The leading empty frame keeps the call valid when there
# are no types at all (pd.concat raises on an empty list).
final_df = pd.concat([pd.DataFrame(), *type_blocks], axis=1)

# Save to Excel with a single sheet.
with pd.ExcelWriter("example_projects_structured.xlsx") as writer:
    final_df.to_excel(writer, sheet_name="Example Projects", index=True)

print("Example projects saved to 'example_projects_structured.xlsx' successfully!")

Example projects saved to 'example_projects_structured.xlsx' successfully!


In [38]:
import pandas as pd
import numpy as np
from scipy.stats import norm

# Read the per-project peak counts.
df = pd.read_excel("unique_job_titles_peak_counts_with_types.xlsx")

# Ask which project type to model. Surrounding whitespace is stripped because
# the value is later compared for exact equality against the "Type" column and
# used as the output file name.
user_type = input("Enter the type: ").strip()

# Ask for the schedule length. int() raises ValueError on non-numeric input;
# reject non-positive lengths here rather than producing an empty schedule later.
user_duration = int(input("Enter the duration in months: "))
if user_duration < 1:
    raise ValueError(f"Duration must be at least 1 month, got {user_duration}.")


In [39]:

def generate_bell_curve_counts(duration, peak_count, multiplier=1.25):
    """Generate monthly counts that rise to a peak mid-schedule and fall back to zero.

    The curve is symmetric about the centre of the schedule, is exactly zero in
    the first and last month, and never exceeds ``peak_count``.

    Args:
        duration: Number of months (length of the returned array).
        peak_count: Largest count allowed in any month.
        multiplier: Scale applied before truncation and capping. Values above 1
            widen the plateau at ``peak_count``; the default matches the
            constant previously hard-coded in this function.

    Returns:
        A float ``numpy`` array of length ``duration`` holding whole-number
        counts. Schedules shorter than three months are all zeros, because both
        end months are zero by definition.
    """
    counts = np.zeros(duration)  # Start with zeros
    if duration < 3:
        # No interior month exists, and (duration - 1) / 2 would be 0 for duration == 1.
        return counts

    # Normalised distance of every month from the centre: 0 at the centre,
    # exactly 1 at both ends. The previous version measured the distance as
    # abs(midpoint - i) with i already being the offset from the midpoint,
    # which inverted the curve (zero in the middle, largest near the edges).
    center = (duration - 1) / 2
    distance = np.abs(np.arange(duration) - center) / center

    scaled = peak_count * multiplier * (1 - distance ** 1.5)

    # Truncate to whole counts (as int() did before) and cap at the stated peak.
    counts[:] = np.minimum(np.trunc(scaled), peak_count)
    return counts


In [40]:
# Keep only the rows for the requested type. Fail loudly when nothing matches:
# otherwise an empty workbook would be written and reported as a success.
filtered_df = df[df["Type"] == user_type]
if filtered_df.empty:
    known_types = sorted(df["Type"].dropna().astype(str).unique())
    raise ValueError(f"No rows found for type '{user_type}'. Known types: {known_types}")

# Average peak count per job title, rounded to a whole number.
unique_peaks = filtered_df.groupby("Job")["Peak Count"].mean().round().astype(int).reset_index()

# One row per job title, starting with just the "Peak Count" column.
result_df = unique_peaks.set_index("Job")

# Append one (initially empty) column per month of the schedule, keeping "Peak Count".
months = pd.date_range(start="2024-01-01", periods=user_duration, freq="MS")
result_df = result_df.reindex(columns=result_df.columns.tolist() + list(months))

# Fill each job's monthly columns.
n_months = len(months)
for job_title in result_df.index:
    peak_count = result_df.loc[job_title, "Peak Count"]

    if peak_count == 1:
        # A peak of one is modelled as a flat line of 1s across the whole schedule.
        counts = np.ones(n_months)
    else:
        counts = generate_bell_curve_counts(n_months, peak_count)

    result_df.loc[job_title, months] = counts

# Save to Excel, naming the file after the type.
output_filename = f"{user_type}.xlsx"
result_df.to_excel(output_filename)

print(f"Average peak counts for type '{user_type}' saved to '{output_filename}' successfully!")

Average peak counts for type 'Bridges' saved to 'Bridges.xlsx' successfully!
