In [None]:
# Week 5: Rule-Based Intelligence and Ticket Prioritization
# This week focuses on converting analytical insights into actionable rules for prioritization and decision-making.

In [None]:
# Resolution time analysis
# Resolution_Time is not measured data: it is derived from the ticket's
# Priority label using the fixed lookup below (units are not stated in this
# notebook).
PRIORITY_TO_RESOLUTION_TIME = {
    "high": 2,
    "medium": 4,
    "low": 6,
}

# Normalise case/whitespace before the lookup. `.map` (rather than `.replace`)
# turns every label that is not in the lookup -- including missing values,
# which `astype(str)` converts to the text "nan" -- into NaN, so the column
# stays numeric and `.mean()` / `.isna()` behave as expected downstream.
df["Resolution_Time"] = (
    df["Priority"]
    .astype(str)
    .str.strip()
    .str.lower()
    .map(PRIORITY_TO_RESOLUTION_TIME)
)

In [None]:
# Week 5 - resolution time analysis: preview its first 5 rows next to Priority
df.loc[:, ["Priority", "Resolution_Time"]].head(5)

In [None]:
# Week 5 - mean of the Resolution_Time column over all tickets
df.loc[:, "Resolution_Time"].mean()

In [None]:
# Week 5 - mean Resolution_Time for each value of the Department column
avg_by_type = (
    df.groupby(by="Department")["Resolution_Time"]
      .mean()
)

In [None]:
# Week 5 - bar chart of average resolution time
# `avg_by_type` is grouped by the "Department" column, so the chart is
# labelled by department ("Ticket Type" is used elsewhere in this notebook
# for Cluster_Label).
avg_by_type.plot(kind="bar")
plt.title("Average Resolution Time by Department")
plt.xlabel("Department")
plt.ylabel("Average Resolution Time")
plt.show()


In [None]:
# Mean Resolution_Time for each raw Priority label
avg_by_priority = (
    df.groupby(by="Priority")["Resolution_Time"]
      .mean()
)
avg_by_priority


In [None]:
# Mean Resolution_Time per ticket type (the Cluster_Label column)
avg_by_ticket_type = (
    df.groupby(by="Cluster_Label")["Resolution_Time"]
      .mean()
)
avg_by_ticket_type

In [None]:
# Visualization: bar chart of the per-priority averages
ax = avg_by_priority.plot.bar()
ax.set_title("Average Resolution Time by Priority")
ax.set_xlabel("Priority")
ax.set_ylabel("Average Resolution Time")
plt.show()


In [None]:
# Visualization: distribution of Resolution_Time within each ticket type
df.boxplot(by="Cluster_Label", column="Resolution_Time")
# Blank out the super-title so only the title below is shown
plt.suptitle("")
plt.title("Resolution Time by Ticket Type")
plt.ylabel("Resolution Time")
plt.xlabel("Ticket Type")
plt.show()


In [None]:
# Analyze High Priority tickets by Ticket Type
# Normalise the labels (case and surrounding whitespace) before comparing, the
# same way the Resolution_Time cell does, so values such as "High" or " high "
# are not silently dropped from the subset.
is_high_priority = df["Priority"].astype(str).str.strip().str.lower() == "high"
high_priority_df = df[is_high_priority]


In [None]:
# Mean Resolution_Time of the high-priority subset, per ticket type
high_priority_avg = high_priority_df.groupby(by="Cluster_Label")["Resolution_Time"].mean()
high_priority_avg


In [None]:
# Bar chart: ticket type vs. average resolution time, high-priority tickets only
ax = high_priority_avg.plot.bar()
ax.set_title("High Priority Ticket Resolution Time by Ticket Type")
ax.set_xlabel("Ticket Type")
ax.set_ylabel("Average Resolution Time")
plt.show()


In [None]:
# Unresolved tickets: the rows whose Resolution_Time value is missing
unresolved_tickets = df.loc[df["Resolution_Time"].isna()]
unresolved_tickets


In [None]:
# Unresolved tickets that are high priority
# Compare against the normalised label (lower-case, stripped) so that raw
# values such as "High" match, consistent with how Priority is normalised when
# Resolution_Time is derived.
unresolved_is_high = (
    unresolved_tickets["Priority"].astype(str).str.strip().str.lower() == "high"
)
unresolved_high_priority = unresolved_tickets[unresolved_is_high]

unresolved_high_priority

In [None]:
# Chart the unresolved tickets per priority; print a notice when there are none
if not unresolved_tickets.empty:
    unresolved_counts = unresolved_tickets["Priority"].value_counts()
    ax = unresolved_counts.plot.bar()
    ax.set_title("Unresolved Tickets by Priority")
    ax.set_xlabel("Priority")
    ax.set_ylabel("Number of Tickets")
    plt.show()
else:
    print("No unresolved tickets found in the dataset.")


In [None]:
# End of week 5
#Rule-based prioritization and automated ticket routing were implemented.

In [None]:
# Week 6: Visualization and Analytical Insights
#This week focuses on visualizing ticket patterns and validating the impact of rule-based intelligence.

In [None]:
# Assign random regions since the dataset has no region-related data
import numpy as np

regions = ["North", "South", "East", "West", "Central"]

# Use a seeded generator so the synthetic region assignment -- and every
# region-based table and chart derived from it -- is identical on each
# Restart & Run All instead of changing with every execution.
REGION_SEED = 42
region_rng = np.random.default_rng(REGION_SEED)

df["Region"] = region_rng.choice(regions, size=len(df))


In [None]:
# Number of tickets per region (value_counts orders them most frequent first)
tickets_by_region = df.loc[:, "Region"].value_counts()
tickets_by_region


In [None]:
# Bar chart of the ticket count in each region
ax = tickets_by_region.plot.bar()
ax.set_title("Ticket Concentration by Region")
ax.set_xlabel("Region")
ax.set_ylabel("Ticket Count")
plt.show()


In [None]:
# Heatmap - ticket concentration by region and priority
import seaborn as sns

# Rows: Region, columns: Priority, cells: number of tickets (0 where a
# region/priority combination does not occur).
heatmap_data = (
    df.groupby(["Region", "Priority"])
      .size()
      .unstack(fill_value=0)
)

# fmt="d" prints the integer counts in full; seaborn's default annotation
# format (".2g") keeps only two significant digits, so larger counts would be
# shown rounded or in scientific notation.
sns.heatmap(heatmap_data, annot=True, fmt="d", cmap="Blues")
plt.title("Ticket Concentration Heatmap by Region and Priority")
plt.xlabel("Priority")
plt.ylabel("Region")
plt.show()



In [None]:
# Region with the highest ticket volume (index label of the largest count in tickets_by_region)
tickets_by_region.idxmax()


In [None]:
# Region with the lowest ticket volume (index label of the smallest count in tickets_by_region)
tickets_by_region.idxmin()

In [None]:
# Category-level geographic analysis: ticket counts with one row per Region and
# one column per Cluster_Label (0 where a combination never occurs)
category_region = df.groupby(["Region", "Cluster_Label"]).size().unstack(fill_value=0)
category_region


In [None]:
# Comparative chart: one stacked bar per region, one segment per issue category
ax = category_region.plot.bar(stacked=True)
ax.set_title("Issue Categories Across Regions")
ax.set_xlabel("Region")
ax.set_ylabel("Ticket Count")
plt.show()


In [None]:
# Mean Resolution_Time of the tickets in each region
avg_resolution_by_region = df.groupby(by="Region")["Resolution_Time"].mean()
avg_resolution_by_region


In [None]:
# Bar chart: region vs. average resolution time
ax = avg_resolution_by_region.plot.bar()
ax.set_title("Average Resolution Time by Region")
ax.set_xlabel("Region")
ax.set_ylabel("Average Resolution Time")
plt.show()


In [None]:
# Heatmap: region vs. average resolution time
import seaborn as sns

# A heatmap needs 2-D input, so turn the Series into a one-column frame
heatmap_data = avg_resolution_by_region.rename("Avg_Resolution_Time").to_frame()

ax = sns.heatmap(heatmap_data, annot=True, cmap="Blues")
ax.set_title("Heatmap of Average Resolution Time by Region")
ax.set_ylabel("Region")
ax.set_xlabel("Metric")
plt.show()


In [None]:
# Cluster size: number of tickets carrying each Cluster_Label
cluster_size = df.loc[:, "Cluster_Label"].value_counts()
cluster_size

In [None]:
# Cluster performance: mean Resolution_Time per Cluster_Label
cluster_performance = df.groupby(by="Cluster_Label")["Resolution_Time"].mean()
cluster_performance


In [None]:
# Cluster size and performance side by side, joined on the cluster label index
cluster_analysis = cluster_size.to_frame("Cluster_Size").join(
    cluster_performance.rename("Avg_Resolution_Time")
)

cluster_analysis


In [None]:
# Cluster size vs. performance: one point per cluster
ax = plt.gca()
ax.scatter(
    cluster_analysis["Cluster_Size"],
    cluster_analysis["Avg_Resolution_Time"],
)
ax.set_xlabel("Cluster Size (Number of Tickets)")
ax.set_ylabel("Average Resolution Time")
ax.set_title("Cluster Size vs Resolution Performance")
plt.show()


In [None]:
# Bar chart: cluster vs. average resolution time
ax = cluster_analysis["Avg_Resolution_Time"].plot.bar()
ax.set_title("Average Resolution Time by Cluster")
ax.set_xlabel("Cluster")
ax.set_ylabel("Average Resolution Time")
plt.show()


In [None]:
# Cluster with the highest average resolution time, and cluster with the most tickets
worst_cluster = cluster_analysis.loc[:, "Avg_Resolution_Time"].idxmax()
largest_cluster = cluster_analysis.loc[:, "Cluster_Size"].idxmax()

(worst_cluster, largest_cluster)


In [None]:
# End of Week 6
#All visual insights were generated to validate clustering, prioritization, and routing logic.

In [None]:
# Milestone 3 Completed
#Performance analysis, visualization, and analytical insights were finalized.