In [None]:
# Week 1: Project Initialization and Dataset Exploration
# Week 1 is about getting to know the support-ticket dataset: its structure,
# schema, and basic characteristics. This first cell lays out the project
# folders and then lists what exists under the project root.

import os

for project_dir in (
    "supportlytics/data",
    "supportlytics/notebooks",
    "supportlytics/report/figures",
    "supportlytics/dashboard",
):
    # exist_ok=True keeps the cell safe to re-run once the folders exist.
    os.makedirs(project_dir, exist_ok=True)

os.listdir("supportlytics")


In [None]:
# Week 1: Uploading the support ticket dataset
# This cell opens the Colab file picker so raw_tickets.csv can be uploaded
# into the runtime's working directory. Nothing is loaded into pandas yet.

from google.colab import files

# files.upload() returns a dict mapping each uploaded filename to its bytes.
# Binding the result keeps the raw file contents from being echoed as the
# cell output; only the uploaded filenames are displayed.
uploaded = files.upload()
list(uploaded)

In [None]:
# Week 1: Checking available files in the working directory
# Lists the current directory so the dataset file can be confirmed as
# present before any processing starts.

os.listdir(".")


In [None]:
# Week 1: Organizing the dataset into the project structure
# This cell moves the uploaded raw_tickets.csv from the working directory
# into supportlytics/data. It is safe to re-run: if the file has already
# been moved, the move is skipped instead of raising.

import os
import shutil

raw_upload_path = "raw_tickets.csv"
raw_project_path = "supportlytics/data/raw_tickets.csv"

os.makedirs("supportlytics/data", exist_ok=True)

if os.path.exists(raw_upload_path):
    shutil.move(raw_upload_path, raw_project_path)
elif not os.path.exists(raw_project_path):
    # Neither the fresh upload nor a previously moved copy exists, so fail
    # with a message that says what to do.
    raise FileNotFoundError(
        f"{raw_upload_path!r} not found in the working directory and "
        f"{raw_project_path!r} does not exist; upload the dataset first."
    )

# Show where the dataset now lives (shutil.move displayed the same path).
raw_project_path

In [None]:
# Week 1: Verifying dataset placement
# Lists the project's data folder to confirm the dataset was moved there.

data_dir = "supportlytics/data"
os.listdir(data_dir)


In [None]:
# Week 1: Loading the support ticket dataset
# Reads the raw CSV from the project data folder into the DataFrame `df`
# and shows its first rows.

import pandas as pd

raw_csv_path = "supportlytics/data/raw_tickets.csv"
df = pd.read_csv(raw_csv_path)
df.head()

In [None]:
# Week 1: Inspecting dataset schema and data types
# df.info() prints a summary of the DataFrame: each column's name,
# non-null count, and dtype, plus overall memory usage.

df.info()

In [None]:
# Week 1: Checking for missing values
# This cell counts the missing (NaN) entries in each column

df.isna().sum()

In [None]:
# End of Week 1
# The dataset has been successfully organized, loaded, and explored. Data quality issues and relevant fields were identified for further processing.

In [None]:
# Week 2: Analyzing ticket priority distribution
# Counts how many tickets fall under each priority level.

priority_counts = df["Priority"].value_counts()
priority_counts

In [None]:
# Week 2: Analyzing ticket priority share
# The previous cell shows raw ticket counts per priority level. This cell
# shows the same distribution as fractions of all tickets instead of
# repeating those counts.

# normalize=True returns relative frequencies. dropna=False keeps missing
# priorities as their own row, so gaps are visible before cleaning.
df["Priority"].value_counts(normalize=True, dropna=False)

In [None]:
# Week 2: Cleaning the dataset
# This cell:
#   1. drops every record that has a missing value in any column,
#   2. normalizes Priority (lower-cased, surrounding whitespace removed),
#   3. strips surrounding whitespace from Department,
#   4. adds Tag_Count, the number of entries in each ticket's Tags value.

import ast

# Tags is parsed with ast.literal_eval rather than eval: the strings come
# from a CSV file, and eval would execute any Python expression found in a
# cell. literal_eval only accepts Python literals.
# assumes each Tags value is the text of a list literal, e.g. "['a', 'b']"
# -- TODO confirm against the dataset.
df = df.dropna().assign(
    Priority=lambda frame: frame["Priority"].str.lower().str.strip(),
    Department=lambda frame: frame["Department"].str.strip(),
    Tag_Count=lambda frame: frame["Tags"].apply(
        lambda raw_tags: len(ast.literal_eval(raw_tags))
    ),
)


In [None]:
# Week 2: Saving cleaned dataset
# Writes the processed DataFrame to the project data folder for the later
# analysis stages; index=False leaves the row index out of the file.

cleaned_csv_path = "supportlytics/data/cleaned_tickets.csv"
df.to_csv(cleaned_csv_path, index=False)

In [None]:
# Week 2: Verifying cleaned dataset storage
# Lists the project's data folder to confirm the cleaned dataset was saved.

data_dir = "supportlytics/data"
os.listdir(data_dir)

In [None]:
# End of Week 2
# Data has been cleaned and saved for further analysis

In [None]:
# Milestone 1 Completed
# Project initialization, data exploration, and data cleaning were successfully completed.