In [1]:
import pandas as pd


In [2]:
# Read the raw support-ticket export into a DataFrame.
TICKETS_CSV = "IT Support Ticket Data.csv"
df = pd.read_csv(TICKETS_CSV)

In [3]:
# Show columns and shape
print("Columns:", df.columns)
print("Number of rows and columns:", df.shape)

# Show first 5 rows. Jupyter only renders a bare expression when it is the
# last statement of the cell, so the preview has to come after the prints.
df.head()

Columns: Index(['Unnamed: 0', 'Body', 'Department', 'Priority', 'Tags', 'Unnamed: 5',
       'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10',
       'Unnamed: 11', 'started', 'ended', 'Unnamed: 14', 'Unnamed: 15',
       'issue_num'],
      dtype='object')
Number of rows and columns: (29651, 17)


In [4]:
# Select only relevant columns. The explicit .copy() makes the result an
# independent frame, so assigning new or converted columns to it afterwards
# does not trigger pandas' SettingWithCopyWarning.
columns_needed = ['Body', 'Department', 'Priority', 'Tags', 'started', 'ended', 'issue_num']
df = df[columns_needed].copy()

# Check shape and first few rows. The preview is the last statement because
# Jupyter only renders the final bare expression of a cell.
print("Columns after selection:", df.columns)
print("Shape after selection:", df.shape)
df.head()

Columns after selection: Index(['Body', 'Department', 'Priority', 'Tags', 'started', 'ended',
       'issue_num'],
      dtype='object')
Shape after selection: (29651, 7)


In [5]:
# Convert 'started' and 'ended' to timezone-aware (UTC) datetimes.
# errors='coerce' turns values that cannot be parsed into NaT instead of
# raising, so unparseable timestamps surface later as missing values.
for time_col in ('started', 'ended'):
    df[time_col] = pd.to_datetime(df[time_col], errors='coerce', utc=True)

# Confirm the resulting dtypes, then preview the first few datetime values.
# The preview comes last because Jupyter only renders the final bare
# expression of a cell.
print(df.dtypes)
df[['started', 'ended']].head()

Body                       object
Department                 object
Priority                   object
Tags                       object
started       datetime64[ns, UTC]
ended         datetime64[ns, UTC]
issue_num                   int64
dtype: object


In [6]:
# Elapsed time between ticket start and end, expressed in hours.
SECONDS_PER_HOUR = 3600
elapsed = df['ended'] - df['started']
df['Resolution_Hours'] = elapsed.dt.total_seconds() / SECONDS_PER_HOUR

# Preview the two timestamps next to the derived duration.
df[['started', 'ended', 'Resolution_Hours']].head()

Unnamed: 0,started,ended,Resolution_Hours
0,2016-01-06 08:23:43+00:00,2016-01-06 08:56:55+00:00,0.553333
1,2016-01-11 10:06:19+00:00,2016-01-12 12:30:23+00:00,26.401111
2,2016-01-21 07:28:20+00:00,2016-01-26 08:21:47+00:00,120.890833
3,2016-01-26 07:44:54+00:00,2016-01-26 07:45:48+00:00,0.015
4,2016-02-01 13:45:47+00:00,2016-02-07 06:21:42+00:00,136.598611


In [7]:
# Map priority labels to ordinal scores (low=1, medium=2, high=3).
# Labels are trimmed and lower-cased before the lookup so that variants such
# as 'High' or ' low ' still match. Series.map yields NaN for any label that
# is not a key of the mapping.
priority_mapping = {'low': 1, 'medium': 2, 'high': 3}
df['Priority_Score'] = df['Priority'].str.strip().str.lower().map(priority_mapping)

# Preview each label next to the score it was assigned.
df[['Priority', 'Priority_Score']].head()

Unnamed: 0,Priority,Priority_Score
0,high,3
1,medium,2
2,low,1
3,medium,2
4,high,3


In [8]:
# Check for missing values
print("Missing values before cleaning:\n", df.isnull().sum())

# Drop every row that has a null in any column. A single dropna() covers all
# columns, including 'Body' and 'ended'.
df = df.dropna()

# Check no nulls remain: assert the invariant, then print the per-column counts.
assert not df.isnull().values.any(), "nulls remain after cleaning"
print("Missing values after cleaning:\n", df.isnull().sum())

Missing values before cleaning:
 Body                    1
Department              0
Priority                0
Tags                    0
started              2403
ended               17081
issue_num               0
Resolution_Hours    17088
Priority_Score          0
dtype: int64
Missing values after cleaning:
 Body                0
Department          0
Priority            0
Tags                0
started             0
ended               0
issue_num           0
Resolution_Hours    0
Priority_Score      0
dtype: int64


In [9]:
# Tally how many tickets carry each Priority label (most frequent first).
priority_counts = df.loc[:, 'Priority'].value_counts()
header = "Ticket count by Priority:\n"
print(header, priority_counts)

Ticket count by Priority:
 Priority
medium    5214
high      4834
low       2515
Name: count, dtype: int64


In [10]:
# Rename columns for readability. Only the columns whose names actually
# change are listed; every other column keeps its existing name.
df = df.rename(columns={
    'Body': 'Issue_Description',
    'started': 'Start_Time',
    'ended': 'End_Time',
    'issue_num': 'Ticket_ID',
})

# Save cleaned dataset (index=False leaves the row index out of the file)
df.to_csv("IT_Support_Cleaned.csv", index=False)

# Confirm final dataset. The preview is the last statement because Jupyter
# only renders the final bare expression of a cell.
print("Final dataset shape:", df.shape)
df.head()

Final dataset shape: (12563, 9)


In [11]:
# Display the first four rows of the frame.
df.iloc[:4]

Unnamed: 0,Issue_Description,Department,Priority,Tags,Start_Time,End_Time,Ticket_ID,Resolution_Hours,Priority_Score
0,"Dear Customer Support Team,I am writing to rep...",Technical Support,high,"['Account', 'Disruption', 'Outage', 'IT', 'Tec...",2016-01-06 08:23:43+00:00,2016-01-06 08:56:55+00:00,186,0.553333,3
1,"Dear Customer Support Team,I hope this message...",Returns and Exchanges,medium,"['Product', 'Feature', 'Tech Support']",2016-01-11 10:06:19+00:00,2016-01-12 12:30:23+00:00,190,26.401111,2
2,"Dear Customer Support Team,I hope this message...",Billing and Payments,low,"['Billing', 'Payment', 'Account', 'Documentati...",2016-01-21 07:28:20+00:00,2016-01-26 08:21:47+00:00,198,120.890833,1
3,"Dear Support Team,I hope this message reaches ...",Sales and Pre-Sales,medium,"['Product', 'Feature', 'Feedback', 'Tech Suppo...",2016-01-26 07:44:54+00:00,2016-01-26 07:45:48+00:00,209,0.015,2
