In [74]:
import pandas as pd
import numpy as np

In [75]:
# Path of the input CSV; a relative path, so it is resolved against the
# notebook's current working directory.
file_path = 'Call Center Data.csv'

# Read the whole file into a DataFrame using pandas' default parsing options.
data = pd.read_csv(filepath_or_buffer=file_path)

### Data Exploration (EDA)

In [76]:
# Preview the top five rows to see the columns and their raw formats
data.head(n=5)

Unnamed: 0,Index,Incoming Calls,Answered Calls,Answer Rate,Abandoned Calls,Answer Speed (AVG),Talk Duration (AVG),Waiting Time (AVG),Service Level (20 Seconds)
0,1,217,204,94.01%,13,0:00:17,0:02:14,0:02:45,76.28%
1,2,200,182,91.00%,18,0:00:20,0:02:22,0:06:55,72.73%
2,3,216,198,91.67%,18,0:00:18,0:02:38,0:03:50,74.30%
3,4,155,145,93.55%,10,0:00:15,0:02:29,0:03:12,79.61%
4,5,37,37,100.00%,0,0:00:03,0:02:06,0:00:35,97.30%


In [77]:
# Number of rows in the dataset
data.shape[0]

1251

In [78]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Index,Incoming Calls,Answered Calls,Abandoned Calls
count,1251.0,1251.0,1251.0,1251.0
mean,626.0,198.539568,176.845723,21.693845
std,361.276902,156.534195,115.61208,59.671955
min,1.0,5.0,5.0,0.0
25%,313.5,123.0,114.0,3.0
50%,626.0,177.0,166.0,8.0
75%,938.5,233.0,214.5,16.0
max,1251.0,1575.0,909.0,704.0


In [79]:
# Count the missing values in each column
data.isna().sum()

Index                         0
Incoming Calls                0
Answered Calls                0
Answer Rate                   0
Abandoned Calls               0
Answer Speed (AVG)            0
Talk Duration (AVG)           0
Waiting Time (AVG)            0
Service Level (20 Seconds)    0
dtype: int64

In [80]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
# Columns reported as `object` here are the ones that hold non-numeric values
# and need converting before numeric analysis.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1251 entries, 0 to 1250
Data columns (total 9 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Index                       1251 non-null   int64 
 1   Incoming Calls              1251 non-null   int64 
 2   Answered Calls              1251 non-null   int64 
 3   Answer Rate                 1251 non-null   object
 4   Abandoned Calls             1251 non-null   int64 
 5   Answer Speed (AVG)          1251 non-null   object
 6   Talk Duration (AVG)         1251 non-null   object
 7   Waiting Time (AVG)          1251 non-null   object
 8   Service Level (20 Seconds)  1251 non-null   object
dtypes: int64(4), object(5)
memory usage: 88.1+ KB


### Data Processing & Cleansing

In [81]:
# Convert percentage strings (e.g. '94.01%') to float fractions (e.g. 0.9401)
def _percent_to_fraction(column: pd.Series) -> pd.Series:
    """Convert a Series of 'NN.NN%' strings to floats expressed as fractions.

    Parameters
    ----------
    column : pd.Series
        Either percentage strings with a trailing '%', or an already
        converted numeric Series.

    Returns
    -------
    pd.Series
        Float values divided by 100. A column that is already numeric is
        returned unchanged, so re-running this cell after a successful
        conversion is a no-op instead of raising ``AttributeError`` from
        the ``.str`` accessor.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column
    return column.str.rstrip('%').astype('float') / 100.0


for _pct_col in ('Answer Rate', 'Service Level (20 Seconds)'):
    data[_pct_col] = _percent_to_fraction(data[_pct_col])

In [73]:
# # Superseded: earlier attempt that converted the h:mm:ss columns into datetime.time values.
# # Kept for reference only; the conversion to total seconds in the following cell is used instead.
# data['Answer Speed (AVG)'] = data['Answer Speed (AVG)'].apply(pd.to_datetime, format='%H:%M:%S', errors='coerce',utc= False).dt.time
# data['Talk Duration (AVG)'] = data['Talk Duration (AVG)'].apply(pd.to_datetime, format='%H:%M:%S', errors='coerce',utc= False).dt.time
# data['Waiting Time (AVG)'] = data['Waiting Time (AVG)'].apply(pd.to_datetime, format='%H:%M:%S', errors='coerce',utc= False).dt.time

In [91]:
# Parse each h:mm:ss text column as a timedelta and store its length in
# seconds under a new '(sec)' column.
seconds_column_for = {
    'Answer Speed (AVG)': 'Avg Answer Speed (sec)',
    'Talk Duration (AVG)': 'Avg Talk Duration (sec)',
    'Waiting Time (AVG)': 'Avg Waiting Time (sec)',
}
for source_col, seconds_col in seconds_column_for.items():
    durations = pd.to_timedelta(data[source_col])
    data[seconds_col] = durations.dt.total_seconds()

In [93]:
# Remove the original h:mm:ss text columns from `data` in place.
# Note: this raises KeyError if the columns have already been dropped.
data.drop(
    labels=['Answer Speed (AVG)', 'Talk Duration (AVG)', 'Waiting Time (AVG)'],
    axis='columns',
    inplace=True,
)

In [97]:
# Preview the first five rows after the conversions above
data.iloc[:5]

Unnamed: 0,Index,Incoming Calls,Answered Calls,Answer Rate,Abandoned Calls,Service Level (20 Seconds),Avg Answer Speed (sec),Avg Talk Duration (sec),Avg Waiting Time (sec)
0,1,217,204,0.9401,13,0.7628,17.0,134.0,165.0
1,2,200,182,0.91,18,0.7273,20.0,142.0,415.0
2,3,216,198,0.9167,18,0.743,18.0,158.0,230.0
3,4,155,145,0.9355,10,0.7961,15.0,149.0,192.0
4,5,37,37,1.0,0,0.973,3.0,126.0,35.0


In [101]:
# Relabel the 'Index' column as 'Employee ID', modifying `data` in place
data.rename({'Index': 'Employee ID'}, axis='columns', inplace=True)

In [103]:
# Write the cleaned frame to disk; index=False leaves the row index out of the file
data.to_csv(path_or_buf='clean_data.csv', index=False)