## Call Centre Data Cleaning.

#### Importing Python libraries for data analysis.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Importing Call Centre data into Jupyter Notebook.

In [None]:
# Load the Excel workbook into the `calls` DataFrame. The path is relative,
# so the file must sit in the notebook's current working directory.
calls = pd.read_excel("Call Centre Data.xlsx")

#### Reading Call Centre dataset.

In [None]:
# Display the loaded DataFrame as this cell's output.
calls

#### Getting information on the Call Centre data.

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
calls.info()

### 1. Removal of duplicated rows.

In [None]:
# True if at least one row repeats an earlier row across all columns.
calls.duplicated().any()

In [None]:
# Show every row whose "Id" repeats the "Id" of an earlier row.
# Series.duplicated() already returns a boolean mask, so it is used for
# indexing directly instead of being compared with 1.
calls[calls["Id"].duplicated()]

### 2. Data formatting & standardisation.

In [None]:
# Inspect the raw "Id" column before standardising it.
calls["Id"]

In [None]:
# Upper-case every Id so the identifiers share a single letter case.
calls["Id"] = calls["Id"].str.upper()

In [None]:
# Rename the identifier column: "Id" -> "Call Id".
calls = calls.rename({"Id": "Call Id"}, axis="columns")

In [None]:
# Confirm the renamed, upper-cased "Call Id" column.
calls["Call Id"]

In [None]:
# List the distinct raw "Call Timestamp" values in sorted order.
calls["Call Timestamp"].sort_values().unique()

In [None]:
# Convert "Call Timestamp" to pandas datetimes, reading each value as
# day-month-year (e.g. 31-10-2020).
calls["Call Timestamp"] = pd.to_datetime(
    calls["Call Timestamp"],
    format="%d-%m-%Y",
)

In [None]:
# Rename the parsed column: "Call Timestamp" -> "Call Date".
calls = calls.rename({"Call Timestamp": "Call Date"}, axis="columns")

In [None]:
# List the distinct "Call Date" values in sorted order to check the parse.
calls["Call Date"].sort_values().unique()

In [None]:
# List the distinct values of the "Call-Centres City" column.
calls["Call-Centres City"].unique()

In [None]:
# Rename the city column: "Call-Centres City" -> "Call Centre City".
calls = calls.rename({"Call-Centres City": "Call Centre City"}, axis="columns")

In [None]:
# List the distinct values of the "Channel" column.
calls["Channel"].unique()

In [None]:
# Standardise the channel label: entries equal to "Call-Center" become
# "Call Centre"; every other value is left as it is.
calls["Channel"] = calls["Channel"].replace({"Call-Center": "Call Centre"})

In [None]:
# List the distinct values of the "Reason" column.
calls["Reason"].unique()

In [None]:
# List the distinct values of the "Response Time" column.
calls["Response Time"].unique()

In [None]:
# List the distinct values of the "Sentiment" column.
calls["Sentiment"].unique()

In [None]:
# List the distinct "State" values in sorted order.
calls["State"].sort_values().unique()

In [None]:
# List the distinct "Call Duration In Minutes" values in sorted order.
calls["Call Duration In Minutes"].sort_values().unique()

In [None]:
# Rename the duration column:
# "Call Duration In Minutes" -> "Call Duration (Mins)".
calls = calls.rename(
    {"Call Duration In Minutes": "Call Duration (Mins)"},
    axis="columns",
)

In [None]:
# List the distinct "Csat Score" values in sorted order.
calls["Csat Score"].sort_values().unique()

In [None]:
# Rename the satisfaction column: "Csat Score" -> "CSAT Score".
calls = calls.rename({"Csat Score": "CSAT Score"}, axis="columns")

### 3. Imputation of blank/null values.

In [None]:
# Count the missing (null) values in each column.
calls.isnull().sum()

In [None]:
# Using the mean substitution method to impute blank values.
# TODO: the imputation itself is not applied in this cell.

In [None]:
# List each distinct ("CSAT Score", "Sentiment") pairing, sorted ascending
# by score and then by sentiment.
(
    calls[["CSAT Score", "Sentiment"]]
    .sort_values(by=["CSAT Score", "Sentiment"])
    .drop_duplicates()
)

### 4. Removal of unnecessary columns.

In [None]:
# Kept all columns; none were unnecessary.

### 5. Filtration & aggregation of dataframe.

In [None]:
# Keep only the rows that are neither from Alaska or Hawaii nor dated
# 2020-10-31.
calls = calls[
    ~(
        calls["State"].isin(["Alaska", "Hawaii"])
        | (calls["Call Date"] == "2020-10-31")
    )
]

## Call Centre Data Analysis.

#### 1. Total calls.

In [None]:
# Total calls, measured as the number of non-null "Call Id" values.
calls["Call Id"].count()

#### 2. Total Call Duration (Mins).

In [None]:
# Sum of all call durations, in minutes.
calls["Call Duration (Mins)"].sum()

#### 3. Average Call Duration (Mins).

In [None]:
# Mean call duration in minutes, rounded to 2 decimal places.
# pandas skips missing values when averaging (skipna=True by default).
calls["Call Duration (Mins)"].mean().round(2)

#### 4. Average CSAT Score.

In [None]:
# Mean CSAT score, rounded to 2 decimal places.
# pandas skips missing values when averaging (skipna=True by default), so
# calls without a score do not contribute to this figure.
calls["CSAT Score"].mean().round(2)

#### 5. Total calls by Call Date.

In [None]:
# Number of non-null "Call Id" values for each "Call Date".
(
    calls
    .groupby("Call Date")["Call Id"]
    .count()
)

#### 6. Total Call Duration (Mins) by Call Date.

In [None]:
# Total call duration, in minutes, for each "Call Date".
(
    calls
    .groupby("Call Date")["Call Duration (Mins)"]
    .sum()
)

#### 7. Average Call Duration (Mins) by Call Date.

In [None]:
# Mean call duration, in minutes, for each "Call Date", rounded to
# 2 decimal places.
(
    calls
    .groupby("Call Date")["Call Duration (Mins)"]
    .mean()
    .round(2)
)

#### 8. Average CSAT Score by Call Date.

In [None]:
# Mean CSAT score for each "Call Date", rounded to 2 decimal places.
(
    calls
    .groupby("Call Date")["CSAT Score"]
    .mean()
    .round(2)
)