## Customer Complaints Data Cleaning.

#### Importing Python libraries for data analysis.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Reading the customer complaints Excel data into Python.

In [None]:
complaints = pd.read_excel("Customer Complaints Data.xlsx")

#### First five rows of the dataframe

In [None]:
complaints.head()

#### Number of non-null values and data types for each column.

In [None]:
complaints.info()

### 1. Removing duplicated rows.

In [None]:
# Count the distinct complaint IDs. This cell only inspects the data: no rows
# are dropped here. Compare the result with the row count reported by info()
# above to see whether any complaint ID repeats.
complaints["Complaint ID"].nunique()

### 2. Formatting the data

In [None]:
complaints.columns

In [None]:
complaints["Company"].unique()

In [None]:
complaints["Company"].nunique()

In [None]:
complaints["Company public response"].unique()

In [None]:
complaints["Company response to consumer"].unique()

#### Grouping all closed complaints to "Closed" and in progress to "Pending".

In [None]:
# Classify each complaint as "Closed" or "Pending" from the company's response.
# Any response containing "Closed" counts as closed, and so does
# "Untimely response"; everything else is treated as still pending.
response = complaints["Company response to consumer"]
# na=False: a missing response must not be labelled "Closed" (np.where treats
# the NaN that str.contains would otherwise return as truthy).
# regex=False: "Closed" is a plain substring, not a pattern.
is_closed = response.str.contains("Closed", na=False, regex=False) | (response == "Untimely response")
complaints["Progress"] = np.where(is_closed, "Closed", "Pending")

In [None]:
complaints[["Company response to consumer", "Progress"]].drop_duplicates("Company response to consumer")

In [None]:
complaints["Consumer consent provided?"].unique()

In [None]:
complaints["Consumer disputed?"].unique()

In [None]:
complaints["Date Received"].unique()

In [None]:
complaints["Date Submitted"].unique()

In [None]:
complaints["Issue"].unique()

In [None]:
complaints["Product"].sort_values().unique()

#### Creating a function to group the various Product complaints into a Service column.

In [None]:
# Lookup table from a raw "Product" value to its broader service group.
# NOTE(review): matching is exact and case-sensitive; confirm that
# "Debt Collection" is spelled this way in the data, because any value not
# listed here falls through to 'Mortgage'.
_SERVICE_BY_PRODUCT = {
    "Bank account or service": 'Account',
    "Checking or savings account": 'Account',
    "Debt Collection": 'Banking',
    "Student loan": 'Banking',
    "Vehicle loan or lease": 'Banking',
    "Credit card": 'Credit Card',
    "Credit card or prepaid card": 'Credit Card',
}


def func(complaints):
    """Return the service group for one complaint record.

    Args:
        complaints: A single record (for example a DataFrame row) that can be
            indexed with the key 'Product'.

    Returns:
        'Account', 'Banking' or 'Credit Card' for the products listed in the
        lookup table, and 'Mortgage' for every other value.
    """
    return _SERVICE_BY_PRODUCT.get(complaints['Product'], 'Mortgage')

# Call func once per row (axis=1) and store the returned group name in a new
# "Service" column.
complaints["Service"] = complaints.apply(func, axis=1)

In [None]:
complaints[["Product", "Service"]].drop_duplicates("Product").sort_values(by=["Service", "Product"])

In [None]:
complaints.columns

In [None]:
complaints["State"].unique()

In [None]:
complaints["Sub-issue"].unique()

In [None]:
complaints["Sub-product"].unique()

In [None]:
complaints["Submitted via"].unique()

In [None]:
complaints["Tags"].unique()

In [None]:
complaints["Timely response?"].unique()

In [None]:
complaints["ZIP code"].unique()

In [None]:
complaints["Number of Complaints"].unique()

In [None]:
complaints["Target"].unique()

In [None]:
complaints["Time to Receipt"].sort_values().unique()

In [None]:
complaints["Date Diff"] = (complaints["Date Received"] - complaints["Date Submitted"])
complaints[["Time to Receipt", "Date Diff"]].sort_values(by="Time to Receipt").drop_duplicates()

In [None]:
# Number of complaints whose "Time to Receipt" equals -1.
(complaints["Time to Receipt"] == -1).sum()

### 3. Imputing blank or null values.

In [None]:
complaints.isna().sum()

### 4. Removing columns that are redundant or not relevant to the analysis.

In [None]:
# DataFrame.drop returns a new frame and leaves the original untouched, so the
# result has to be assigned back for the columns to actually be removed.
complaints = complaints.drop(columns=["Company public response", "All Complaints (Selected)", "Number of Complaints", "Target"])
# Show the remaining columns to confirm the removal.
complaints.columns

## Customer Complaints Data Analysis.

#### 1. Total Complaints

In [None]:
complaints["Complaint ID"].count()

#### 2. Total Closed Complaints

In [None]:
complaints[complaints["Progress"] == "Closed"]["Complaint ID"].count()

In [None]:
(complaints[complaints["Progress"] == "Closed"]["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 3. Total Pending Complaints

In [None]:
complaints[complaints["Progress"] == "Pending"]["Complaint ID"].count()

In [None]:
(complaints[complaints["Progress"] == "Pending"]["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 4. Total Timely Responded Complaints

In [None]:
complaints[complaints["Timely response?"] == "Yes"]["Complaint ID"].count()

In [None]:
(complaints[complaints["Timely response?"] == "Yes"]["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 5. Total Disputed Complaints

In [None]:
complaints[complaints["Consumer disputed?"] == "Yes"]["Complaint ID"].count()

In [None]:
(complaints[complaints["Consumer disputed?"] == "Yes"]["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 6. Total Complaints by Year

In [None]:
complaints["Year"] = complaints["Date Received"].dt.year

In [None]:
complaints.groupby("Year")["Complaint ID"].count()

#### 7. Total Complaints by State

In [None]:
complaints.groupby("State")["Complaint ID"].count()

#### 8. Total Complaints by Issue

In [None]:
complaints.groupby("Issue")["Complaint ID"].count()

#### 9. Total Complaints by Consumer Consent Provided?

In [None]:
complaints.groupby("Consumer consent provided?")["Complaint ID"].count()

#### 10. Total Complaints by Submitted via

In [None]:
complaints.groupby("Submitted via")["Complaint ID"].count()

In [None]:
(complaints.groupby("Submitted via")["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 11. Total Complaints by Consumer Disputed?

In [None]:
complaints.groupby("Consumer disputed?")["Complaint ID"].count()

In [None]:
(complaints.groupby("Consumer disputed?")["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 12. Total Complaints by Tags

In [None]:
complaints.groupby("Tags")["Complaint ID"].count()

In [None]:
(complaints.groupby("Tags")["Complaint ID"].count()*100/complaints["Complaint ID"].count()).round(2)

#### 13. Total Account Complaints

In [None]:
complaints[complaints["Service"] == "Account"]["Complaint ID"].count()

#### 14. Closed Account Complaints

In [None]:
complaints[(complaints["Service"] == "Account") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Account") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 15. Pending Account Complaints

In [None]:
complaints[(complaints["Service"] == "Account") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()

In [None]:
# Pending Account complaints as a percentage of all Account complaints,
# rounded to two decimals like the other percentage cells.
(complaints[(complaints["Service"] == "Account") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 16. Timely Responded Account Complaints

In [None]:
complaints[(complaints["Service"] == "Account") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()

In [None]:
# Timely-responded Account complaints as a percentage of ALL Account
# complaints (not only the closed ones), matching the Banking, Credit Card
# and Mortgage sections; rounded to two decimals.
(complaints[(complaints["Service"] == "Account") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 17. Disputed Account Complaints

In [None]:
complaints[(complaints["Service"] == "Account") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()

In [None]:
# Disputed Account complaints as a percentage of ALL Account complaints
# (not only the closed ones), matching the Credit Card and Mortgage
# sections; rounded to two decimals.
(complaints[(complaints["Service"] == "Account") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 18. Account Complaints by Year

In [None]:
complaints[complaints["Service"] == "Account"].groupby("Year")["Complaint ID"].count()

#### 19. Account Complaints by State

In [None]:
complaints[complaints["Service"] == "Account"].groupby("State")["Complaint ID"].count()

#### 20. Account Complaints by Issue

In [None]:
complaints[complaints["Service"] == "Account"].groupby("Issue")["Complaint ID"].count()

#### 21. Account Complaints by Consumer Consent Provided?

In [None]:
complaints[complaints["Service"] == "Account"].groupby("Consumer consent provided?")["Complaint ID"].count()

#### 22. Account Complaints by Submitted Via

In [None]:
complaints[complaints["Service"] == "Account"].groupby("Submitted via")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Account"].groupby("Submitted via")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 23. Account Complaints by Consumer Disputed?

In [None]:
complaints[complaints["Service"] == "Account"].groupby("Consumer disputed?")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Account"].groupby("Consumer disputed?")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 24. Account Complaints by Tags

In [None]:
complaints[complaints["Service"] == "Account"].groupby("Tags")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Account"].groupby("Tags")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Account"]["Complaint ID"].count()).round(2)

#### 25. Total Banking Complaints

In [None]:
complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()

#### 26. Closed Banking Complaints

In [None]:
complaints[(complaints["Service"] == "Banking") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Banking") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 27. Pending Banking Complaints

In [None]:
complaints[(complaints["Service"] == "Banking") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Banking") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 28. Timely Responded Banking Complaints

In [None]:
complaints[(complaints["Service"] == "Banking") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Banking") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 29. Disputed Banking Complaints

In [None]:
complaints[(complaints["Service"] == "Banking") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()

In [None]:
# Disputed Banking complaints as a percentage of all Banking complaints.
# The raw count is already shown in the previous cell; this cell reports
# the share, as the other service sections do.
(complaints[(complaints["Service"] == "Banking") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 30. Banking Complaints by Year

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("Year")["Complaint ID"].count()

#### 31. Banking Complaints by State

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("State")["Complaint ID"].count()

#### 32. Banking Complaints by Issue

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("Issue")["Complaint ID"].count()

#### 33. Banking Complaints by Consumer Consent Provided?

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("Consumer consent provided?")["Complaint ID"].count()

#### 34. Banking Complaints by Submitted Via

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("Submitted via")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Banking"].groupby("Submitted via")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 35. Banking Complaints by Consumer Disputed?

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("Consumer disputed?")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Banking"].groupby("Consumer disputed?")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 36. Banking Complaints by Tags

In [None]:
complaints[complaints["Service"] == "Banking"].groupby("Tags")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Banking"].groupby("Tags")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Banking"]["Complaint ID"].count()).round(2)

#### 37. Total Credit Card Complaints

In [None]:
complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()

#### 38. Closed Credit Card Complaints

In [None]:
complaints[(complaints["Service"] == "Credit Card") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Credit Card") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 39. Pending Credit Card Complaints

In [None]:
complaints[(complaints["Service"] == "Credit Card") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Credit Card") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 40. Timely Responded Credit Card Complaints

In [None]:
complaints[(complaints["Service"] == "Credit Card") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Credit Card") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 41. Disputed Credit Card Complaints

In [None]:
complaints[(complaints["Service"] == "Credit Card") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Credit Card") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 42. Credit Card Complaints by Year

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("Year")["Complaint ID"].count()

#### 43. Credit Card Complaints by State

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("State")["Complaint ID"].count()

#### 44. Credit Card Complaints by Issue

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("Issue")["Complaint ID"].count()

#### 45. Credit Card Complaints by Consumer Consent Provided?

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("Consumer consent provided?")["Complaint ID"].count()

#### 46. Credit Card Complaints by Submitted Via

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("Submitted via")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Credit Card"].groupby("Submitted via")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 47. Credit Card Complaints by Consumer Disputed?

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("Consumer disputed?")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Credit Card"].groupby("Consumer disputed?")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 48. Credit Card Complaints by Tags

In [None]:
complaints[complaints["Service"] == "Credit Card"].groupby("Tags")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Credit Card"].groupby("Tags")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Credit Card"]["Complaint ID"].count()).round(2)

#### 49. Total Mortgage Complaints

In [None]:
complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()

#### 50. Closed Mortgage Complaints

In [None]:
complaints[(complaints["Service"] == "Mortgage") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Mortgage") & (complaints["Progress"] == "Closed")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)

#### 51. Pending Mortgage Complaints

In [None]:
complaints[(complaints["Service"] == "Mortgage") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Mortgage") & (complaints["Progress"] == "Pending")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)

#### 52. Timely Responded Mortgage Complaints

In [None]:
complaints[(complaints["Service"] == "Mortgage") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Mortgage") & (complaints["Timely response?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)

#### 53. Disputed Mortgage Complaints

In [None]:
complaints[(complaints["Service"] == "Mortgage") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()

In [None]:
(complaints[(complaints["Service"] == "Mortgage") & (complaints["Consumer disputed?"] == "Yes")]["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)

#### 54. Mortgage Complaints by Year

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("Year")["Complaint ID"].count()

#### 55. Mortgage Complaints by State

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("State")["Complaint ID"].count()

#### 56. Mortgage Complaints by Issue

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("Issue")["Complaint ID"].count()

#### 57. Mortgage Complaints by Consumer Consent Provided?

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("Consumer consent provided?")["Complaint ID"].count()

#### 58. Mortgage Complaints by Submitted Via

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("Submitted via")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Mortgage"].groupby("Submitted via")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)

#### 59. Mortgage Complaints by Consumer Disputed?

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("Consumer disputed?")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Mortgage"].groupby("Consumer disputed?")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)

#### 60. Mortgage Complaints by Tags

In [None]:
complaints[complaints["Service"] == "Mortgage"].groupby("Tags")["Complaint ID"].count()

In [None]:
(complaints[complaints["Service"] == "Mortgage"].groupby("Tags")["Complaint ID"].count()*100/complaints[complaints["Service"] == "Mortgage"]["Complaint ID"].count()).round(2)