<b><H1>Project Name : Comcast Telecom Consumer Complaints</H1></b><br>
Name: Daniel Manova<br>
Date: 01-04-2021<br>
Course: Post Graduate Program in AI and Machine Learning<br>
Cohort : PGP AIML FEB 2021 Cohort 1<br>

In [None]:
# Importing all necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tabulate import tabulate

<b>Import data into Python environment</b>

In [None]:
# Load the Comcast telecom complaints dataset from the Kaggle input directory into a DataFrame
df_comcast_telecom = pd.read_csv(
    filepath_or_buffer='../input/telecom-complaints-monitoring-system/Comcast_telecom_complaints_data.csv'
)


In [None]:
# Preview the first ten complaint records
df_comcast_telecom.iloc[:10]

In [None]:
# Print a summary of the DataFrame: column names, non-null counts, dtypes and memory usage
df_comcast_telecom.info()

In [None]:
# Count the missing values in each column.
# isnull() yields a boolean mask; summing it counts the True (missing) cells per column.
# Indexing the frame with that mask and calling count() cannot be used for this: the mask
# keeps only the missing cells, and count() ignores missing cells, so it always reports 0.
df_comcast_telecom.isnull().sum()

In [None]:
# Parse the two date columns (read as object dtype) into datetime64.
# Each column is mapped to its explicit strptime format; None lets pandas infer the format.
date_formats = {'Date_month_year': None, 'Date': '%d-%m-%y'}
for date_column, date_format in date_formats.items():
    df_comcast_telecom[date_column] = pd.to_datetime(df_comcast_telecom[date_column], format=date_format)
# Display the dtypes to confirm the conversion
df_comcast_telecom.dtypes

<b>Provide the trend chart for the number of complaints at monthly and daily granularity levels</b>

In [None]:
# Add a Month column holding the month number taken from Date_month_year
complaint_dates = df_comcast_telecom['Date_month_year']
df_comcast_telecom['Month'] = complaint_dates.dt.month
# Show the first five rows to check the new column
df_comcast_telecom.head(5)

In [None]:
# Monthly Complaints Trend chart using matplotlib
# Count the complaints per month once. groupby sorts its keys, so the index holds the
# month numbers in ascending order and is aligned with the counts by construction.
monthly_counts = df_comcast_telecom.groupby('Month').size()
month_names = ['Jan', 'Feb', 'Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
# Label only the months that are present in the data, so the number of tick labels always
# matches the number of tick positions (a fixed list of 12 labels fails when a month is missing).
month_tick_labels = [month_names[int(month) - 1] for month in monthly_counts.index]

plt.figure(figsize=(15,7))
plt.plot(monthly_counts.index, monthly_counts.values, color='#5900b3', linestyle='-', linewidth=3, label='Tickets per Month') # in the order x, y
plt.xticks(monthly_counts.index, month_tick_labels)
plt.xlabel('Months')
plt.ylabel('Number of Tickets Created from Jan to Dec')
plt.title('Monthly Complaints Trend')
plt.legend()
plt.show()


In [None]:
# Daily Complaints Trend chart using matplotlib
# Count the complaints per day once and take both axes from the same result: the index
# holds the dates in ascending order and the values hold the matching counts. Deriving x
# from unique() and y from groupby() separately would give different lengths if a date
# were missing (groupby drops missing keys, unique() keeps them).
daily_counts = df_comcast_telecom.groupby('Date_month_year').size()

plt.figure(figsize=(15,7))
plt.plot(daily_counts.index, daily_counts.values, color='#5900b3', linestyle='-', linewidth=3, label='Tickets per Day') # in the order x, y
plt.xlabel('Dates')
plt.ylabel('Number of Tickets Created')
plt.title('Daily Complaints Trend')
plt.legend()
plt.show()

<b>Provide a table with the frequency of complaint types</b>

In [None]:
# Create a new column holding the category of each customer complaint.
# Every category is described by a case-insensitive regular expression. The rules are
# ordered: a complaint receives the FIRST category whose pattern it matches, and 'Other'
# when it matches none. Patterns are raw strings so that '\$' is a literal dollar sign
# for the regex engine rather than an invalid Python escape sequence.
category_rules = [
    ('Internet', r'Internet|Speed|wifi|Access|Throttling|throt|Broadband|Cable|connection|DSL|modem|ip|ISP'),
    ('Billing', r'\$|Price|Pricing|Cost|Payment|Charg|pay|Bill|Billing|Rate|Contract|fee'),
    ('Network', r'Service|Cap|usage|Data|Customer|switch|wait|support|TV|avail|network|cramming'),
    ('Grievance', r'Customer|complaint|breach|bully|False|fed up'),
    ('Email', r'email'),
    ('Outages', r'outage'),
]
complaint_text = df_comcast_telecom["Customer Complaint"]
# na=False makes a missing complaint text match no pattern, so it falls through to 'Other'
# instead of being treated as a match by the first rule.
category_conditions = [
    complaint_text.str.contains(pattern, case=False, regex=True, na=False).to_numpy(dtype=bool)
    for _, pattern in category_rules
]
category_labels = [label for label, _ in category_rules]
# np.select returns, for each row, the label of the first condition that is True
df_comcast_telecom["Complaint Catagory"] = np.select(category_conditions, category_labels, default='Other')

In [None]:
# Display the number of complaints per category in tabular format
category_counts = df_comcast_telecom.groupby(["Complaint Catagory"]).size().to_frame()
table_headers = ['Complaint Category','Number of Complaints']
print(tabulate(category_counts, headers=table_headers, tablefmt="fancy_grid"))

In [None]:
# Frequency of complaint types: bar chart annotated with each category's count and percentage
ax = df_comcast_telecom['Complaint Catagory'].value_counts().plot(kind='bar', figsize=(20,10), color="#69E1DA", fontsize=11, width=0.2)
ax.set_alpha(0.8)
ax.set_title("Frequency of Complaint Types", fontsize=12)
ax.set_xlabel("Category of Complaints", fontsize=12)
ax.set_ylabel("Number of Complaints", fontsize=12)
# Keep the category names horizontal (a rotation of 0 degrees)
ax.tick_params(axis='x', labelrotation=0)
# Each bar's height is one category's complaint count, so the heights add up to the total
total = sum(bar.get_height() for bar in ax.patches)
for bar in ax.patches:
    width, height = bar.get_width(), bar.get_height()
    x, y = bar.get_xy()
    # Percentage of all complaints, written in red just above the bar
    ax.text(x, height+2, str(round((height/total)*100, 2))+'%', fontsize=11, color="red")
    # Absolute count, centred inside the bar
    ax.text(x+width/2, y+height/2, '{:.0f}'.format(height), horizontalalignment='center', verticalalignment='center')

<b>Create a new categorical variable with value as Open and Closed. Open & Pending is to be categorized as Open and Closed & Solved is to be categorized as Closed.</b>

In [None]:
# Find the unique values in the Status column
df_comcast_telecom['Status'].unique()

In [None]:
# Create a new column that categorizes Status into two values:
# 'Closed' and 'Solved' become 'Closed'; every other status becomes 'Open'
is_resolved = df_comcast_telecom['Status'].isin(['Closed', 'Solved'])
df_comcast_telecom['Consolidated Status'] = np.where(is_resolved, 'Closed', 'Open')
df_comcast_telecom.head(5)

<b>Provide state wise status of complaints in a stacked bar chart. Use the categorized variable from Q3. Provide insights on the questions below.</b>

In [None]:
# Build a table of complaint counts with one row per State and one column per Consolidated Status
status_counts = df_comcast_telecom.groupby(['State','Consolidated Status']).size()
State_wise_status = status_counts.unstack(level=-1)
# A state with no complaints in one status comes out as NaN: store 0 and restore integer counts
State_wise_status = State_wise_status.fillna(0).astype(int)

In [None]:
# State wise complaint status using Stacked bar chart with pandas
# Select the two status columns explicitly: later cells add derived columns to
# State_wise_status in place, and re-running this cell must still plot only the counts.
sws = State_wise_status[['Closed', 'Open']].plot(kind='bar', figsize = (30,10), color=['#00b386','#0086b3'], title = 'State wise status of complaints', xlabel = 'States', ylabel = 'Number of complaints', stacked = True)
sws.tick_params(axis='x', labelrotation=90)
sws.set_alpha(0.8)
for segment in sws.patches:
    width, height = segment.get_width(), segment.get_height()
    if height == 0:
        # An empty segment has no area to label; skip it instead of drawing a stray "0"
        continue
    x, y = segment.get_xy()
    # Write the complaint count in the centre of the segment
    sws.text(x+width/2, y+height/2, '{:.0f}'.format(height), color="white", horizontalalignment='center', verticalalignment='center', fontweight='bold')

<b>Which state has the maximum complaints?</b>

In [None]:
# State with the maximum number of complaints, with its open and closed ticket counts
State_wise_status['Total Complaints'] = State_wise_status['Closed']+State_wise_status['Open']
# Label (state name) of the row with the largest total
max_complaints_state = State_wise_status['Total Complaints'].idxmax()
print("State with maximum complains : {}".format(max_complaints_state))
# Read the counts from that state's own row. The column-wide maxima of 'Open' and 'Closed'
# may belong to different states and must not be reported under this state's name.
print("Total open complaints in {} : {}".format(max_complaints_state, State_wise_status.loc[max_complaints_state, 'Open']))
print("Total closed complaints in {} : {}".format(max_complaints_state, State_wise_status.loc[max_complaints_state, 'Closed']))

<b>Which state has the highest percentage of unresolved complaints?</b>

In [None]:
# State with highest percentage of un-resolved complaints
# 'Un-Resolved %' is each state's share of ALL open complaints (open in state / open overall).
# NOTE(review): if the intended measure is the share of a state's own complaints that are
# still open, divide by that state's total (Open + Closed) instead - confirm the requirement.
State_wise_status['Un-Resolved %'] =  round((State_wise_status['Open']/State_wise_status['Open'].sum()) *100,2)
# Label (state name) of the row with the largest percentage
max_unresolved_state = State_wise_status['Un-Resolved %'].idxmax()
print("State with highest % of un-resolved complains : {}".format(max_unresolved_state))
# Report the percentage under the state it belongs to, not under the state with the most complaints overall
print("Un-resolved complaints % in {} : {}%".format(max_unresolved_state, State_wise_status.loc[max_unresolved_state, 'Un-Resolved %']))

<b>Provide the percentage of complaints resolved till date, which were received through the Internet and customer care calls</b>

In [None]:
# Resolved percentage of the complaints received through Internet and Customer Care Call
# Count the complaints per channel and consolidated status, one column per status
channel_status_counts = df_comcast_telecom.groupby(['Received Via','Consolidated Status']).size()
Complaints_resolved = channel_status_counts.unstack(level=-1)
# Each channel's closed complaints as a percentage of all closed complaints
closed_by_channel = Complaints_resolved['Closed']
Complaints_resolved['Resolved %'] = round((closed_by_channel/closed_by_channel.sum())*100,2)
internet_resolved_pct = Complaints_resolved.loc['Internet', 'Resolved %']
call_resolved_pct = Complaints_resolved.loc['Customer Care Call', 'Resolved %']
print("Resolved % of complaints, received through Internet :  {}%".format(internet_resolved_pct))
print("Resolved % of complaints, received through Customer Care Call : {}%".format(call_resolved_pct))

In [None]:
# Donut chart of the closed complaints split by the channel they were received through
colors = ['#0086b3','#00b386']
labels = list(Complaints_resolved.index)
# The figure size is fixed here, at creation; assigning fig.figsize afterwards has no effect
fig, ax1 = plt.subplots(figsize=(17,7))
ax1.pie(Complaints_resolved['Closed'], colors = colors, labels=labels, autopct='%1.1f%%', startangle=70)
# A white circle drawn over the centre turns the pie into a donut
centre_circle = plt.Circle((0,0),0.50,fc='white')
ax1.add_artist(centre_circle)
ax1.legend(labels, bbox_to_anchor=(1.2,1), loc="upper right")
ax1.set_title('Percentage of complaints resolved')
plt.show()

<b>End of the Project</b>