In [None]:
"""DESCRIPTION

Comcast is an American global telecommunication company. The firm has been providing terrible customer service.
They continue to fall short despite repeated promises to improve. Only last month (October 2016), the authority fined them $2.3 million
after receiving over 1,000 consumer complaints.

The existing database will serve as a repository of public customer complaints filed against Comcast.
It will help to pin down what is wrong with Comcast's customer service."""


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Task 1: Import data into Python environment
# Keep the dataset location in one named place, then load it into `df`.
complaints_csv = '../input/comcast-data/Comcast_telecom_complaints_data.csv'
df = pd.read_csv(complaints_csv)

In [None]:
# Preview the first ten rows of the raw data.
df.iloc[:10]

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Distinct channels through which complaints were received.
pd.unique(df['Received Via'])

In [None]:
# Distinct values of the complaint status column.
df['Status'].unique()

In [None]:
# Move the 'Date_month_year' column into the index so later cells can work
# with the dates through df.index.
# The guard makes the cell safe to re-run: once the column has become the
# index it is no longer in df.columns, and an unguarded set_index would
# raise KeyError.
if 'Date_month_year' in df.columns:
    df = df.set_index('Date_month_year')

In [None]:
# Check the frame after the index change.
df.head(n=5)

In [None]:
# The five 'Date' values with the most complaints.
df['Date'].value_counts().head(5)

In [None]:
# Order the rows by the 'Date' column.
# NOTE(review): if 'Date' holds strings this ordering is lexicographic,
# not chronological - confirm the column's dtype.
df = df.sort_values('Date')

In [None]:
# Check the frame after sorting.
df.iloc[:5]

In [None]:
# Task 2: Provide the trend chart for the number of complaints at monthly
# and daily granularity levels
# Based on daily basis
sns.set_style('darkgrid')

# Count complaints per calendar day. The dates are taken from the index and
# parsed with pd.to_datetime (the same conversion the monthly chart relies on)
# so that groupby orders the days chronologically rather than by the string
# order of the 'Date' column. Parsing an index that is already datetime is a
# no-op, so the cell can be re-run safely.
complaint_days = pd.to_datetime(df.index)
daily_counts = df.groupby(complaint_days).size()

# Draw on an explicit Axes. The previous sns.countplot(df.Date) call passed the
# Series positionally, which newer seaborn releases interpret as `data`.
fig, ax = plt.subplots(figsize=(25, 12))
daily_counts.plot(ax=ax, marker='o')
ax.set_xlabel('Date')
ax.set_ylabel('Frequency')
ax.set_title('Date vs Frequency')
plt.xticks(rotation=90)

In [None]:
# Replace the index with its datetime-parsed equivalent so that
# time-based grouping can be used on it.
parsed_index = pd.to_datetime(df.index)
df.index = parsed_index

In [None]:
# Check the frame after the index conversion.
df.head(5)

In [None]:
# Based on monthly basis
sns.set_style('darkgrid')
# Bucket the rows by month of the datetime index and count them.
monthly_counts = df.groupby(pd.Grouper(freq='M')).size()
# `Month` keeps the Axes object returned by the bar plot.
Month = monthly_counts.plot.bar(figsize=(10, 6))
plt.xlabel('Month')
plt.ylabel('Frequency')
plt.title('Month vs Frequency', color='blue')

In [None]:
# Task 3: Provide a table with the frequency of complaint types
# Each distinct complaint text with the number of times it occurs,
# most frequent first.
complaint_text = df['Customer Complaint']
complaint_text.value_counts()

In [None]:
# Bar chart of the ten most frequent complaint texts.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(8, 6))
top_complaints = df['Customer Complaint'].value_counts().iloc[:10]
top_complaints.plot.bar(ax=ax, color='orange')
ax.set_title('Count of different types of customer complaints')

In [None]:
# Task 4: Which complaint types are the most frequent?

In [None]:
# Complaints whose text mentions "network".
# The match is case-insensitive so capitalised variants (e.g. "Network") are
# counted too, and rows with missing complaint text are treated as no match.
network_mask = df['Customer Complaint'].str.contains('network', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
internet_issues = df[network_mask].count()

In [None]:
# Show the per-column counts for the rows selected by the 'network' filter.
internet_issues

In [None]:
# Complaints whose text mentions "speed".
# The match is case-insensitive so capitalised variants are counted too, and
# rows with missing complaint text are treated as no match.
speed_mask = df['Customer Complaint'].str.contains('speed', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
internet_issues_2 = df[speed_mask].count()
internet_issues_2

In [None]:
# Complaints whose text mentions "data".
# The match is case-insensitive so capitalised variants are counted too, and
# rows with missing complaint text are treated as no match.
data_mask = df['Customer Complaint'].str.contains('data', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
internet_issues_3 = df[data_mask].count()
internet_issues_3

In [None]:
# Complaints whose text mentions "internet".
# The match is case-insensitive so capitalised variants (e.g. "Internet") are
# counted too, and rows with missing complaint text are treated as no match.
internet_mask = df['Customer Complaint'].str.contains('internet', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
internet_issues_4 = df[internet_mask].count()
internet_issues_4

In [None]:
# Total number of internet-related complaints.
# Adding the per-keyword counts together counts a complaint once for every
# keyword it contains (e.g. "internet speed" would be counted twice). Matching
# all keywords in a single pass counts each complaint exactly once.
internet_keywords = ['network', 'speed', 'data', 'internet']
internet_any_mask = df['Customer Complaint'].str.contains(
    '|'.join(internet_keywords), case=False, na=False)
# Same shape as the per-keyword results: non-null count of every column.
total_internet_issues = df[internet_any_mask].count()

In [None]:
# Show the combined internet-related counts.
total_internet_issues

In [None]:
# Complaints whose text mentions "billing".
# The match is case-insensitive so capitalised variants (e.g. "Billing") are
# counted too, and rows with missing complaint text are treated as no match.
billing_mask = df['Customer Complaint'].str.contains('billing', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
billing_issues = df[billing_mask].count()
billing_issues

In [None]:
# Complaints whose text mentions "charge".
# The match is case-insensitive so capitalised variants are counted too, and
# rows with missing complaint text are treated as no match.
charge_mask = df['Customer Complaint'].str.contains('charge', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
billing_issues_1 = df[charge_mask].count()
billing_issues_1

In [None]:
# Complaints whose text mentions "service".
# The match is case-insensitive so capitalised variants (e.g. "Service") are
# counted too, and rows with missing complaint text are treated as no match.
service_mask = df['Customer Complaint'].str.contains('service', case=False, na=False)
# .count() returns the non-null count of every column for the matching rows.
service_issues = df[service_mask].count()
service_issues

In [None]:
# Total number of complaints that fall into any of the internet, billing or
# service categories.
# Adding the per-keyword counts together counts a complaint once for every
# keyword it contains (e.g. "internet service" would be counted twice).
# Matching all keywords in a single pass counts each complaint exactly once.
categorised_keywords = ['network', 'speed', 'data', 'internet',
                        'billing', 'charge', 'service']
categorised_mask = df['Customer Complaint'].str.contains(
    '|'.join(categorised_keywords), case=False, na=False)
# Same shape as the per-keyword results: non-null count of every column.
total_issues = df[categorised_mask].count()

In [None]:
# Show the combined internet, billing and service counts.
total_issues

In [None]:
# Complaints that are not covered by the internet, billing or service keywords.
# The total row count is taken from the frame itself instead of a hard-coded
# 2224, so the figure stays correct if the data set changes.
other_issues = len(df) - total_issues
other_issues
# Conclusion of Task 4: Other issues are more than total issues

In [None]:
# Task 5: Create a new categorical variable with value as Open and Closed.
# Open & Pending is to be categorized as Open and Closed & Solved is
# to be categorized as Closed.
# List the raw status values before grouping them.
df['Status'].unique()

In [None]:
# Collapse the raw statuses into the two groups the task asks for:
#   Open, Pending  -> 'Open'   (still unresolved)
#   Closed, Solved -> 'Closed' (resolved)
# The earlier version grouped 'Solved' with 'Open' and let 'Pending' fall
# through to 'Closed', which inflated every "unresolved" figure derived from
# this column; those figures should be re-read after re-running.
# Any status outside the 'Open' group still maps to 'Closed', as before.
unresolved_statuses = ['Open', 'Pending']
df['Status_cur'] = np.where(df['Status'].isin(unresolved_statuses),
                            'Open', 'Closed')

In [None]:
# Check that the new 'Status_cur' column is present.
df.iloc[:5]

In [None]:
# Provide state wise status of complaints in a stacked bar chart.
#Use the categorized variable from Q3. Provide insights on:
# Which state has the maximum complaints
# Which state has the highest percentage of unresolved complaints
# Provide the percentage of complaints resolved till date,
# which were received through the Internet and customer care calls.

In [None]:
# Number of complaints per (State, Status_cur) pair, reshaped so each state is
# a row and each status a column; combinations that never occur become 0.
state_status_counts = df.groupby(['State', 'Status_cur']).size()
statewise_comp = state_status_counts.unstack().fillna(0)

In [None]:
# Stacked bar chart of complaint status per state.
# DataFrame.plot opens its own figure, so the size is given through `figsize`;
# the separate plt.figure(figsize=(6,6)) call used previously only produced an
# extra blank figure and has been removed. The mid-cell head(10) preview was
# also dropped because only a cell's last expression is displayed; evaluate
# statewise_comp.head(10) in its own cell to inspect the table.
ax = statewise_comp.plot.bar(figsize=(25, 15), stacked=True)
ax.set_title('Statewise status of customer complaints')

In [None]:
# Which state has maximum complaints ?
# Count complaints per state and show the five largest counts.
complaints_per_state = df.groupby(['State']).size()
complaints_per_state.sort_values(ascending=False).iloc[:5]
# Conclusion: State of Georgia has maximum complaints

In [None]:
# Number of complaints in each Open/Closed group.
df['Status_cur'].value_counts()

In [None]:
# Which state has the highest percentage of unresolved complaints ?
# Per-state Open/Closed counts, ordered so the states with the most open
# complaints come first.
state_status_table = df.groupby(['State', 'Status_cur']).size().unstack().fillna(0)
unres_comp = state_status_table.sort_values(by='Open', ascending=False)

In [None]:
# Each state's share of all open complaints, in percent. Note that this is
# the state's open count divided by the total open count across states, not
# the fraction of that state's own complaints that are open.
open_counts = unres_comp['Open']
unres_comp['unres_comp_percentage'] = open_counts.div(open_counts.sum()).mul(100)

In [None]:
# Ten states with the most open complaints.
unres_comp.iloc[:10]

In [None]:
# Percentage of total unresolved cases in all States
# The denominator is the actual number of rows in the frame rather than a
# hard-coded 2224, so the result stays correct if the data set changes.
total_complaints = len(df)
print(unres_comp['Open'].sum() / total_complaints * 100)

In [None]:
# Bar chart of each state's share of open complaints.
sns.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(10, 5))
unres_comp['unres_comp_percentage'].plot.bar(ax=ax, color='orange')
ax.set_ylabel('Percentage of unresolved customer complaints ')
ax.set_title('Status of unresolved Customer complaints in different states of America')

In [None]:
#Provide the percentage of complaints resolved till date, 
# which were received through the Internet and customer care calls.

In [None]:
# Number of complaints per (Received Via, Status_cur) pair, reshaped so each
# channel is a row and each status a column; missing combinations become 0.
channel_status_counts = df.groupby(['Received Via', 'Status_cur']).size()
resolved_comp = channel_status_counts.unstack().fillna(0)

In [None]:
# Show the Open/Closed counts per receiving channel.
resolved_comp

In [None]:
# Each channel's share of all closed complaints, in percent (the channel's
# closed count divided by the closed count summed over all channels).
closed_counts = resolved_comp['Closed']
resolved_comp['resolved'] = closed_counts.div(closed_counts.sum()).mul(100)

In [None]:
# Show the share of closed complaints per receiving channel.
resolved_comp.loc[:, 'resolved']

In [None]:
# Conclusion of the Project:
"""1. It was found that the most frequently filed customer complaint type was 'Comcast'.

2. The maximum number of complaints came from the state of Georgia, so the office
responsible for that state should be a focus for improvement.

3. About 60% of all customer complaints are still unresolved. This indicates
a lapse in service time and calls for a significant upgrade of the after-sales
team and customer support, who need to change the way they deal
with customer complaints.

4. Issues other than internet, service and billing made up
the largest share of complaints."""