In [1]:
%matplotlib notebook

# Import Dependencies
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import csv
import requests
import matplotlib.patches as mpatches
import seaborn as sns
from matplotlib.colors import ListedColormap

In [2]:
# File to Load 
hr_data = pd.read_csv("Output_data/cleansed_dataframe.csv")
hr_data.head()

Unnamed: 0,Case ID,Company,Region,Status,Source,Current Agent,Creation Date,Due Date,Closed Date,Service Group,...,Due_Date,Due Time,Due Year,Due Month,Due Day,Due_day_of_week,Closed_Date,Closed Time,Pended_Date,Pended Time
0,8111665,AU,APAC,Closed,E-mail,MYHRW_JuliusB,11/05/18 22:07,11/20/18 21:00,11/16/18 07:45,MyHRW,...,2018-11-20 00:00:00,21:00:00,2018,11,20,Tuesday,2018-11-16,07:45:00,,
1,8111974,AU,APAC,Closed,E-mail,MyHRW_AdamSu,11/05/18 23:00,11/13/18 22:01,11/05/18 23:21,MyHRW,...,2018-11-13 00:00:00,22:01:00,2018,11,13,Tuesday,2018-11-05,23:21:00,,
2,8112663,AU,APAC,Closed,E-mail,MyHRW_AdamSu,11/06/18 04:18,11/07/18 22:01,11/06/18 04:52,MyHRW,...,2018-11-07 00:00:00,22:01:00,2018,11,7,Wednesday,2018-11-06,04:52:00,,
3,8112727,AU,APAC,Closed,Interface,MyHRW_AdamSu,11/06/18 04:58,11/08/18 22:01,11/06/18 05:04,MyHRW,...,2018-11-08 00:00:00,22:01:00,2018,11,8,Thursday,2018-11-06,05:04:00,,
4,8112910,AU,APAC,Closed,E-mail,MYHRW_JuliusB,11/06/18 06:06,11/13/18 22:01,11/06/18 07:31,MyHRW,...,2018-11-13 00:00:00,22:01:00,2018,11,13,Tuesday,2018-11-06,07:31:00,,


In [3]:
#creating lists for each of the data sets used to plot:  service group type and frequency
service_group_counts= hr_data['Service Group'].value_counts()
service_group_counts_list = service_group_counts.tolist()
service_group_list= service_group_counts.index.tolist()

#creating lists for each of the data sets used to plot:  service center and frequency

service_center_counts= hr_data['Service Center'].value_counts()
service_center_counts_list= service_center_counts.tolist()
service_center_list= service_center_counts.index.tolist()
service_center_list = [sc.replace('US - AMO', 'Washington DC') for sc in service_center_list]

#creating lists for each of the data sets used to plot: due day of week & time and frequency
due_day_of_week_counts=hr_data['Due_day_of_week'].value_counts()
due_day_of_week_list=due_day_of_week_counts.tolist()
due_day_of_week = hr_data['Due_day_of_week']

#creating lists for each of the data sets used to plot: due time and frequency
due_time_counts= hr_data['Due Time'].value_counts()
due_time_list= due_time_counts.tolist()

by_due_day= pd.DataFrame()
by_due_day['Due Day of the Week'] = hr_data['Due_day_of_week']
by_due_day['Due Time'] = hr_data['Due Time']


# creating dataframe for histogram for service center by due day
service_time_by_day= pd.DataFrame()
service_time_by_day['Service Center'] = hr_data['Service Center']
service_time_by_day['Day of the Week'] = hr_data['Due_day_of_week']
service_time_by_day['Time of the day']= hr_data['Due Time']

service_time_by_day = service_time_by_day.set_index('Service Center')


# Create the names for the bins using a list
DAYS = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']


In [4]:

# converting the time data into numerical (in minutes) format for plotting purposes 
time = pd.DatetimeIndex(hr_data['Due Time'])
hr_data['Due Time']= time.hour
# time.hour * 60 + time.minute

#creating new dataframe to use numerical format of time of day, day of the week and service center

new_timedf= pd.DataFrame()
new_timedf['Service Centers']= hr_data['Service Center']
new_timedf['Day of the week']= hr_data['Due_day_of_week']
new_timedf['Hour of day'] = hr_data['Due Time']

new_timedf


Unnamed: 0,Service Centers,Day of the week,Hour of day
0,Manila,Tuesday,21
1,Manila,Tuesday,22
2,Manila,Wednesday,22
3,Manila,Thursday,22
4,Manila,Tuesday,22
5,Manila,Wednesday,22
6,Manila,Monday,22
7,Manila,Wednesday,5
8,Manila,Wednesday,0
9,Manila,Thursday,21


# Times of inquiry plot by service center (plot vs. box plot)

In [5]:
new_timedf.plot(x='Service Centers', y='Hour of day', style='x')

plt.title("Service Centers by hour of day")
plt.xlabel("Service Center")

plt.ylabel("Requests/hour")

<IPython.core.display.Javascript object>

Text(0, 0.5, 'Requests/hour')

In [6]:
new_timedf.boxplot(by='Service Centers', 
                       column=['Hour of day'], 
                       grid=False, showmeans=True, notch= True, bootstrap= 10000)

plt.xlabel("Service Center")
plt.ylabel("Hour of day (based on 24 hour clock)")
plt.title("Hourly fielded inquiries by Service Center")
plt.xticks(rotation='vertical')

# # Save the figure
plt.savefig("Images/x_Hours of inquiries fielded by Service Center")

<IPython.core.display.Javascript object>

# Service Type by Day of the Week

In [7]:
service_timebyday_chart= service_time_by_day['Day of the Week'].value_counts().reindex(DAYS, fill_value=0).plot(kind='bar')

# x= count_dates
# y= count_dates

# date_chart = sns.barplot(x=x, y=y, palette="vlag")

# Set the xlabel and ylabel using class methods
service_timebyday_chart.set_xlabel("Day of the Week")
service_timebyday_chart.set_ylabel("Number of Inquiries")


plt.show()
plt.tight_layout()


# Number of Requests by Type


In [8]:
plt.title("# of Service Requests by Type")
plt.xlabel("Type of Service Request")
plt.ylabel("Number of Requests")
plt.xticks(rotation='vertical')

# # Build a scatter plot for each data type
N = 15
colors = np.random.rand(N)

plt.scatter(service_group_list, service_group_counts_list, 
            s=5*service_group_counts_list, c= colors, 
            edgecolor="black", linewidths=1, marker="o", 
            alpha=0.8, label="Urban")

# # Save the figure
plt.savefig("Images/x_Number of Service Requests by Type.png")

# # Show plot
plt.show()


# Attempt to make a heatmap by three dimensions 

In [9]:
newdf = hr_data[['Service Center','Due_day_of_week', 'Due Day']]

day_array=np.array([newdf['Due_day_of_week']])
                
sc_array=np.array([newdf['Service Center']])
dd_array =np.array([newdf['Due Day']])

fig, ax = plt.subplots()   
im = ax.imshow(day_array)

# We want to show all ticks...
ax.set_xticks(np.arange(len(dd_array)))
ax.set_yticks(np.arange(len(sc_array)))
# ... and label them with the respective list entries
ax.set_xticklabels(dd_array)
ax.set_yticklabels(sc_array)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Loop over data dimensions and create text annotations.
for i in range(len(sc_array)):
    for j in range(len(dd_array)):
        text = ax.text(j, i, dd_array[i, j],
                       ha="center", va="center", color="w")

ax.set_title("Tickets by service center (by days of the week)")
fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

TypeError: Image data cannot be converted to float