In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pyodbc
import re

In [None]:
# Load medication-related patient incidents together with the contacts linked
# to them. The connection string uses Trusted_Connection, so no user name or
# password is stored in the notebook.
sql_conn = pyodbc.connect('DRIVER={SQL Server};'
                            'SERVER=L_AAGname;'
                            'DATABASE=database_name;'
                            'Trusted_Connection=yes')

# The statement first switches the session to READ UNCOMMITTED and then runs
# the SELECT in the same batch.
query = "set transaction isolation level read uncommitted select c.description,b.con_email,a.inc_dincident,a.inc_time from contacts_main b join incidents_main a on a.recordid=b.recordid join code_con_type c on b.con_type=c.code where a.inc_type='PAT' and a.inc_category='MEDIC' and a.inc_organisation='QA' and a.show_employee='Y'"

# Always release the database connection, even if the read fails; none of the
# cells shown in this notebook query the database again.
try:
    df = pd.read_sql(query, sql_conn)
finally:
    sql_conn.close()

# Rows with missing values are not dropped here.

# The first selected column (c.description) is renamed by position to
# 'employee_involved'; the remaining column names are kept as returned.
df_columns = list(df.columns)
df_columns[0] = 'employee_involved'
df.columns = df_columns

# Keep only the part of each e-mail address before the first "@".
# .str[0] (rather than expand=True followed by [0]) also works when the query
# returns no rows, where expand=True would yield a frame without a column 0.
df['con_email'] = df['con_email'].str.split("@", n=1).str[0]

# Show a preview only: the full frame is large and contains e-mail names.
df.head()

In [None]:
# Keep only contacts whose type label mentions "Employee" and shorten the
# labels (e.g. "Employee (Registered Nurse)" -> "Nurse") for use in the plots.
#
# NOTE: this cell reassigns `df`; running it a second time without reloading
# the data filters on the already-shortened labels and leaves `df` empty.
#
# na=False treats a missing label as "not an employee" instead of failing on a
# mask that contains NaN; .copy() makes the filtered frame independent so the
# column assignment below does not raise SettingWithCopyWarning.
df = df[df['employee_involved'].str.contains('Employee', na=False)].copy()

# str.strip(chars) removes any run of the given *characters* from both ends,
# not a prefix: strip('Registered ') turns "Registered Radiographer" into
# "adiographer" and strip('Employee (') turns "Employee (Estates ..." into
# "states ...". Anchored patterns remove exactly the intended prefixes.
df['employee_involved'] = (
    df['employee_involved']
    .str.replace(r'^\s*Employee\s*\(\s*', '', regex=True)   # leading "Employee ("
    .str.replace(r'^Registered\s+', '', regex=True)         # leading "Registered "
    .str.rstrip(')')                                        # all trailing ")"
    # rstrip above also removes a closing bracket that follows "administrative";
    # put one back.
    .str.replace('administrative', 'administrative)', regex=False)
    .str.replace(' / ', '/', regex=False)
    .str.replace(' and ', ' & ', regex=False)
    .str.replace(' staff', '', regex=False)
)

# Preview instead of printing the whole frame.
df.head()

In [None]:
# Horizontal bar chart of the number of rows per staff role. value_counts()
# sorts descending and the bars are drawn in reverse order, so the most
# frequent role ends up at the top. Role names are written at the end of each
# bar and the y tick labels are hidden.
reportedby_counts = df['employee_involved'].value_counts()
reportedby_list = list(reportedby_counts.index)
n_roles = len(reportedby_counts.values)

plt.figure(figsize=(17,13))

# One colour per role, spread evenly over the RdYlGn colormap.
cm = plt.get_cmap('RdYlGn')
colors = [cm(fraction) for fraction in np.linspace(0, 1, n_roles)]

bars_bottom_up = zip(
    reversed(reportedby_list),
    reportedby_counts.values[::-1],
    reversed(colors),
)
for row, (role, count, shade) in enumerate(bars_bottom_up):
    plt.barh(role, count, color=shade)
    # Each role is a new category, so the bar just drawn sits at y == row.
    plt.text(count, row, role)

plt.yticks([])
# Right-hand limit: largest count plus the character length of the most
# frequent role's name, leaving room for the text labels.
plt.xlim([0,max(reportedby_counts)+len(reportedby_list[0])])
plt.title("employees involved in medicine patient safety events");

In [None]:
# Number of rows per contact e-mail name, least frequent first.
contact_names = df['con_email']
email_counts = contact_names.value_counts(ascending=True)
print(email_counts)

In [None]:
# Hour of day = first two characters of the incident time, stored as an
# integer column.
# NOTE(review): assumes inc_time is text starting with a two-digit hour
# (e.g. "HH:MM") — confirm against the source table.
hour_text = df['inc_time'].str.slice(stop=2)
df['hour'] = hour_text
df = df.astype({'hour': 'int'})
df.head()

In [None]:
# Length of the observation window in whole days, from the earliest to the
# latest incident date. Used as the denominator for the per-day rates in the
# hourly plot.
#
# Series.min()/max() are vectorised and skip missing dates; the builtin
# min()/max() iterate in Python and give wrong results when a NaT is present,
# because every comparison with NaT is False.
first_incident = df['inc_dincident'].min()
last_incident = df['inc_dincident'].max()
total_days = (last_incident - first_incident).days
print(total_days)

In [None]:
# Polar "clock" chart: events per day for each hour of the day, stacked by
# staff role, with a hatched band showing the sqrt(count) uncertainty on the
# total.
N = 24                                   # one wedge per hour
bins = [i-0.5 for i in range(N + 1)]     # each integer hour 0..23 is centred in its own bin
data_x = list(range(N))
data_y_tot,_ = np.histogram(df.hour, bins=bins)

# Wedge geometry. Bars are centred on `theta`; the tick labels sit half a
# wedge earlier (`theta_deg`), i.e. on the leading edge of each wedge.
width = 2 * np.pi/N
half_width_deg = 180.0/N                 # 7.5 degrees for N = 24
theta = np.linspace(0.0 + width, 2 * np.pi + width, N, endpoint=False)
theta_deg = np.linspace(0.0 + half_width_deg, 360 + half_width_deg, N, endpoint=False)
radii = data_y_tot/total_days
radii_err = np.sqrt(data_y_tot)/total_days

# Also read by the weekday histogram cell further down.
staff_counts = df['employee_involved'].value_counts()

fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
cumulative_bottom = np.zeros(N)
for role in staff_counts.index:
    role_hours = df.loc[df['employee_involved'] == role, 'hour']
    radii_staff_tot,_ = np.histogram(role_hours, bins=bins)
    radii_staff = radii_staff_tot/total_days
    ax.bar(theta, radii_staff, bottom=cumulative_bottom, width=width, label=role)
    # Build a new array rather than updating in place, so the array already
    # handed to ax.bar is never modified.
    cumulative_bottom = cumulative_bottom + radii_staff

# Band of +/- one radii_err around the top of the stack, matching the weekday
# histogram cell (bar height 2*err, bottom = total - err). The previous band
# had height radii_err centred on the top, i.e. only +/- half of that.
ax.bar(theta, 2*radii_err, bottom=cumulative_bottom-radii_err, width=width,
       alpha=0.5, color='none', hatch="////", label='Total Uncertainty')

# Rotate so the tick at theta_deg[0] points straight up and angles increase clockwise.
ax.set_theta_offset(np.pi/2 + width/2)
ax.set_theta_direction(-1)
ax.set_yticklabels([])
ax.set_xlabel('hour occured')
ax.set_title('Hourly number of patient medicine safety events at QA', y=1.1)
ax.legend(loc=(1.1,0), title='Staff involved:')
lines, labels = ax.set_thetagrids(theta_deg, labels=data_x)

In [None]:
# Day of the week of each incident (pandas convention: Monday=0 ... Sunday=6)
# and the length of the observation window in weeks, used to turn weekday
# counts into per-week averages.
df['weekday'] = df['inc_dincident'].dt.weekday

# Series.min()/max() are vectorised and skip missing dates; the builtin
# min()/max() iterate in Python and give wrong results when a NaT is present.
observation_span = df['inc_dincident'].max() - df['inc_dincident'].min()
total_weeks = observation_span.days/7
print(total_weeks)

In [None]:
# Stacked histogram of events per weekday, averaged over the number of weeks
# observed and split by staff role, with a hatched +/- sqrt(count) band on
# the total. Relies on `staff_counts` from the hourly-plot cell and on
# `total_weeks` / df['weekday'] from the cell above.
bins = [i-0.5 for i in range(8)]         # each weekday 0..6 is centred in its own bin
data_x = list(range(7))
data_y_tot,_ = np.histogram(df.weekday.values, bins=bins)
data_y = data_y_tot/total_weeks
data_y_err = np.sqrt(data_y_tot)/total_weeks

# One array of weekdays, one label and one array of weights per role. Each
# row weighs 1/total_weeks so the bar heights are per-week averages.
per_role_weekday = []
per_role_label = []
per_role_weight = []
for role in staff_counts.index:
    role_weekdays = df.loc[df['employee_involved'] == role, 'weekday'].values
    per_role_weekday.append(role_weekdays)
    per_role_label.append(role)
    per_role_weight.append(np.ones(len(role_weekdays))/total_weeks)

# Fixed: this call used the undefined names weekday_per_role and
# weights_per_role, which raises NameError on a fresh kernel.
plt.hist(per_role_weekday,bins=bins,weights=per_role_weight,stacked=True,label=per_role_label)
# Hatched band from (total - err) to (total + err).
plt.bar(data_x,2*data_y_err,bottom=data_y-data_y_err,color='none',alpha=0.5,hatch="////",label='Uncertainty',width=1)
plt.ylim(bottom=0)
plt.xticks(np.arange(7),labels=['Mon','Tue','Wed','Thu','Fri','Sat','Sun'])
plt.xlabel('weekday occured');
plt.ylabel('events per day')
plt.legend(title='Staff involved',loc=(1,0.3))
plt.title('Daily number of medicine patient safety events at QA');