In [1]:
# This script contains example code that consumes the Nyss API and uses the retrieved data to calculate some indicators.

# imports
import csv
import io
import json
from datetime import timedelta, date

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from requests.auth import HTTPBasicAuth

%matplotlib inline

ModuleNotFoundError: No module named 'pandas'

In [None]:
# --- Notebook configuration -------------------------------------------------
# Which Nyss deployment to read data from: "demo", "local" or "prod".
environment = "prod"

# National society and project to analyse, and the first page requested from
# paginated endpoints.
nationalSocietyId, projectId = 2, 3
pageNumber = 1

# Credentials of a Nyss user who has access to the data being analysed.
# Fill them in locally only; do not commit real credentials.
yourUsername = yourPassword = ""

In [None]:
# api urls & login
# Base URL of the Nyss API for every supported environment.
apiRootUrls = {
    "demo": "https://demo.rcnyss.org/api",
    "local": "http://localhost:5001/api",
    "prod": "https://rcnyss.org/api",
}
rootUrl = apiRootUrls.get(environment, "")
if not rootUrl:
    # Covers the empty string as well as misspelled/unsupported names, which
    # would otherwise silently produce endpoint URLs without scheme and host.
    print("please select an environment by initializing the variable environment to either demo, local or prod.")

# api-endpoint nyss
loginURL = rootUrl + "/authentication/login"
# Use the configured national society instead of a fixed id.
geoStructureURL = rootUrl + "/nationalSocietyStructure/get?nationalSocietyId=" + str(nationalSocietyId)
reportsURL = rootUrl + "/nationalSocietyReport/list"
healthRisksURL = rootUrl + "/healthRisk/list"
dataCollectorsUrl = rootUrl + "/dataCollector/listAll"
dataCollectorsExportToCsvUrl = rootUrl + "/dataCollector/exportToCsv"
dataCollectorsMapOverview = rootUrl + "/dataCollector/mapOverview"
reportExportToCsvUrl = rootUrl + "/report/exportToCsv"
reportAlertsToExcelUrl = rootUrl + "/alert/export"

# JSON body sent to the login endpoint
loginPARAMS = {'username' : yourUsername, 'password' : yourPassword}

In [None]:
# get reports as csv from Nyss API
with requests.Session() as session:
    post = session.post(loginURL, json=loginPARAMS)
    if post.json()['isSuccess'] is not True:
        # Stop here with a clear message instead of failing later on a
        # missing export response.
        raise RuntimeError("Login to the Nyss API failed - check environment, username and password.")
    data = {
        "reportsType": "Main",
        "status": True,
        "orderBy": "date",
        "sortAscending": True,
        "utcOffset": 0,
        "area": {
            "id": 5, # Select the correct id or remove the area variable completely
            "type": "Region"
        },
    }
    reports = session.post(reportExportToCsvUrl + "?projectID=" + str(projectId), json = data)
    # An HTTP error body must not be parsed as if it were CSV data.
    reports.raise_for_status()

# 'utf-8-sig' strips a leading byte order mark, so the first header is read as
# 'Date' rather than '\ufeffDate'.
cr = csv.reader(reports.content.decode('utf-8-sig').splitlines(), delimiter=',')
reportsList = list(cr)

# First row holds the column names; every following row becomes one dict.
# zip() stops at the shorter side, so short rows simply lack the trailing keys.
reportsHeader = reportsList[0] if reportsList else []
reports = [dict(zip(reportsHeader, row)) for row in reportsList[1:]]

# clean dataset for some reports missing some keys
reportsCleaned = [report for report in reports if report.get('Region') is not None]


In [None]:
# 1.a. Results on CBS data in Nyss platform
# Nyss analysis using python dataframes

reportsPd = pd.DataFrame(reportsCleaned)

# Row filters on the flat report table; 'Date' is compared as text here.
isNotActivityReport = reportsPd['Health risk'] != 'Activity report'
isUntilEndOf2020 = reportsPd['Date'] <= '2020-12-31'

print('Total number of reports')
print(len(reportsPd[isUntilEndOf2020].index))

print('Number of reports w/o activity reports')
print(len(reportsPd[isNotActivityReport & isUntilEndOf2020].index))

# One index entry per report: (region, health risk, date)
reportsIndex = pd.MultiIndex.from_tuples(
    list(zip(reportsPd['Region'], reportsPd['Health risk'], reportsPd['Date'])))

# Same report rows, addressed through the index built above
reportsPdGroupedByRegionAndHR = pd.DataFrame(reportsCleaned, index=reportsIndex)

# combine date & time column to one date/time stamp (currently disabled)
#reportsPdGroupedByRegionAndHR['datetime'] = pd.to_datetime(reportsPdGroupedByRegionAndHR['Date'] + ' ' + reportsPdGroupedByRegionAndHR['Time'])
#reportsPdGroupedByRegionAndHR.drop(['Time', 'Date'], axis=1)

# Keep only the 'Total' column of everything that is not an activity report
indexedIsNotActivityReport = reportsPdGroupedByRegionAndHR['Health risk'] != 'Activity report'
reportsWithoutAcitivityReports = reportsPdGroupedByRegionAndHR[indexedIsNotActivityReport][['Total']]

# Name the index levels, count the rows per (region, health risk, date) and
# turn health risk and date back into ordinary columns.
indexLevelNames = ['Region', 'Health risk/event', 'Date']
reportsWithoutAcitivityReports.index.names = indexLevelNames
reportsForGraph = (
    reportsWithoutAcitivityReports
    .groupby(indexLevelNames)
    .count()
    .reset_index(['Health risk/event', 'Date'])
)

# Keep the rows dated up to the end of 2020
reportsForGraph = reportsForGraph[reportsForGraph['Date'] <= '2020-12-31']

In [None]:
# pie chart of reports separated by health risk/event

# Larger fonts for the big pie figure
params = {'font.size': 22, 'axes.labelsize': 30}
plt.rcParams.update(params)

# Sum of the per-day report counts for every health risk/event
reportsPerHealthRisk = reportsForGraph.groupby("Health risk/event")
data = reportsPerHealthRisk["Total"].sum()

def make_autopct(values):
    """Build a pie label formatter that shows percentage and absolute count.

    values: the wedge sizes handed to the pie chart; their sum is used to turn
        a wedge percentage back into an absolute number.
    Returns a function taking the percentage of a wedge and returning the
    label text, suitable for the ``autopct`` argument.
    """
    def my_autopct(pct):
        # Recover the (rounded) absolute value behind this wedge.
        absolute = int(round(pct * sum(values) / 100.0))
        return f'{pct:.2f}%  ({absolute:d})'
    return my_autopct

# Draw the pie; the trailing semicolon suppresses the returned object's repr.
pieOptions = {
    'autopct': make_autopct(data),
    'figsize': (20, 20),
    'wedgeprops': {'linewidth': 2},
    'textprops': {'size': 'medium'},
}
data.plot.pie(**pieOptions);

In [None]:
# bar chart of reports separated by health risk/event
reportsBarChartPd = pd.DataFrame(reportsForGraph.groupby("Health risk/event")["Total"].sum())
reportsBarChartPd = reportsBarChartPd.reset_index(level='Health risk/event')

sns.set_theme(style="darkgrid")
sns.set(rc={'figure.figsize':(16,8.27)})
plt.rcParams["axes.labelsize"] = 18

# One bar per health risk/event showing its number of reports
g = sns.barplot(
    data=reportsBarChartPd,
    x="Health risk/event", y="Total", palette="dark", alpha=.6
)

g.set(xlabel='Health risk / event',
      ylabel='Number of reports')

plt.xticks(rotation=73,
           fontsize='large')

# Label every bar with its share of all reports. The total is computed once,
# and the label is centred using the bar's actual width rather than a fixed
# offset, so it stays centred if the bar width changes.
totalReports = reportsBarChartPd['Total'].sum()
for p in g.patches:
    g.annotate('{:.1f} %'.format(p.get_height() / totalReports * 100),
               (p.get_x() + p.get_width() / 2, p.get_height()),
               ha='center', va='bottom',
               color='black')

plt.show()
    

In [None]:
# get alerts as an Excel export from the Nyss API
with requests.Session() as session:
    post = session.post(loginURL, json=loginPARAMS)
    if post.json()['isSuccess'] is not True:
        # Stop here instead of leaving alertsPd without data for later cells.
        raise RuntimeError("Login to the Nyss API failed - check environment, username and password.")
    data = {
        "area": {
            "id": 5, # Select the region of your choice or remove area variable
            "type": "Region"
        },
        "status": "All",
        "orderBy": "date",
        "sortAscending": True,
        "utcOffset": 0
    }
    alertsExport = session.post(reportAlertsToExcelUrl + "?projectID=" + str(projectId), json = data)
    # An HTTP error body must not be parsed as if it were a workbook.
    alertsExport.raise_for_status()

# Passing raw bytes to read_excel is deprecated; hand over a file-like object.
alertsPd = pd.read_excel(io.BytesIO(alertsExport.content))

In [None]:
# Alerts breakdown pie chart per health risk/event

# Alerts triggered up to the end of 2020
alertsPd2020 = alertsPd[alertsPd['Alert triggered'] <= '2020-12-31 23:59:59']
print('Total number of alerts in 2020')
print(len(alertsPd2020.index))

# Index every alert by (health risk/event, original row label)
alertsIndex = pd.MultiIndex.from_tuples(
    list(zip(alertsPd2020['Health risk/event'], alertsPd2020.index)))

# Rebuild the alert rows on top of that index
alertRows = list(alertsPd2020.T.to_dict().values())
alertsPdIndexed = pd.DataFrame(alertRows, index=alertsIndex)
alertsPdIndexed.index.names = ["Health risk/event", 'index']

# Every row counts as one alert; summing per health risk/event gives the totals
alertsPdIndexed['Number of alerts'] = 1
alertsPerHealthRisk = alertsPdIndexed['Number of alerts'].groupby("Health risk/event")
data = alertsPerHealthRisk.sum()

# pie chart: larger fonts for the big figure
params = {'font.size': 22, 'axes.labelsize': 30}
plt.rcParams.update(params)

def make_autopct(values):
    """Build a pie label formatter that shows percentage and absolute count.

    values: the wedge sizes handed to the pie chart; their sum is used to turn
        a wedge percentage back into an absolute number.
    Returns a function taking the percentage of a wedge and returning the
    label text, suitable for the ``autopct`` argument.
    """
    def my_autopct(pct):
        # Recover the (rounded) absolute value behind this wedge.
        absolute = int(round(pct * sum(values) / 100.0))
        return f'{pct:.2f}%  ({absolute:d})'
    return my_autopct

# Draw the pie; the trailing semicolon suppresses the returned object's repr.
pieOptions = {
    'autopct': make_autopct(data),
    'figsize': (20, 20),
    'wedgeprops': {'linewidth': 2},
    'textprops': {'size': 'medium'},
}
data.plot.pie(**pieOptions);

In [None]:
# bar chart of alerts vs health risk/event
alertsBarChartPd = pd.DataFrame(alertsPdIndexed['Number of alerts'].groupby("Health risk/event").sum())
alertsBarChartPd = alertsBarChartPd.reset_index(level='Health risk/event')

sns.set_theme(style="darkgrid")
sns.set(rc={'figure.figsize':(16,8.27)})
plt.rcParams["axes.labelsize"] = 18

# One bar per health risk/event showing its number of alerts
g = sns.barplot(
    data=alertsBarChartPd,
    x="Health risk/event", y="Number of alerts", palette="dark", alpha=.6
)

g.set(xlabel='Health risk / event',
      ylabel='Number of alerts')

plt.xticks(rotation=90,
           fontsize='large')

# Label every bar with its share of all alerts. The total is computed once,
# and the label is centred using the bar's actual width rather than a fixed
# offset, so it stays centred if the bar width changes.
totalAlerts = alertsBarChartPd['Number of alerts'].sum()
for p in g.patches:
    g.annotate('{:.1f} %'.format(p.get_height() / totalAlerts * 100),
               (p.get_x() + p.get_width() / 2, p.get_height()),
               ha='center', va='bottom',
               color='black')

plt.show()
    

In [None]:
## i. Completeness of reporting

# get data collectors from Nyss API for specified region
with requests.Session() as session:
    post = session.post(loginURL, json=loginPARAMS)
    if post.json()['isSuccess'] is not True:
        # Without this check a failed login surfaces as a NameError on `dcs`.
        raise RuntimeError("Login to the Nyss API failed - check environment, username and password.")
    data = {
        "area": {
            "id": 5, # select the correct region or remove area variable
            "type": "Region"
        },
        "sex": "All",
        "trainingStatus": "All"
    }
    dcs = session.post(dataCollectorsExportToCsvUrl + "?projectId=" + str(projectId), json = data)
    # An HTTP error body must not be parsed as if it were CSV data.
    dcs.raise_for_status()

# NOTE(review): decoded as plain 'utf-8', so a byte order mark (if the export
# has one) stays part of the first column name - confirm whether that matters.
cr = csv.reader(dcs.content.decode('utf-8').splitlines(), delimiter=',')
dcsPd = pd.DataFrame(list(cr))

# The first row holds the column names: use it as header and remove it from
# the data. The remaining rows keep their labels, i.e. the data starts at 1.
dcsPd_headers = dcsPd.loc[0]
dcsPd = dcsPd.drop(dcsPd.index[:1]).rename(columns=dcsPd_headers)

In [None]:
# sex & age distribution

# Index the data collectors by (sex, birth decade, running number)
dcsPdIndex = pd.MultiIndex.from_tuples(list(zip(dcsPd['Sex'], dcsPd['Birthgroup decade'], np.arange(0, len(dcsPd)))))

dcsPdIndexed = pd.DataFrame(list(dcsPd.T.to_dict().values()), index=dcsPdIndex)
dcsPdIndexed['Count'] = 1

# Count the data collectors per (birth decade, sex). The rows are collected in
# a list and turned into a frame once; DataFrame.append no longer exists in
# pandas 2.x.
dcGraphColumns = ['Birthgroup decade', 'Sex', 'Count']
dcGraphRows = []
for birthgroup in dcsPdIndexed['Birthgroup decade'].unique():
    # NOTE(review): decade '2020' is excluded - presumably a placeholder or
    # invalid birth decade; confirm.
    if birthgroup == '2020':
        continue
    inBirthgroup = dcsPdIndexed['Birthgroup decade'] == birthgroup
    for sex in dcsPdIndexed['Sex'].unique():
        count = int((inBirthgroup & (dcsPdIndexed['Sex'] == sex)).sum())
        if count > 0:
            dcGraphRows.append([birthgroup, sex, count])
dcPdGraph = pd.DataFrame(dcGraphRows, columns=dcGraphColumns)
dcPdGraph = dcPdGraph.sort_values(by='Birthgroup decade')
dcPdGraph['index'] = np.arange(0, len(dcPdGraph.index))

sns.set_theme(style="darkgrid")
sns.set(rc={'figure.figsize':(16,8.27)})
plt.rcParams["axes.labelsize"] = 18

ax = sns.barplot(
    x= dcPdGraph['Birthgroup decade'],
    y= dcPdGraph['Count'],
    hue= dcPdGraph['Sex'])
ax.set(xlabel='Birthgroup decade',
       ylabel='Number of volunteers')

# Label every bar with its share of all counted volunteers. The label is
# centred using the bar's actual width, which depends on the number of sexes.
totalVolunteers = dcPdGraph['Count'].sum()
for p in ax.patches:
    height = p.get_height()
    if np.isnan(height):
        # Bars without a height would otherwise get a "nan %" label.
        continue
    ax.annotate('{:.1f} %'.format(height / totalVolunteers * 100),
                (p.get_x() + p.get_width() / 2, height),
                ha='center', va='bottom',
                color='black')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Get all locations of data collectors retrieved before
# Every data collector row is used, whatever the size of the export; a fixed
# row count fails with fewer rows and silently ignores any additional ones.
dcsLocations = dcsPd[['Latitude', 'Longitude']].values.tolist()

# One row per data collector: column 0 = latitude, column 1 = longitude
dcsLocsPd = pd.DataFrame(dcsLocations)
# Function to iterate through weeks
def daterange(start_date, end_date):
    """Yield consecutive 7-day windows as ``[first_day, last_day]`` lists.

    The first window starts on ``start_date``; a window is produced as long as
    its first day is not after ``end_date``, so the last window may reach
    beyond ``end_date``. Nothing is yielded when ``end_date`` is before
    ``start_date``.
    """
    week_start = start_date
    while week_start <= end_date:
        yield [week_start, week_start + timedelta(6)]
        week_start = week_start + timedelta(7)

start_dt = date(2020, 3, 1)
end_dt = date(2020, 12, 31)

# (latitude, longitude) pairs of the exported data collectors, as text.
# Matching on the pair avoids accepting a map location whose latitude belongs
# to one data collector and whose longitude belongs to another.
knownDcLocations = {(str(lat), str(lon)) for lat, lon in dcsLocsPd.values.tolist()}

weekPerformances = []
with requests.Session() as session:
    post = session.post(loginURL, json=loginPARAMS)
    if post.json()['isSuccess'] is not True:
        # Without data the completeness calculation below cannot work.
        raise RuntimeError("Login to the Nyss API failed - check environment, username and password.")
    for dt in daterange(start_dt, end_dt):
        start = dt[0].strftime("%Y-%m-%d")
        end = dt[1].strftime("%Y-%m-%d")
        jsonResponse = session.get(dataCollectorsMapOverview + "?projectId=" + str(projectId) + "&from=" + start + "&to=" + end).json()
        dcsMap = pd.DataFrame(jsonResponse['value']['dataCollectorLocations'])
        if dcsMap.empty:
            # No locations reported for this week at all.
            dcsMapSelectedArea = dcsMap
            countNotReporting = 0
        else:
            # Keep the map locations that belong to an exported data collector.
            inSelectedArea = dcsMap['location'].apply(
                lambda loc: (str(loc['latitude']), str(loc['longitude'])) in knownDcLocations)
            dcsMapSelectedArea = dcsMap[inSelectedArea]
            countNotReporting = dcsMapSelectedArea['countNotReporting'].sum()
        weekPerformance = { 'start' : start,
                            'end' : end,
                            'numberOfDcs' : len(dcsMapSelectedArea.index),
                            'numberOfReportingDcs' : len(dcsMapSelectedArea.index) - countNotReporting
                          }
        weekPerformances.append(weekPerformance)

# Completeness = share of data collectors that reported, in percent
weekPerformancesPd = pd.DataFrame(weekPerformances)
weekPerformancesPd['completeness'] = weekPerformancesPd['numberOfReportingDcs'] / weekPerformancesPd['numberOfDcs'] * 100

In [None]:
# Completeness per week

weekPerformancesPd = pd.DataFrame(weekPerformances)
weekPerformancesPd['completeness'] = weekPerformancesPd['numberOfReportingDcs'] / weekPerformancesPd['numberOfDcs'] * 100
# Number the weeks consecutively, one per collected week, so the column always
# matches the number of rows.
# NOTE(review): the numbering starts at 10, presumably the epi week of the
# first collected week - verify when the date range changes.
weekPerformancesPd['epiWeek'] = np.arange(10, 10 + len(weekPerformancesPd), 1)

sns.set_theme(style="darkgrid")
sns.set(rc={'figure.figsize':(16,8.27)})
plt.rcParams["axes.labelsize"] = 18

# Highlight the week(s) with the highest completeness in red
highestCompleteness = max(weekPerformancesPd['completeness'])
clrs = ['cadetblue' if (y < highestCompleteness) else 'red' for y in weekPerformancesPd['completeness'] ]

ax = sns.barplot(
    data= weekPerformancesPd,
    x= weekPerformancesPd['epiWeek'],
    y= weekPerformancesPd['completeness'],
    palette=clrs)

ax.set(xlabel='EPI Week in 2020',
       ylabel='Completeness [%]')
plt.show()

In [None]:
# ii. Data quality – format correctness

# Get error reports
with requests.Session() as session:
    post = session.post(loginURL, json=loginPARAMS)
    if post.json()['isSuccess'] is not True:
        # Stop here with a clear message instead of failing later on a
        # missing export response.
        raise RuntimeError("Login to the Nyss API failed - check environment, username and password.")
    data = {
        "reportsType": "Main",
        "isTraining": False,
        "status": False,
        "orderBy": "date",
        "sortAscending": True,
        "utcOffset": 0,
        "area": {
            "id": 5, # select correct region id or remove area variable
            "type": "Region"
        },
    }
    errorReports = session.post(reportExportToCsvUrl + "?projectID=" + str(projectId), json = data)
    # An HTTP error body must not be parsed as if it were CSV data.
    errorReports.raise_for_status()

# 'utf-8-sig' strips a leading byte order mark, so the first header is read as
# 'Date' rather than '\ufeffDate'.
cr = csv.reader(errorReports.content.decode('utf-8-sig').splitlines(), delimiter=',')
errorReportsList = list(cr)

# First row holds the column names; every following row becomes one dict.
# zip() stops at the shorter side, so short rows simply lack the trailing keys.
errorReportsHeader = errorReportsList[0] if errorReportsList else []
errorReports = [dict(zip(errorReportsHeader, row)) for row in errorReportsList[1:]]

# clean dataset for some reports missing some keys
errorReportsCleaned = [report for report in errorReports if report.get('Region') is not None]
errorReportsPd = pd.DataFrame(errorReportsCleaned)

# Drop unnecessary columns
errorReportsPd = errorReportsPd.drop(['Zone', 'Health risk', 'Male 0–4 years', 'Total below 5', 'Total 5 or older', 'Total males', 'Total females', 'Total', 'Male 5 years or older', 'Female 0–4 years', 'Female 5 years and older', 'Location'], axis=1)

# Keep the rows dated up to the end of 2020 ('Date' is compared as text)
errorReportsPd = errorReportsPd[errorReportsPd['Date'] <= '2020-12-31']

In [None]:
# get reports with unknown phone number
reportsUnknownSender = []
with requests.Session() as session:
    post = session.post(loginURL, json=loginPARAMS)
    if post.json()['isSuccess'] is not True:
        raise RuntimeError("Login to the Nyss API failed - check environment, username and password.")
    data = {
        "reportsType": "UnknownSender",
        "status": False,
        "orderBy": "date",
        "sortAscending": True,
        "utcOffset": 0
    }
    # Request page after page until an empty page is returned. A local counter
    # is used so the configured pageNumber is left untouched.
    currentPage = 1
    while True:
        response = session.post(reportsURL + "?nationalSocietyId=" + str(nationalSocietyId) + "&pageNumber=" + str(currentPage), json = data).json()['value']
        if not response['data']:
            break
        reportsUnknownSender.extend(response['data'])
        currentPage += 1

reportsUnknownSenderPd = pd.DataFrame(reportsUnknownSender)
# Without any report the frame has no 'dateTime' column to filter on.
if not reportsUnknownSenderPd.empty:
    reportsUnknownSenderPd = reportsUnknownSenderPd[reportsUnknownSenderPd['dateTime'] <= '2020-12-31T23:59:59']

In [None]:
## iii. Accuracy of reporting on health risks / events

# analysis on escalated alerts
# NOTE(review): the next two expressions are evaluated but never assigned; as
# neither is the last statement of the cell, the notebook displays nothing.
len(alertsPd2020[alertsPd2020['Alert escalated'].isnull() != True])

alertsPd2020[alertsPd2020['Alert escalated'].isnull() != True]
# Reports dated up to the end of 2020 that are not activity reports and whose
# 'Alert Id' is not the empty string ('Date' is compared as text).
reportsWithAlerts = reportsPd[(reportsPd['Health risk'] != 'Activity report') &
                              (reportsPd['Date'] <= '2020-12-31') &
                              (reportsPd['Alert Id'] != '')]


# Index every report by (health risk, report status, running number); the
# running number keeps each row individually addressable.
reportsWithAlertsIndex = pd.MultiIndex.from_tuples(list(zip(reportsWithAlerts['Health risk'], reportsWithAlerts['Report status'], np.arange(0, len(reportsWithAlerts)))))

# Rebuild the selected rows on top of the new index.
reportsWithAlerts = pd.DataFrame(list(reportsWithAlerts.T.to_dict().values()), index=reportsWithAlertsIndex)

# Remove weirdly remaining nan values
# Rows whose 'Alert Id' is not a string are dropped through their running
# number (index level 2), which equals the enumeration position.
for index, report in enumerate(reportsWithAlerts['Alert Id']):
    if isinstance(report,str):
        continue
    else:
        reportsWithAlerts.drop(index, axis=0, level=2, inplace=True)

# The running number is no longer needed once the cleanup is done; the index
# is now (health risk, report status).
reportsWithAlerts = reportsWithAlerts.droplevel(2)

In [None]:
## iii. a. Alerts

# Count, per health risk/event, how many alerts carry a value in each of the
# three alert state columns. The rows are collected in a list and turned into
# a frame once; DataFrame.append no longer exists in pandas 2.x.
alertStateColumns = ['Health risk/event', 'Alert state', 'Count']
alertStateRows = []
for healthrisk in alertsPd2020['Health risk/event'].unique():
    alertsOfHealthRisk = alertsPd2020[alertsPd2020['Health risk/event'] == healthrisk]
    for alertstate in ['Alert escalated', 'Alert closed', 'Alert dismissed']:
        # count() only counts the rows where the state column is filled
        alertStateRows.append([healthrisk, alertstate, int(alertsOfHealthRisk[alertstate].count())])
alertsPd2020Graph = pd.DataFrame(alertStateRows, columns=alertStateColumns)

sns.set_theme(style="darkgrid")
plt.rcParams["axes.labelsize"] = 18

# Grouped bar chart: one group per health risk/event, one bar per alert state
g = sns.catplot(
    data=alertsPd2020Graph,
    x="Health risk/event", y="Count", kind="bar", hue="Alert state", palette="dark", alpha=.6,
    height=8, aspect=2
)

g.set(xlabel='Health risk / event',
      ylabel='Number of alerts')

g.despine(left=True)
plt.xticks(rotation=45,
           fontsize='large')

plt.show()

# Shares of dismissed and escalated alerts; closed alerts are left out of the
# denominator.
countByAlertState = alertsPd2020Graph.groupby('Alert state')['Count'].sum()
dismissedCount = float(countByAlertState.get('Alert dismissed', 0))
escalatedCount = float(countByAlertState.get('Alert escalated', 0))
allAlerts = dismissedCount + escalatedCount

# Both values are percentages of allAlerts; without any alert there is no
# meaningful percentage, so nan is reported instead of dividing by zero.
percentageDismissedAlerts = dismissedCount / allAlerts * 100 if allAlerts else float('nan')
percentageEscalatedAlerts = escalatedCount / allAlerts * 100 if allAlerts else float('nan')

# NOTE(review): this sums every column of the filtered frame, so the result is
# a Series rather than a single number - confirm that this is intended.
countDismissedAlerts = alertsPd2020Graph[alertsPd2020Graph['Alert state'] == 'Alert dismissed'].sum()
percentageEscalatedAlerts

In [None]:
# Histogram of escalated 'Cough and difficulty breathing' alerts per week
sns.set_theme(style="darkgrid")
sns.set(rc={'figure.figsize':(16,8.27)})
plt.rcParams["axes.labelsize"] = 18

# Dates in steps of 7 days starting at 2020-03-01 and ending before
# 2020-12-31; they serve as histogram bin edges and as tick positions.
epiWeeks = np.arange('2020-03-01', '2020-12-31', step=7, dtype='datetime64[D]')
epiWeeks = epiWeeks.astype('datetime64[ns]')

# Only alerts with a value in 'Alert escalated' for the selected health risk
g = sns.histplot(
    data=alertsPd2020[(alertsPd2020['Alert escalated'].isnull() != True) & 
                      (alertsPd2020['Health risk/event'] == 'Cough and difficulty breathing')], 
    x="Alert triggered", hue="Health risk/event", bins=mpl.dates.date2num(epiWeeks), palette="dark", alpha=.6,
    multiple='dodge'
)

g.set(xlabel='Epi week', 
       ylabel='Number of alerts')

# NOTE(review): the tick labels 10..53 are fixed; they must stay in sync with
# the number of dates in epiWeeks when the date range changes.
# NOTE(review): purpose of x=10000.0 is not evident from this cell - confirm.
plt.xticks(ticks=epiWeeks, labels=np.arange(10,54), rotation=45,
          fontsize='large', x=10000.0
          )


plt.show()

In [None]:
# Histogram of non-dismissed 'Acute malnutrition' reports per epi week of 2020.
# Style and figure size are set in one call: a separate sns.set(...) after
# sns.set_style("white") resets the style to seaborn's default again.
sns.set_theme(style="white", rc={'figure.figsize':(16,8.27)})
plt.rcParams["axes.labelsize"] = 18

g = sns.histplot(
    data=reportsPd[(reportsPd['EPI Year'] == '2020') &
          ((reportsPd['Report status'] != 'Dismissed')) &
          (reportsPd['Health risk'] == 'Acute malnutrition')],
    x="EPI Week", palette="dark", hue="Health risk", alpha=.6,
)

g.set(xlabel='Epi week',
      ylabel='Number of kept reports')

plt.yticks(ticks=np.arange(1,30,1))

plt.show()

In [None]:
## iii. b. Reports ( report state closed is pendings that were not assessed before the alert was closed)

# Comparison on report states / health risks/events
# For every (health risk, report status) combination: number of reports and
# their share of all reports of that health risk. Combinations without reports
# get 0 / 0. The rows are collected in a list and turned into a frame once;
# DataFrame.append no longer exists in pandas 2.x. Filtering on the columns
# also avoids the row/column ambiguity of .loc[a, b] on a MultiIndex frame.
reportStateColumns = ['HR', 'Report status', 'Count', 'percentage']
reportStateRows = []
reportStatuses = reportsWithAlerts['Report status'].unique()
for healthrisk in reportsWithAlerts['Health risk'].unique():
    reportsOfHealthRisk = reportsWithAlerts[reportsWithAlerts['Health risk'] == healthrisk]
    healthRiskTotal = reportsOfHealthRisk['Total'].count()
    for reportstate in reportStatuses:
        count = reportsOfHealthRisk[reportsOfHealthRisk['Report status'] == reportstate]['Total'].count()
        percentage = count / healthRiskTotal * 100 if count else 0
        reportStateRows.append([healthrisk, reportstate, count, percentage])
alertReportStatesGroupedByHealthRiskPd = pd.DataFrame(reportStateRows, columns=reportStateColumns)

# A stable sort keeps the health risks in the same order within every report
# status; the bar labels below rely on rows and bars being in the same order.
alertReportStatesGroupedByHealthRiskPd = alertReportStatesGroupedByHealthRiskPd.sort_values('Report status', ascending=False, kind='mergesort')
alertReportStatesGroupedByHealthRiskPd['index'] = np.arange(0, len(alertReportStatesGroupedByHealthRiskPd.index))
alertReportStatesGroupedByHealthRiskPd = alertReportStatesGroupedByHealthRiskPd.set_index('index')

sns.set_theme(style="ticks")
plt.rcParams["axes.labelsize"] = 18
f, ax = plt.subplots(figsize=(16, 7))

# Grouped bar chart: one group per health risk/event, one bar per report status
ax = sns.barplot(
    data=alertReportStatesGroupedByHealthRiskPd,
    x="HR", y="Count", hue="Report status"
)

ax.set(xlabel='Health risk / event',
       ylabel='Number of reports')

# Label the bars with the percentage of the row at the same position; zip()
# stops at the shorter side should the axes hold more patches than rows.
for p, percentage in zip(ax.patches, alertReportStatesGroupedByHealthRiskPd['percentage']):
    ax.annotate('{:.1f} %'.format(float(percentage)), (p.get_x() + 0.14, p.get_height() + 1),
                ha='center', va='bottom',
                color= 'black',
                rotation = 'vertical')

plt.xticks(rotation=45,
           fontsize='large')

plt.yticks(ticks=np.arange(0,150,10))
plt.show()

# Totals over all health risks; 'Closed' reports are left out of the
# kept-plus-dismissed total.
countDismissedReports = float(alertReportStatesGroupedByHealthRiskPd[alertReportStatesGroupedByHealthRiskPd['Report status'] == 'Dismissed']['Count'].sum())
countKeptDismissedReports = float(alertReportStatesGroupedByHealthRiskPd[alertReportStatesGroupedByHealthRiskPd['Report status'] != 'Closed']['Count'].sum())
countKeptReports = float(alertReportStatesGroupedByHealthRiskPd[alertReportStatesGroupedByHealthRiskPd['Report status'] == 'Kept']['Count'].sum())

# Share of dismissed reports among kept and dismissed reports
countDismissedReports/countKeptDismissedReports

In [None]:
# iv. Timeliness of verification

# Alerts that have a dismissal / escalation value
alertsDismissed = alertsPd2020[alertsPd2020['Alert dismissed'].notnull()]
alertsEscalated = alertsPd2020[alertsPd2020['Alert escalated'].notnull()]

# Time from the alert being triggered until it was dismissed / escalated
timeTriggerDismisal = alertsDismissed['Alert dismissed'] - alertsDismissed['Alert triggered']
timeTriggerEscalation = alertsEscalated['Alert escalated'] - alertsEscalated['Alert triggered']

In [None]:
# Alerts that were either dismissed or escalated
subDataFrameDismissedEscalatedAlerts = alertsPd2020[alertsPd2020['Alert dismissed'].notnull() |
                                                    alertsPd2020['Alert escalated'].notnull()]

# Verification time of an alert: its dismissal time when present, otherwise
# its escalation time. to_datetime keeps the arithmetic working when one of
# the columns holds no dates at all.
alertTriggered = pd.to_datetime(subDataFrameDismissedEscalatedAlerts['Alert triggered'])
alertVerified = pd.to_datetime(subDataFrameDismissedEscalatedAlerts['Alert dismissed']).fillna(
    pd.to_datetime(subDataFrameDismissedEscalatedAlerts['Alert escalated']))

# Built in one step instead of appending row by row (DataFrame.append no
# longer exists in pandas 2.x).
alertTimeliness = pd.DataFrame({
    'datetime': alertTriggered,
    'Health risk/event': subDataFrameDismissedEscalatedAlerts['Health risk/event'],
    'timeliness': alertVerified - alertTriggered,
}).reset_index(drop=True)

# Hours as plain floats; astype('timedelta64[s]') stays a timedelta on
# pandas 2.x, which breaks the numeric comparisons done on this column.
alertTimeliness['timelinessInHr'] = alertTimeliness['timeliness'].dt.total_seconds() / 3600

In [None]:
alertTimelinessPercentage = pd.DataFrame(columns=['health risk', 'timeframe', 'percentage', 'count'])

for healthrisk in alertTimeliness['Health risk/event'].unique():
    percentageDenominator = alertTimeliness[alertTimeliness['Health risk/event'] == healthrisk]['datetime'].count()
    percentageNumerator = alertTimeliness[(alertTimeliness['timelinessInHr'] <= 3) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
    temp = pd.DataFrame([[healthrisk,
                          '0 - 3 hours', 
                           percentageNumerator / percentageDenominator * 100,
                         alertTimeliness[(alertTimeliness['timelinessInHr'] <= 3) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
                         ]], 
                         columns=['health risk', 'timeframe', 'percentage', 'count'])
    alertTimelinessPercentage = alertTimelinessPercentage.append(temp)
    
    percentageNumerator = alertTimeliness[(alertTimeliness['timelinessInHr'] <= 6) &
                                          (alertTimeliness['timelinessInHr'] > 3) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
    temp = pd.DataFrame([[healthrisk,
                          '3 - 6 hours', 
                           percentageNumerator / percentageDenominator * 100,
                         alertTimeliness[(alertTimeliness['timelinessInHr'] <= 6) &
                                          (alertTimeliness['timelinessInHr'] > 3) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()]], 
                         columns=['health risk', 'timeframe', 'percentage', 'count'])
    alertTimelinessPercentage = alertTimelinessPercentage.append(temp)

    percentageNumerator = alertTimeliness[(alertTimeliness['timelinessInHr'] <= 12) &
                                          (alertTimeliness['timelinessInHr'] > 6) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
    temp = pd.DataFrame([[healthrisk,
                          '7 - 12 hours', 
                          percentageNumerator / percentageDenominator * 100,
                         alertTimeliness[(alertTimeliness['timelinessInHr'] <= 12) &
                                          (alertTimeliness['timelinessInHr'] > 6) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
                         ]], 
                         columns=['health risk', 'timeframe', 'percentage', 'count'])
    alertTimelinessPercentage = alertTimelinessPercentage.append(temp)

    percentageNumerator = alertTimeliness[(alertTimeliness['timelinessInHr'] <= 24) &
                                          (alertTimeliness['timelinessInHr'] > 12) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count() 
    temp = pd.DataFrame([[healthrisk,
                          '13 - 24 hours', 
                          percentageNumerator / percentageDenominator * 100,
                         alertTimeliness[(alertTimeliness['timelinessInHr'] <= 24) &
                                          (alertTimeliness['timelinessInHr'] > 12) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
                         ]], 
                         columns=['health risk', 'timeframe', 'percentage', 'count'])
    alertTimelinessPercentage = alertTimelinessPercentage.append(temp)

    percentageNumerator = alertTimeliness[(alertTimeliness['timelinessInHr'] <= 48) &
                                          (alertTimeliness['timelinessInHr'] > 24) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
    temp = pd.DataFrame([[healthrisk,
                          '25 - 48 hours', 
                          percentageNumerator / percentageDenominator * 100,
                         alertTimeliness[(alertTimeliness['timelinessInHr'] <= 48) &
                                          (alertTimeliness['timelinessInHr'] > 24) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()]], 
                         columns=['health risk', 'timeframe', 'percentage', 'count'])
    alertTimelinessPercentage = alertTimelinessPercentage.append(temp)

    percentageNumerator = alertTimeliness[(alertTimeliness['timelinessInHr'] > 48) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()
    temp = pd.DataFrame([[healthrisk,
                          '> 48 hours', 
                          percentageNumerator / percentageDenominator * 100,
                         alertTimeliness[(alertTimeliness['timelinessInHr'] > 48) &
                                          (alertTimeliness['Health risk/event'] == healthrisk)]['datetime'].count()]], 
                         columns=['health risk', 'timeframe', 'percentage', 'count'])
    alertTimelinessPercentage = alertTimelinessPercentage.append(temp)
    
#alertTimelinessPercentage = alertTimelinessPercentage.sort_values(by='timeframe')
# The rows were appended one single-row frame at a time, so their index labels
# repeat. Give every row a unique, sequential label (index name kept as 'index').
alertTimelinessPercentage = alertTimelinessPercentage.reset_index(drop=True).rename_axis('index')

# Grouped bar chart: number of alerts per timeframe, one bar per health risk.
sns.set_theme(style="ticks")
f, ax = plt.subplots(figsize=(16, 5))
sns.despine(f)

ax = sns.barplot(
    data=alertTimelinessPercentage,
    x="timeframe",
    y="count",
    hue="health risk",
    edgecolor=".3",
    linewidth=.5,
    ax=ax
)

ax.set(xlabel='Time interval from alert trigger to verification',
       ylabel='Number of alerts')

# Write the percentage above each bar.
# ax.containers holds one group of bars per hue level (health risk), each in
# x order (timeframe). Assumes the rows above are ordered the same way, i.e.
# by health risk and then by timeframe - TODO confirm against the loop that
# builds alertTimelinessPercentage.
# Reading bars from the containers (instead of ax.patches) keeps any patch that
# is not a data bar out of the pairing; zip stops at the shorter sequence.
bars = [bar for container in ax.containers for bar in container]
for bar, percentage in zip(bars, alertTimelinessPercentage['percentage']):
    # nothing useful to print for empty bars or an undefined percentage
    if pd.isna(percentage) or percentage == 0:
        continue
    ax.annotate('{:.1f} %'.format(percentage),
                # centre the label on the bar whatever its width is
                (bar.get_x() + bar.get_width() / 2, bar.get_height() + 1),
                ha='center', va='bottom',
                color='black',
                rotation='vertical')

plt.xticks(rotation=45,
           fontsize='large')

plt.show()


In [None]:

# Collapse the per-health-risk timeframe rows into two buckets.
# Every timeframe other than the three listed below counts as 'below 12 hours'.
# NOTE: despite the variable name, the split is made at 12 hours.
lateTimeframes = ['13 - 24 hours', '25 - 48 hours', '> 48 hours']
isLate = alertTimelinessPercentage['timeframe'].isin(lateTimeframes)
totalCount = alertTimelinessPercentage['count'].sum()

# Build both rows first and create the frame once; DataFrame.append was removed
# in pandas 2.0.
timelinessBuckets = []
for bucketLabel, bucketMask in (('below 12 hours', ~isLate), ('above 12 hours', isLate)):
    bucketCount = alertTimelinessPercentage.loc[bucketMask, 'count'].sum()
    # with no alerts at all the share is undefined rather than a division error
    bucketShare = bucketCount / totalCount * 100 if totalCount else float('nan')
    timelinessBuckets.append([bucketLabel, bucketShare, bucketCount])

alertTimelinessTwentyFourPd = pd.DataFrame(timelinessBuckets,
                                           columns=['timeframe', 'percentage', 'count'])
    

In [None]:
# Stacked histogram of timelinessInHr per health risk, restricted to alerts
# with a value of at most 72 and grouped into bins that are 6 wide.
sns.set_theme(style="ticks")
f, ax = plt.subplots(figsize=(16, 5))
sns.despine(f)

withinThreeDays = alertTimeliness['timelinessInHr'] <= 72
g = sns.histplot(
    data=alertTimeliness[withinThreeDays],
    x="timelinessInHr",
    hue="Health risk/event",
    multiple="stack",
    binwidth=6,
    edgecolor=".3",
    linewidth=.5,
    ax=ax
)

g.set(xlabel='Timeliness with 6 hour bins',
      ylabel='Number of alerts')

In [None]:
# Show the ten alerts with the largest timelinessInHr values.
slowestFirst = alertTimeliness.sort_values(by='timelinessInHr', ascending=False)
slowestFirst.head(10)

In [None]:
# v.	Effectiveness

# Count closed alerts per health risk and closing reason.
# The rows are gathered in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0 (and re-copied the frame on
# every iteration).
closingReasons = alertsPd2020['Reason for closing alert']
closingRows = []
for healthrisk in alertsPd2020['Health risk/event'].unique():
    # closing reasons recorded for this health risk; alerts without a reason are left out
    reasonsForRisk = closingReasons[closingReasons.notnull() &
                                    (alertsPd2020['Health risk/event'] == healthrisk)]
    # reasons are visited in order of first appearance, which fixes the row order
    for closingReason in reasonsForRisk.unique():
        closingRows.append([healthrisk,
                            closingReason,
                            int((reasonsForRisk == closingReason).sum())])

alertsClosingPd = pd.DataFrame(closingRows,
                               columns=['Health risk/event', 'Reason for closing alert', 'Count'])

# Grouped bar chart: closed alerts per health risk, one bar per closing reason.
sns.set_theme(style="ticks")
f, ax = plt.subplots(figsize=(16, 7))
sns.despine(f)

g = sns.barplot(
    data=alertsClosingPd,
    x="Health risk/event",
    y="Count",
    hue="Reason for closing alert",
    edgecolor=".3",
    linewidth=.5,
    ax=ax
)

g.set(xlabel='Health risk/event',
      ylabel='Number of closed alerts')

plt.xticks(rotation=45,
           fontsize='large')

plt.show()

In [None]:
# Rows with a non-null 'Alert escalated' value are counted as escalated; those
# that also have a closing reason are counted as documented.
wasEscalated = alertsPd2020['Alert escalated'].notnull()
hasClosingReason = alertsPd2020['Reason for closing alert'].notnull()

alertsEscalated = int(wasEscalated.sum())
alertsEscalatedAndDocumented = int((wasEscalated & hasClosingReason).sum())

percentageOfDocumented = alertsEscalatedAndDocumented / alertsEscalated * 100

# Cell output: fraction (not a percentage) of the counted closing reasons that
# are 'ActionTaken'.
isActionTaken = alertsClosingPd['Reason for closing alert'] == 'ActionTaken'
alertsClosingPd[isActionTaken]['Count'].sum() / alertsClosingPd['Count'].sum()


In [None]:
# Count, per health risk, how often each closing comment was given on alerts
# closed with reason 'ActionTaken'.
# The filter is applied once up front instead of on every loop iteration.
actionTakenWithComment = alertsPd2020[(alertsPd2020['Reason for closing alert'] == 'ActionTaken') &
                                      alertsPd2020['Comment / note'].notnull()]
allClosingComments = actionTakenWithComment['Comment / note'].unique()

# Rows are gathered in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0.
commentRows = []
for healthrisk in actionTakenWithComment['Health risk/event'].unique():
    commentsForRisk = actionTakenWithComment.loc[
        actionTakenWithComment['Health risk/event'] == healthrisk, 'Comment / note']
    # every comment is listed for every health risk, so combinations that never
    # occur are kept as rows with a count of 0
    for closingComment in allClosingComments:
        commentRows.append([healthrisk,
                            closingComment,
                            int((commentsForRisk == closingComment).sum())])

alertsClosingComments = pd.DataFrame(commentRows,
                                     columns=['Health risk/event', 'Comment', 'Count'])
    

In [None]:
# Grouped bar chart: closed alerts per health risk, one bar per closing comment.
sns.set_theme(style="ticks")
f, ax = plt.subplots(figsize=(16, 7))
sns.despine(f)

g = sns.barplot(
    x="Health risk/event",
    y="Count",
    hue="Comment",
    data=alertsClosingComments,
    linewidth=.5,
    edgecolor=".3",
    ax=ax
)

# larger axis titles than the default
g.set_ylabel("Number of closed alerts", fontsize=20)
g.set_xlabel("Health risk/event", fontsize=20)

plt.xticks(fontsize='large')

plt.show()