# Helpdesk Hard

In [1]:
library(tidyverse)
library(DBI)
library(getPass)
drv <- switch(Sys.info()['sysname'],
             Windows="PostgreSQL Unicode(x64)",
             Darwin="/usr/local/lib/psqlodbcw.so",
             Linux="PostgreSQL")
con <- dbConnect(
  odbc::odbc(),
  driver = drv,
  Server = "localhost",
  Database = "sqlzoo",
  UID = "postgres",
  PWD = getPass("Password?"),
  Port = 5432
)
options(repr.matrix.max.rows=20)

─ [1mAttaching packages[22m ──────────────────── tidyverse 1.3.0 ─

[32m✔[39m [34mggplot2[39m 3.3.0     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.1     [32m✔[39m [34mdplyr  [39m 0.8.5
[32m✔[39m [34mtidyr  [39m 1.0.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.3.1     [32m✔[39m [34mforcats[39m 0.5.0

─ [1mConflicts[22m ───────────────────── tidyverse_conflicts() ─
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



Password? ····


## 11.
Show the manager and number of calls received for each hour of the day on 2017-08-12

```
+---------+---------------+----+
| Manager | Hr            | cc |
+---------+---------------+----+
| LB1     | 2017-08-12 08 |  6 |
| LB1     | 2017-08-12 09 | 16 |
| LB1     | 2017-08-12 10 | 11 |
| LB1     | 2017-08-12 11 |  6 |
| LB1     | 2017-08-12 12 |  8 |
| LB1     | 2017-08-12 13 |  4 |
| AE1     | 2017-08-12 14 | 12 |
| AE1     | 2017-08-12 15 |  8 |
| AE1     | 2017-08-12 16 |  8 |
| AE1     | 2017-08-12 17 |  7 |
| AE1     | 2017-08-12 19 |  5 |
+---------+---------------+----+
```

In [2]:
shift <- dbReadTable(con, 'Shift')
staff <- dbReadTable(con, 'Staff')
issue <- dbReadTable(con, 'Issue')
shift_type <- dbReadTable(con, 'Shift_type')
level <- dbReadTable(con, 'Level')
customer <- dbReadTable(con, 'Customer')
caller <- dbReadTable(con, 'Caller')

In [3]:
issue %>%
    mutate(calldate=as.Date(Call_date)) %>%
    inner_join(shift, by=c(Taken_by="Operator", 
                           calldate="Shift_date")) %>%
    filter(as.Date(Call_date)==as.Date('2017-8-12')) %>%
    mutate(Hr=format(Call_date, '%Y-%m-%d %H')) %>%
    group_by(Manager, Hr) %>%
    tally %>%
    arrange(Hr)

Manager,Hr,n
<chr>,<chr>,<int>
LB1,2017-08-12 08,6
LB1,2017-08-12 09,16
LB1,2017-08-12 10,11
LB1,2017-08-12 11,6
LB1,2017-08-12 12,8
LB1,2017-08-12 13,4
AE1,2017-08-12 14,12
AE1,2017-08-12 15,8
AE1,2017-08-12 16,8
AE1,2017-08-12 17,7


## 12.
**80/20 rule. It is said that 80% of the calls are generated by 20% of the callers. Is this true? What percentage of calls are generated by the most active 20% of callers.**

Note - Andrew has not managed to do this in one query - but he believes it is possible.

```
+---------+
| t20pc   |
+---------+
| 32.2581 |
+---------+
```

In [4]:
a <- issue %>%
    group_by(Caller_id) %>%
    tally %>% 
    ungroup %>%
    arrange(desc(n)) %>%
    mutate(rn=row_number(n))
100 * (a %>% 
    top_frac(0.2, wt=rn) %>% 
    summarise(x=sum(n))) / 
    (a %>% 
    summarise(x=sum(n)))

x
<dbl>
32.25806


## 13.
**Annoying customers. Customers who call in the last five minutes of a shift are annoying. Find the most active customer who has never been annoying.**

```
+--------------+------+
| Company_name | abna |
+--------------+------+
| High and Co. |   20 |
+--------------+------+
```

In [5]:
suppressPackageStartupMessages(library(lubridate))
bm <- issue %>% 
    mutate(shiftdate=as.Date(Call_date)) %>%
    inner_join(shift, by=c(Taken_by="Operator", shiftdate="Shift_date")) %>%
    inner_join(shift_type, by=c(Shift_type="Shift_type")) %>%
    left_join(caller, by=c(Caller_id="Caller_id")) %>%
    left_join(customer, by=c(Company_ref="Company_ref")) %>%
    mutate(shiftime=ymd_hm(paste(shiftdate, End_time)),
           tdiff=shiftime-Call_date) %>%
    filter(tdiff<=dminutes(5)) %>%
    distinct(Company_ref) %>%
    pull
issue %>%
    inner_join(caller, by=c(Caller_id="Caller_id")) %>%
    inner_join(customer, by=c(Company_ref="Company_ref")) %>%
    filter(! Company_ref %in% bm) %>%
    group_by(Company_name) %>%
    tally(name="abna") %>%
    ungroup %>%
    arrange(desc(abna)) %>%
    slice(1)

Company_name,abna
<chr>,<int>
High and Co.,20


## 14.
**Maximal usage. If every caller registered with a customer makes a call in one day then that customer has "maximal usage" of the service. List the maximal customers for 2017-08-13.**

```
+-------------------+--------------+-------------+
| company_name      | caller_count | issue_count |
+-------------------+--------------+-------------+
| Askew Inc.        |            2 |           2 |
| Bai Services      |            2 |           2 |
| Dasher Services   |            3 |           3 |
| High and Co.      |            5 |           5 |
| Lady Retail       |            4 |           4 |
| Packman Shipping  |            3 |           3 |
| Pitiable Shipping |            2 |           2 |
| Whale Shipping    |            2 |           2 |
+-------------------+--------------+-------------+
```

In [6]:
issue %>% 
    filter(as.Date(Call_date)==as.Date('2017-8-13')) %>%
    right_join(caller, by=c(Caller_id="Caller_id")) %>%
    left_join(customer, by=c(Company_ref="Company_ref")) %>%
    group_by(Company_ref, Company_name, Caller_id) %>%
    summarise(caller_count=n(), issue_count=sum(!is.na(Call_ref))) %>%
    ungroup %>%
    group_by(Company_ref, Company_name) %>%
    summarise(caller_count=sum(caller_count), issue_count=sum(issue_count)) %>%
    filter(caller_count==issue_count) %>%
    arrange(Company_name)

Company_ref,Company_name,caller_count,issue_count
<int>,<chr>,<int>,<int>
149,Askew Inc.,2,2
133,Bai Services,2,2
135,Dasher Services,3,3
146,High and Co.,6,6
140,Lady Retail,5,5
109,Packman Shipping,3,3
128,Pitiable Shipping,2,2
115,Whale Shipping,2,2


## 15.
**Consecutive calls occur when an operator deals with two callers within 10 minutes. Find the longest sequence of consecutive calls – give the name of the operator and the first and last call date in the sequence.**

```
+----------+---------------------+---------------------+-------+
| taken_by | first_call          | last_call           | calls |
+----------+---------------------+---------------------+-------+
| AB1      | 2017-08-14 09:06:00 | 2017-08-14 10:17:00 |    24 |
+----------+---------------------+---------------------+-------+
```

In [7]:
WITH t AS(
-- label consecutive calls 0
  SELECT "Issue".*, 
    CASE WHEN "Call_date" - LAG("Call_date", 1) OVER (
        PARTITION BY "Taken_by" ORDER BY "Call_date")> INTERVAL '10 minute' THEN 1 
         ELSE 0 END flag
    FROM "Issue"
), g AS (
-- cumsum the flags for grouping
  SELECT t.*, SUM(t.flag) OVER (
      PARTITION BY t."Taken_by" ORDER BY t."Call_date") AS grp
    FROM t
), rslt AS (
-- aggregate
  SELECT "Taken_by", grp, MIN("Call_date") first_call, 
    MAX("Call_date") last_call, COUNT("Caller_id") n_calls
    FROM g
    GROUP BY "Taken_by", grp
)
SELECT "Taken_by", first_call, last_call, n_calls
  FROM rslt WHERE n_calls=(SELECT MAX(n_calls) FROM rslt);



ERROR: Error in parse(text = x, srcfile = src): <text>:1:6: unexpected symbol
1: WITH t
         ^


In [None]:
a <- issue %>%
    group_by(Taken_by) %>%
    mutate(rn=row_number(Call_date))
consec_call <- a %>%
    left_join(a %>%
               mutate(rn=rn-1) %>%
               select(Taken_by, rn, Call_date),
              by=c(Taken_by="Taken_by", rn="rn")) %>%
    mutate(flag=replace_na(
        Call_date.y-Call_date.x > dminutes(10), 0)) %>%
    mutate(grp=cumsum(flag))
consec_call %>%
    group_by(Taken_by, grp) %>%
    summarise(first_call=min(Call_date.x), 
              last_call=max(Call_date.x), 
              n_calls=n()) %>%
    ungroup %>%
    select(-grp) %>%
    top_n(1, wt=n_calls)

In [None]:
dbDisconnect(con)