# Helpdesk Hard

In [1]:
import getpass
from urllib.parse import quote

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
# Prompt for the database password so it never appears in the notebook source.
pwd = getpass.getpass()
# Percent-encode the password before embedding it in the connection URL:
# characters such as '@', '/', ':' or '%' would otherwise be read as URL
# syntax and produce a malformed connection string.
engine = create_engine(
    'postgresql+psycopg2://postgres:%s@192.168.31.31:15432/sqlzoo'
    % quote(pwd, safe=''))
# Let pandas display up to 100 rows before truncating a frame's output.
pd.set_option('display.max_rows', 100)

········


In [2]:
# Read each Helpdesk table into its own DataFrame. The tuple of table names
# and the tuple of target variables are in the same order.
shift, staff, issue, shift_type, level, customer, caller = (
    pd.read_sql_table(table_name, engine)
    for table_name in ('Shift', 'Staff', 'Issue', 'Shift_type',
                       'Level', 'Customer', 'Caller'))

## 11.
Show the manager and number of calls received for each hour of the day on 2017-08-12

```
+---------+---------------+----+
| Manager | Hr            | cc |
+---------+---------------+----+
| LB1     | 2017-08-12 08 |  6 |
| LB1     | 2017-08-12 09 | 16 |
| LB1     | 2017-08-12 10 | 11 |
| LB1     | 2017-08-12 11 |  6 |
| LB1     | 2017-08-12 12 |  8 |
| LB1     | 2017-08-12 13 |  4 |
| AE1     | 2017-08-12 14 | 12 |
| AE1     | 2017-08-12 15 |  8 |
| AE1     | 2017-08-12 16 |  8 |
| AE1     | 2017-08-12 17 |  7 |
| AE1     | 2017-08-12 19 |  5 |
+---------+---------------+----+
```

In [3]:
# Calls logged on 2017-08-12, tagged with their calendar day (midnight
# timestamp) and an hourly bucket label such as '2017-08-12 08'.
calls_on_day = (
    issue.assign(date_call=issue['Call_date'].dt.normalize(),
                 hr=issue['Call_date'].dt.strftime('%Y-%m-%d %H'))
    .loc[lambda d: d['date_call'] == pd.Timestamp('2017-08-12')])

# One row per (day, shift type, manager) together with the moment the shift
# ends. Shift_end is built by joining the date and the End_time text.
# NOTE(review): assumes End_time is a clock time on Shift_date and that shifts
# on the same day do not overlap -- confirm against the Shift_type table.
rostered = (shift[['Shift_date', 'Shift_type', 'Manager']]
            .drop_duplicates()
            .merge(shift_type[['Shift_type', 'End_time']], on='Shift_type'))
shift_ends = rostered.assign(
    Shift_end=(rostered['Shift_date'].astype(str)
               .str.cat(rostered['End_time'], sep=' ')
               .astype('datetime64[ns]')))

# Attribute each call to the shift in progress when it arrived: of the
# same-day shifts that had not yet ended, keep the one ending soonest.
# Matching on Taken_by == Operator instead would silently drop every call
# that was not taken by the rostered operator, losing whole hours.
# NOTE(review): assumes Call_ref identifies a call uniquely -- confirm.
(calls_on_day
 .merge(shift_ends, left_on='date_call', right_on='Shift_date')
 .loc[lambda d: d['Call_date'] < d['Shift_end']]
 .sort_values(['Call_ref', 'Shift_end'])
 .drop_duplicates('Call_ref')
 [['Manager', 'hr', 'Call_ref']]
 .groupby(['Manager', 'hr'])
 .count().reset_index()
 .sort_values('hr'))

Unnamed: 0,Manager,hr,Call_ref
5,LB1,2017-08-12 08,6
6,LB1,2017-08-12 09,16
7,LB1,2017-08-12 10,11
8,LB1,2017-08-12 11,6
9,LB1,2017-08-12 12,8
10,LB1,2017-08-12 13,4
0,AE1,2017-08-12 14,12
1,AE1,2017-08-12 15,8
2,AE1,2017-08-12 16,8
3,AE1,2017-08-12 17,7


## 12.
**80/20 rule. It is said that 80% of the calls are generated by 20% of the callers. Is this true? What percentage of calls are generated by the most active 20% of callers?**

Note - Andrew has not managed to do this in one query - but he believes it is possible.

```
+---------+
| t20pc   |
+---------+
| 32.2581 |
+---------+
```

In [4]:
# How many callers make up the most active 20% (fraction truncated by int()).
top_n = int(issue['Caller_id'].nunique() * 0.2)
# Calls per caller, busiest first, cut down to that top slice.
calls_per_caller = issue.groupby('Caller_id')['Caller_id'].count()
a = calls_per_caller.sort_values(ascending=False).iloc[:top_n]
# Share of all calls, in percent, made by that slice.
pd.DataFrame({'t20pc': [100 * a.sum() / len(issue)]})

Unnamed: 0,t20pc
0,32.258065


## 13.
**Annoying customers. Customers who call in the last five minutes of a shift are annoying. Find the most active customer who has never been annoying.**

```
+--------------+------+
| Company_name | abna |
+--------------+------+
| High and Co. |   20 |
+--------------+------+
```

In [5]:
# Every call matched to the shift its operator was rostered on that day,
# together with the company of the caller and the shift's End_time.
shift_calls = (issue.assign(date_call=issue['Call_date'].dt.normalize())
               .merge(shift, left_on=['Taken_by', 'date_call'],
                      right_on=['Operator', 'Shift_date'])
               .merge(shift_type, on='Shift_type')
               .merge(caller, on='Caller_id')
               .merge(customer, on='Company_ref')
               [['Company_ref', 'Shift_date', 'End_time', 'Call_date']])
# Shift end as a timestamp: the shift date and End_time text joined by a space.
shift_calls = shift_calls.assign(
    Shift_end=(shift_calls['Shift_date'].astype(str)
               .str.cat(shift_calls['End_time'], sep=' ')
               .astype('datetime64[ns]')))

# A call is "annoying" when it lands in the last five minutes of the shift,
# i.e. 0 to 5 minutes before Shift_end. The lower bound matters: without it,
# a call logged after the matched shift had ended (negative gap) would be
# flagged as well.
mins_to_end = ((shift_calls['Shift_end'] - shift_calls['Call_date'])
               / pd.Timedelta(minutes=1))
annoy = shift_calls.loc[mins_to_end.between(0, 5), :]

# All calls with the company they came from.
a = (issue.merge(caller, on='Caller_id')
     .merge(customer, on='Company_ref'))

# Drop companies that were ever annoying, count calls per remaining company,
# and keep the single busiest one.
(a.loc[~a['Company_ref'].isin(annoy['Company_ref'])]
 .groupby(['Company_ref', 'Company_name'], as_index=True)
 .agg(abna=('Caller_id', 'count'))
 .reset_index()
 .sort_values('abna', ascending=False)
 .iloc[:1, ])

Unnamed: 0,Company_ref,Company_name,abna
42,146,High and Co.,20


## 14.
**Maximal usage. If every caller registered with a customer makes a call in one day then that customer has "maximal usage" of the service. List the maximal customers for 2017-08-13.**

```
+-------------------+--------------+-------------+
| company_name      | caller_count | issue_count |
+-------------------+--------------+-------------+
| Askew Inc.        |            2 |           2 |
| Bai Services      |            2 |           2 |
| Dasher Services   |            3 |           3 |
| High and Co.      |            5 |           5 |
| Lady Retail       |            4 |           4 |
| Packman Shipping  |            3 |           3 |
| Pitiable Shipping |            2 |           2 |
| Whale Shipping    |            2 |           2 |
+-------------------+--------------+-------------+
```

In [6]:
# Calls whose timestamp falls on 2017-08-13 (from midnight up to, but not
# including, the following midnight).
on_day = issue['Call_date'].dt.normalize() == pd.Timestamp('2017-08-13')

# Right-join onto Caller so callers without a call that day are kept, then
# attach the customer each caller belongs to.
calls_by_caller = (issue.loc[on_day]
                   .merge(caller, on='Caller_id', how='right')
                   .merge(customer, on='Company_ref', how='left'))

# n = number of calls per caller that day; iss = 1 if the caller called at all.
iss = (calls_by_caller
       .groupby(['Caller_id', 'Company_ref', 'Company_name'], as_index=True)
       .agg(n=('Call_ref', 'count'))
       .reset_index())
iss = iss.assign(iss=(iss['n'] > 0).astype(int))

# Per company: registered callers vs. callers who called. A company has
# maximal usage when the two numbers are equal.
usage = (iss.groupby(['Company_name'])
         .agg(caller_count=('Company_ref', 'count'),
              issue_count=('iss', 'sum'))
         .reset_index())
(usage.loc[usage['caller_count'] == usage['issue_count']]
 .sort_values('Company_name'))

Unnamed: 0,Company_name,caller_count,issue_count
1,Askew Inc.,2,2
3,Bai Services,2,2
11,Dasher Services,3,3
20,High and Co.,5,5
26,Lady Retail,4,4
33,Packman Shipping,3,3
36,Pitiable Shipping,2,2
49,Whale Shipping,2,2


## 15.
**Consecutive calls occur when an operator deals with two callers within 10 minutes. Find the longest sequence of consecutive calls – give the name of the operator and the first and last call date in the sequence.**

```
+----------+---------------------+---------------------+-------+
| taken_by | first_call          | last_call           | calls |
+----------+---------------------+---------------------+-------+
| AB1      | 2017-08-14 09:06:00 | 2017-08-14 10:17:00 |    24 |
+----------+---------------------+---------------------+-------+
```

In [8]:
# Order calls chronologically within each operator ONCE, and derive every
# later column from this same ordering. The running count that numbers the
# runs depends on row order, so it must see the calls in the same order that
# was used to compute the gap to the previous call; read_sql_table gives no
# ordering guarantee for the rows of `issue`.
t = issue.sort_values(['Taken_by', 'Call_date'])
# Timestamp of the same operator's previous call (NaT for their first call).
t = t.assign(lag=t.groupby('Taken_by')['Call_date'].shift(1))
# flag = 1 when more than 10 minutes passed since the previous call, i.e. a
# new run starts here. Comparisons against NaT are False, so each operator's
# first call gets flag 0.
t = t.assign(flag=(t['Call_date'] - t['lag']
                   > pd.Timedelta(minutes=10)).astype(int))
# Running total of run starts per operator = identifier of the run.
t = t.assign(grp=t.groupby('Taken_by')['flag'].cumsum())
# Summarise each run: first and last call time and number of calls.
t = (t.groupby(['Taken_by', 'grp']).agg(first_call=('Call_date', 'min'),
                                       last_call=('Call_date', 'max'),
                                       n_calls=('Caller_id', 'count'))
     .reset_index())
# Longest run overall.
t.sort_values('n_calls', ascending=False).iloc[:1, :]

Unnamed: 0,Taken_by,grp,first_call,last_call,n_calls
2,AB1,2,2017-08-14 09:06:00,2017-08-14 10:17:00,24
