# Helpdesk Medium

In [1]:
import findspark
import pandas as pd
# Runs ahead of the pyspark imports below — presumably so findspark can make the
# local Spark installation importable first. TODO confirm this ordering is required.
findspark.init()

# Host used to build both the spark:// master URL and the hdfs:// warehouse URL.
# It can be overridden with the SQLZOO_SPARK_HOST environment variable so the
# notebook can target another cluster without a code edit; an unset or empty
# variable falls back to the original address.
import os
SVR = os.environ.get('SQLZOO_SPARK_HOST') or '192.168.31.31'
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Endpoints derived from the SVR host.
master_url = f'spark://{SVR}:7077'
warehouse_dir = f'hdfs://{SVR}:9000/user/hive/warehouse'

# Settings handed to the session builder, applied in this order.
session_conf = {
    'spark.sql.warehouse.dir': warehouse_dir,
    'spark.cores.max': '4',
    'spark.executor.instances': '1',
    'spark.executor.cores': '2',
    'spark.executor.memory': '10g',
}

session_builder = SparkSession.builder.appName('app12-2').master(master_url)
for conf_key, conf_value in session_conf.items():
    session_builder = session_builder.config(conf_key, conf_value)

# Hive-enabled session; `sc` is the SparkSession object the rest of the notebook uses.
sc = session_builder.enableHiveSupport().getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [2]:
# Read every table of the sqlzoo database into its own DataFrame variable.
shift, staff, issue, shift_type, level, customer, caller = (
    sc.read.table(f'sqlzoo.{table_name}')
    for table_name in ('Shift', 'Staff', 'Issue', 'Shift_type',
                       'Level', 'Customer', 'Caller')
)

## 6.
List the Company name and the number of calls for those companies with more than 18 calls.

```
+------------------+----+
| Company_name     | cc |
+------------------+----+
| Gimmick Inc.     | 22 |
| Hamming Services | 19 |
| High and Co.     | 20 |
+------------------+----+
```

In [3]:
# Attach the owning company to every call: issue -> caller -> customer.
calls_by_company = (
    issue
    .join(caller, on='Caller_id')
    .join(customer, on='Company_ref')
)

# Count calls per company name and keep the companies with more than 18.
(
    calls_by_company
    .groupBy('Company_name')
    .agg(count('Call_ref').alias('cc'))
    .where(col('cc') > 18)
    .toPandas()
)

                                                                                

Unnamed: 0,Company_name,cc
0,Hamming Services,19
1,Gimmick Inc.,22
2,High and Co.,20


## 7.
Find the callers who have never made a call. Show first name and last name.

```
+------------+-----------+
| first_name | last_name |
+------------+-----------+
| David      | Jackson   |
| Ethan      | Phillips  |
+------------+-----------+
```

In [4]:
# The anti join keeps only the callers that have no matching row in issue.
callers_without_calls = caller.join(issue, on='Caller_id', how='left_anti')

callers_without_calls.select('First_name', 'Last_name').toPandas()

Unnamed: 0,First_name,Last_name
0,David,Jackson
1,Ethan,Phillips


## 8.
For each customer show: Company name, contact name, and number of calls, where the number of calls is fewer than 5.

```
+--------------------+------------+-----------+----+
| Company_name       | first_name | last_name | nc |
+--------------------+------------+-----------+----+
| Pitiable Shipping  | Ethan      | McConnell |  4 |
| Rajab Group        | Emily      | Cooper    |  4 |
| Somebody Logistics | Ethan      | Phillips  |  2 |
+--------------------+------------+-----------+----+
```

In [5]:
# Every call tagged with the company of the caller who made it.
calls_with_company = (
    issue
    .join(caller.select('Caller_id', 'Company_ref'), on='Caller_id')
    .join(customer, on='Company_ref')
)

# Call totals per customer, restricted to customers with fewer than 5 calls.
low_volume_customers = (
    calls_with_company
    .groupBy('Company_ref', 'Company_name', 'Contact_id')
    .agg(count('Call_ref').alias('nc'))
    .where(col('nc') < 5)
)

# Resolve each customer's contact person through caller to obtain the name.
# The condition uses name-based col() references because the left side was
# itself derived from caller.
(
    low_volume_customers
    .join(caller, on=col('Contact_id') == col('Caller_id'))
    .select('Company_name', 'First_name', 'Last_name', 'nc')
    .toPandas()
)

Unnamed: 0,Company_name,First_name,Last_name,nc
0,Somebody Logistics,Ethan,Phillips,2
1,Rajab Group,Emily,Cooper,4
2,Pitiable Shipping,Ethan,McConnell,4


## 9.
For each shift show the number of staff assigned. Beware that some roles may be NULL and that the same person might have been assigned to multiple roles (The roles are 'Manager', 'Operator', 'Engineer1', 'Engineer2').

```
+------------+------------+----+
| Shift_date | Shift_type | cw |
+------------+------------+----+
| 2017-08-12 | Early      |  4 |
| 2017-08-12 | Late       |  4 |
| 2017-08-13 | Early      |  3 |
| 2017-08-13 | Late       |  2 |
| 2017-08-14 | Early      |  4 |
| 2017-08-14 | Late       |  4 |
| 2017-08-15 | Early      |  4 |
| 2017-08-15 | Late       |  4 |
| 2017-08-16 | Early      |  4 |
| 2017-08-16 | Late       |  4 |
+------------+------------+----+
```

In [6]:
shift_keys = ['Shift_date', 'Shift_type']
role_columns = ['Manager', 'Operator', 'Engineer1', 'Engineer2']

# One row per (shift, role), with the person assigned to that role in `p`.
role_assignments = shift.unpivot(shift_keys, role_columns, 'role', 'p')

(
    role_assignments
    # A person holding several roles in the same shift is counted once.
    .drop_duplicates(subset=shift_keys + ['p'])
    # Discard rows that contain a null, i.e. roles nobody was assigned to.
    .na.drop()
    .groupBy(*shift_keys)
    .agg(count('role').alias('cw'))
    .sort(*shift_keys)
    .toPandas()
)

Unnamed: 0,Shift_date,Shift_type,cw
0,2017-08-12,Early,4
1,2017-08-12,Late,4
2,2017-08-13,Early,3
3,2017-08-13,Late,2
4,2017-08-14,Early,4
5,2017-08-14,Late,4
6,2017-08-15,Early,4
7,2017-08-15,Late,4
8,2017-08-16,Early,4
9,2017-08-16,Late,4


## 10.
Caller 'Harry' claims that the operator who took his most recent call was abusive and insulting. Find out who took the call (full name) and when.

```
+------------+-----------+---------------------+
| first_name | last_name | call_date           |
+------------+-----------+---------------------+
| Emily      | Best      | 2017-08-16 10:25:00 |
+------------+-----------+---------------------+
```

In [7]:
# Only Caller_id is kept from caller, so the First_name/Last_name selected
# further down are the ones contributed by staff.
harry_ids = caller.where(col('First_name') == 'Harry').select('Caller_id')

# Harry's calls, each joined to the staff member who took it.
harry_calls = (
    harry_ids
    .join(issue, on='Caller_id')
    .join(staff, on=issue['Taken_by'] == staff['Staff_code'])
)

# Newest call first, then keep just that one row.
(
    harry_calls
    .orderBy('Call_date', ascending=False)
    .select('First_name', 'Last_name', 'Call_date')
    .limit(1)
    .toPandas()
)

Unnamed: 0,First_name,Last_name,Call_date
0,Emily,Best,2017-08-16 10:25:00.0


In [8]:
# Stop the session held in `sc` now that the last query cell has run.
sc.stop()