# Guest House - Medium

In [1]:
import findspark
import pandas as pd
findspark.init()

SVR = '192.168.31.31'
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql import Window

sc = (SparkSession.builder.appName('app13-2') 
      .master(f'spark://{SVR}:7077') 
      .config('spark.sql.warehouse.dir', f'hdfs://{SVR}:9000/user/hive/warehouse') 
      .config('spark.cores.max', '4') 
      .config('spark.executor.instances', '1') 
      .config('spark.executor.cores', '2') 
      .config('spark.executor.memory', '10g') 
      .enableHiveSupport().getOrCreate())

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [2]:
booking = sc.read.table('sqlzoo.booking')
guest = sc.read.table('sqlzoo.guest')
room = sc.read.table('sqlzoo.room')
rate = sc.read.table('sqlzoo.rate')
extra = sc.read.table('sqlzoo.extra')

## 6.
Ruth Cadbury. Show the total amount payable by guest Ruth Cadbury for her room bookings. You should JOIN to the rate table using room_type_requested and occupants.

```
+--------------------+
| SUM(nights*amount) |
+--------------------+
|             552.00 |
+--------------------+
```

In [3]:
(guest.filter((col('first_name')=='Ruth') & 
              (col('last_name')=='Cadbury'))
 .join(booking, on=(guest['id']==booking['guest_id']))
 .join(rate, on=(col('room_type_requested')==rate['room_type']) & 
                (col('occupants')==rate['occupancy']))
 .groupBy()
 .agg(sum(col('nights') * col('amount')).alias('SUM(nights*amount)'))
 .toPandas())

                                                                                

Unnamed: 0,SUM(nights*amount)
0,552.0


## 7.
Including Extras. Calculate the total bill for booking 5346 including extras.

```
+-------------+
| SUM(amount) |
+-------------+
|      118.56 |
+-------------+
```

In [4]:
(booking.filter(col('booking_id')==5346)
 .join(rate, on=(booking['occupants']==rate['occupancy']) & 
                (booking['room_type_requested']==rate['room_type']))
 .union(booking.filter(col('booking_id')==5346)
        .join(extra, on='booking_id'))
 .groupBy()
 .agg(sum('amount').alias('SUM(amount)'))
 .toPandas())

Unnamed: 0,SUM(amount)
0,118.56


## 8.
Edinburgh Residents. For every guest who has the word “Edinburgh” in their address show the total number of nights booked. Be sure to include 0 for those guests who have never had a booking. Show last name, first name, address and number of nights. Order by last name then first name.

```
+-----------+------------+---------------------------+--------+
| last_name | first_name | address                   | nights |
+-----------+------------+---------------------------+--------+
| Brock     | Deidre     | Edinburgh North and Leith |      0 |
| Cherry    | Joanna     | Edinburgh South West      |      0 |
| Murray    | Ian        | Edinburgh South           |     13 |
| Sheppard  | Tommy      | Edinburgh East            |      0 |
| Thomson   | Michelle   | Edinburgh West            |      3 |
+-----------+------------+---------------------------+--------+
```

In [5]:
(guest.filter(col('address').like('%Edinburgh%'))
 .join(booking, on=(guest['id']==booking['guest_id']), how='left')
 .na.fill(0, subset='nights')
 .groupBy('id', 'last_name', 'first_name', 'address')
 .agg(sum('nights'))
 .orderBy('last_name', 'first_name')
 .toPandas())

Unnamed: 0,id,last_name,first_name,address,sum(nights)
0,1591,Brock,Deidre,Edinburgh North and Leith,0
1,1599,Cherry,Joanna,Edinburgh South West,0
2,1027,Murray,Ian,Edinburgh South,13
3,1617,Sheppard,Tommy,Edinburgh East,0
4,1457,Thomson,Michelle,Edinburgh West,3


## 9.
How busy are we? For each day of the week beginning 2016-11-25 show the number of bookings starting that day. Be sure to show all the days of the week in the correct order.

```
+------------+----------+
| i          | arrivals |
+------------+----------+
| 2016-11-25 |        7 |
| 2016-11-26 |        8 |
| 2016-11-27 |       12 |
| 2016-11-28 |        7 |
| 2016-11-29 |       13 |
| 2016-11-30 |        6 |
| 2016-12-01 |        7 |
+------------+----------+
```

In [6]:
(booking.filter((col('booking_date') >= '2016-11-25') & 
                (col('booking_date') <= date_add(lit('2016-11-25'), 6)))
 .groupBy('booking_date')
 .agg(count('booking_id').alias('arrivals'))
 .orderBy('booking_date')
 .toPandas())

Unnamed: 0,booking_date,arrivals
0,2016-11-25,7
1,2016-11-26,8
2,2016-11-27,12
3,2016-11-28,7
4,2016-11-29,13
5,2016-11-30,6
6,2016-12-01,7


## 10.
How many guests? Show the number of guests in the hotel on the night of 2016-11-21. Include all occupants who checked in that day but not those who checked out.

```
+----------------+
| SUM(occupants) |
+----------------+
|             39 |
+----------------+
```

In [7]:
(booking.filter((col('booking_date') <= '2016-11-21') &
                (date_add(col('booking_date'), col('nights')) > '2016-11-21'))
 .groupBy()
 .agg(sum('occupants').alias('SUM(occupants'))
 .toPandas())

Unnamed: 0,SUM(occupants
0,39


In [8]:
sc.stop()