# Guest House - Hard

In [1]:
# Prerequisites: load the ipython-sql extension and open the Hive connection
# that every %%sql cell below runs against.
# NOTE(review): `hive` is not referenced again in this notebook; the import is
# presumably here so the hive:// SQLAlchemy dialect is available — confirm.
from pyhive import hive
%load_ext sql
%sql hive://cloudera@quickstart.cloudera:10000/sqlzoo
# Cap the number of rows rendered for any result set at 20.
%config SqlMagic.displaylimit = 20

## 11.
Coincidence. Have two guests with the same surname ever stayed in the hotel on the same evening? Show the last name and both first names. Do not include duplicates.

```
+-----------+------------+-------------+
| last_name | first_name | first_name  |
+-----------+------------+-------------+
| Davies    | Philip     | David T. C. |
| Evans     | Graham     | Mr Nigel    |
| Howarth   | Mr George  | Sir Gerald  |
| Jones     | Susan Elan | Mr Marcus   |
| Lewis     | Clive      | Dr Julian   |
| McDonnell | John       | Dr Alasdair |
+-----------+------------+-------------+
```

In [2]:
%%sql
WITH t AS (
    -- One row per booking, carrying the guest's identity next to the stay dates.
    SELECT guest.id, guest.first_name, guest.last_name,
           booking.booking_date, booking.nights
      FROM guest JOIN booking ON guest.id = booking.guest_id
)
SELECT DISTINCT
       g1.last_name,
       g1.first_name AS first_name1,
       g2.first_name AS first_name2
  FROM t g2 JOIN t g1 ON g2.last_name = g1.last_name
  WHERE
        -- Report each pair once: keep only the ordering where g2 has the higher
        -- guest id. Rows pairing identical first names are dropped as well.
        g2.id >= g1.id
    AND g2.first_name <> g1.first_name
        -- The stays overlap when either stay starts during the other one;
        -- the last occupied night of a stay is booking_date + nights - 1.
    AND (
           g1.booking_date BETWEEN g2.booking_date
                               AND DATE_ADD(g2.booking_date, g2.nights - 1)
        OR g2.booking_date BETWEEN g1.booking_date
                               AND DATE_ADD(g1.booking_date, g1.nights - 1)
    )
  ORDER BY g1.last_name

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


last_name,first_name1,first_name2
Davies,Philip,David T. C.
Evans,Graham,Mr Nigel
Howarth,Mr George,Sir Gerald
Jones,Susan Elan,Mr Marcus
Lewis,Clive,Dr Julian
McDonnell,John,Dr Alasdair


## 12.
Check out per floor. The first digit of the room number indicates the floor – e.g. room 201 is on the 2nd floor. For each day of the week beginning 2016-11-14 show how many rooms are being vacated that day by floor number. Show all days in the correct order.

```
+------------+-----+-----+-----+
| i          | 1st | 2nd | 3rd |
+------------+-----+-----+-----+
| 2016-11-14 |   5 |   3 |   4 |
| 2016-11-15 |   6 |   4 |   1 |
| 2016-11-16 |   2 |   2 |   4 |
| 2016-11-17 |   5 |   3 |   6 |
| 2016-11-18 |   2 |   3 |   2 |
| 2016-11-19 |   5 |   5 |   1 |
| 2016-11-20 |   2 |   2 |   2 |
+------------+-----+-----+-----+
```

In [3]:
%%sql
SELECT checkout_date AS i,
    -- Pivot the floors into columns: one conditional counter per floor.
    SUM(CASE WHEN floor_no = '1' THEN 1 ELSE 0 END) AS `1st`,
    SUM(CASE WHEN floor_no = '2' THEN 1 ELSE 0 END) AS `2nd`,
    SUM(CASE WHEN floor_no = '3' THEN 1 ELSE 0 END) AS `3rd`
FROM (
    -- One row per booking that checks out in the week starting 2016-11-14.
    -- Checkout day = booking_date + nights; floor = first character of room_no.
    SELECT DATE_ADD(booking_date, nights) AS checkout_date,
           SUBSTRING(room_no, 1, 1) AS floor_no
      FROM booking
      WHERE DATE_ADD(booking_date, nights) BETWEEN '2016-11-14' AND
            DATE_ADD('2016-11-14', 6)
) AS t
GROUP BY checkout_date
-- GROUP BY alone does not guarantee row order; the exercise asks for the days
-- in calendar order, so sort explicitly.
-- NOTE: rows come from `booking` only, so a day with no checkout at all would
-- be absent from the result rather than shown as zeros.
ORDER BY i

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


i,1st,2nd,3rd
2016-11-14,5,3,4
2016-11-15,6,4,1
2016-11-16,2,2,4
2016-11-17,5,3,6
2016-11-18,2,3,2
2016-11-19,5,5,1
2016-11-20,2,2,2


In [4]:
%%sql
WITH t AS (
    -- Number of checkouts per (day, floor) in the week starting 2016-11-14.
    -- Checkout day = booking_date + nights; floor = first character of room_no.
    SELECT DATE_ADD(booking_date, nights) i,
       SUBSTRING(room_no, 1, 1) floor,
       COUNT(*) n
       FROM booking
       WHERE DATE_ADD(booking_date, nights) BETWEEN '2016-11-14' AND
          DATE_ADD('2016-11-14', 6)
       GROUP BY DATE_ADD(booking_date, nights), SUBSTRING(room_no, 1, 1)
)
SELECT i,
    -- Alternative pivot: each input row is a one-entry map {floor: count}.
    -- kv['k'] is NULL for rows of other floors and COLLECT_SET skips NULLs, so
    -- element [0] is that floor's count. When a floor has no checkout on a day
    -- the set is empty and [0] is NULL, hence the COALESCE to 0.
    COALESCE(COLLECT_SET(kv['1'])[0], 0) AS `1st`,
    COALESCE(COLLECT_SET(kv['2'])[0], 0) AS `2nd`,
    COALESCE(COLLECT_SET(kv['3'])[0], 0) AS `3rd`
    FROM
(SELECT i, MAP(floor, n) kv FROM t) t_
GROUP BY i
-- GROUP BY alone does not guarantee row order; sort the days explicitly.
ORDER BY i

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


i,1st,2nd,3rd
2016-11-14,5,3,4
2016-11-15,6,4,1
2016-11-16,2,2,4
2016-11-17,5,3,6
2016-11-18,2,3,2
2016-11-19,5,5,1
2016-11-20,2,2,2


## 13.
Free rooms? List the rooms that are free on the day 25th Nov 2016.

```
+-----+
| id  |
+-----+
| 207 |
| 210 |
| 304 |
+-----+
```

In [5]:
%%sql
WITH occupied AS (
    -- Rooms with a booking that covers the night of 2016-11-25: the stay has
    -- started by then and its last night (booking_date + nights - 1) is on or
    -- after that date.
    SELECT room_no
      FROM booking
      WHERE DATE_ADD(booking_date, nights - 1) >= '2016-11-25'
        AND booking_date <= '2016-11-25'
)
SELECT id
  FROM room LEFT JOIN occupied ON room.id = occupied.room_no
  -- Anti-join: keep only the rooms that found no covering booking.
  WHERE occupied.room_no IS NULL

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


id
207
210
304


## 14.
Single room for three nights required. A customer wants a single room for three consecutive nights. Find the first available date in December 2016.

```
+-----+------------+
| id  | MIN(i)     |
+-----+------------+
| 201 | 2016-12-11 |
+-----+------------+
```

In [6]:
%%sql
WITH t AS (
    -- Step 1: pair each December 2016 booking requested as 'single' with the
    -- earliest later December booking of the same room. `diff` is the number of
    -- unbooked nights between this booking's checkout and the next check-in.
    -- The WHERE predicates on the right-hand table discard unmatched rows, so
    -- the LEFT JOIN behaves as an inner join here: a booking with no later
    -- December booking in its room produces no row.
    SELECT this_b.room_no,
           this_b.booking_date AS this_booking,
           this_b.nights       AS this_nights,
           MIN(next_b.booking_date) AS next_booking,
           DATEDIFF(MIN(next_b.booking_date), this_b.booking_date) - this_b.nights AS diff
      FROM booking this_b LEFT JOIN booking next_b ON (this_b.room_no = next_b.room_no)
      WHERE this_b.booking_date < next_b.booking_date
        AND DATE_FORMAT(next_b.booking_date, 'yyyyMM') = '201612'
        AND DATE_FORMAT(this_b.booking_date, 'yyyyMM') = '201612'
        AND this_b.room_type_requested = 'single'
      GROUP BY this_b.room_no, this_b.booking_date, this_b.nights
)
-- Look up how many nights the following booking lasts.
SELECT t.*, booking.next_nights
  FROM t LEFT JOIN (
         SELECT room_no, booking_date, nights AS next_nights FROM booking
       ) AS booking
    ON (booking.room_no = t.room_no AND booking.booking_date = t.next_booking)
  ORDER BY room_no, this_booking

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


room_no,this_booking,this_nights,next_booking,diff,next_nights
101,2016-12-03,5,2016-12-08,0,2
101,2016-12-08,2,2016-12-10,0,5
101,2016-12-10,5,2016-12-15,0,3
201,2016-12-01,2,2016-12-03,0,4
201,2016-12-03,4,2016-12-07,0,4
301,2016-12-02,2,2016-12-04,0,1
301,2016-12-04,1,2016-12-05,0,5
301,2016-12-05,5,2016-12-12,2,1


In [7]:
%%sql
WITH t AS (
    -- Step 1: pair each December 2016 booking requested as 'single' with the
    -- earliest later December booking of the same room. `diff` is the number of
    -- unbooked nights between this booking's checkout and the next check-in.
    -- The WHERE predicates on `b` discard unmatched rows, so a booking with no
    -- later December booking in its room produces no row.
    SELECT a.room_no, a.booking_date this_booking,
      a.nights this_nights,
      MIN(b.booking_date) AS next_booking,
      DATEDIFF(MIN(b.booking_date), a.booking_date) - a.nights AS diff
    FROM booking a LEFT JOIN booking b ON (a.room_no=b.room_no)
    WHERE a.room_type_requested='single' AND
      DATE_FORMAT(a.booking_date, 'yyyyMM')='201612' AND
      DATE_FORMAT(b.booking_date, 'yyyyMM')='201612' AND
      a.booking_date < b.booking_date
    GROUP BY a.room_no, a.nights, a.booking_date
), tt AS (
    -- Attach the length of the following booking so its checkout date
    -- (next_booking + next_nights) can be computed.
    SELECT t.*, booking.next_nights FROM
      t LEFT JOIN
    (SELECT nights next_nights, booking_date,
      room_no FROM booking) AS booking ON
    (t.next_booking=booking.booking_date AND
      booking.room_no=t.room_no)
)
-- Step 2: gather the candidate start dates and keep the earliest one.
-- NOTE: only time after a room's first December booking is considered, and a
-- room needs at least two December bookings to appear in `tt` at all.
SELECT * FROM (
    -- (a) A gap of at least three nights between two bookings. The room becomes
    --     free on the day THIS booking checks out, not the following one.
    SELECT room_no,
      DATE_ADD(this_booking, this_nights) i FROM tt
      WHERE diff >= 3
    UNION ALL
    -- (b) The checkout of the room's last December booking, provided it falls
    --     before 28 Dec. The whole date is compared (rather than DAY(...)) so
    --     that a checkout in early January cannot qualify.
    SELECT room_no,
      MAX(DATE_ADD(next_booking, next_nights)) i FROM tt
        GROUP BY room_no
        HAVING MAX(DATE_ADD(next_booking, next_nights)) < '2016-12-28') c
    ORDER BY c.i
    LIMIT 1

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


room_no,i
201,2016-12-11


## 15.
Gross income by week. Money is collected from guests when they leave. For each Thursday in November and December 2016, show the total amount of money collected from the previous Friday to that day, inclusive.

```
+------------+---------------+
| Thursday   | weekly_income |
+------------+---------------+
| 2016-11-03 |          0.00 |
| 2016-11-10 |      12608.94 |
| 2016-11-17 |      13552.56 |
| 2016-11-24 |      12929.69 |
| 2016-12-01 |      11685.14 |
| 2016-12-08 |      13093.79 |
| 2016-12-15 |       8975.87 |
| 2016-12-22 |       1395.77 |
| 2016-12-29 |          0.00 |
| 2017-01-05 |          0.00 |
+------------+---------------+
```

In [8]:
%%sql
WITH income AS (
    -- income: gross income per checkout day, tagged with `thu`, the Thursday
    -- that closes the Friday-to-Thursday week containing the checkout.
    SELECT checkout, amount,
        -- DATE_FORMAT(d, 'u') is the day number of the week (1 = Monday ...
        -- 7 = Sunday), so Thursday is 4. PMOD(4 - u, 7) is the number of days
        -- from the checkout forward to the next Thursday (0 on a Thursday).
        DATE_ADD(checkout,
                 PMOD(4 - CAST(DATE_FORMAT(checkout, 'u') AS INT), 7)) thu
    FROM
    (SELECT a.checkout, SUM(COALESCE(a.amount, 0)) amount FROM
      -- Room charges: nightly rate for the requested room type and occupancy,
      -- times the number of nights.
      (SELECT booking_date, DATE_ADD(booking_date, nights) checkout,
        SUM(rate.amount * booking.nights) amount
        FROM booking LEFT JOIN rate ON (
          booking.occupants=rate.occupancy AND
          booking.room_type_requested=rate.room_type)
        GROUP BY booking_date, DATE_ADD(booking_date, nights)
       UNION ALL
       -- Extras charged to the booking (NULL when a booking has none; the
       -- COALESCE in the enclosing SUM turns that into 0).
       SELECT booking_date, DATE_ADD(booking_date, nights) checkout,
           SUM(extra.amount) amount
           FROM booking LEFT JOIN extra ON (
                booking.booking_id=extra.booking_id)
           GROUP BY booking.booking_date, DATE_ADD(booking_date, nights)
      ) AS a
    GROUP BY a.checkout) b
), thursdays AS (
    -- thursdays: every Thursday from 2016-11-01 to 2016-12-31.
    -- REPEAT/SPLIT/POSEXPLODE generates the offsets 0..60 (one per day of the
    -- range); the filter keeps the days whose day number is 4 (Thursday).
    SELECT DATE_ADD('2016-11-01', a.pos) AS thu
      FROM (SELECT POSEXPLODE(SPLIT(REPEAT('o', DATEDIFF('2016-12-31', '2016-11-01')), 'o'))) a
      WHERE DATE_FORMAT(DATE_ADD('2016-11-01', a.pos), 'u') = 4
)

-- RIGHT JOIN keeps Thursdays with no income (reported as 0). Each income row's
-- `thu` is at most six days after its checkout by construction, so matching on
-- `thu` already restricts the sum to the Friday-to-Thursday window.
SELECT thursdays.thu thursday, ROUND(SUM(COALESCE(income.amount, 0)), 2) weekly_income
  FROM income RIGHT JOIN thursdays ON (income.thu=thursdays.thu)
    GROUP BY thursdays.thu
    -- GROUP BY alone does not guarantee row order; list the weeks chronologically.
    ORDER BY thursday

 * hive://cloudera@quickstart.cloudera:10000/sqlzoo
Done.


thursday,weekly_income
2016-11-03,0.0
2016-11-10,12608.94
2016-11-17,13552.56
2016-11-24,12929.69
2016-12-01,11685.14
2016-12-08,13093.79
2016-12-15,8975.87
2016-12-22,1395.77
2016-12-29,0.0
