In [0]:
%sql
-- Start from a clean slate: CASCADE also drops the tables inside country_club.
DROP DATABASE IF EXISTS country_club CASCADE;
CREATE DATABASE country_club;
-- List the databases to confirm country_club now exists.
SHOW DATABASES;

databaseName
country_club
default


In [0]:
# File locations and type
file_location_bookings = "/FileStore/tables/Bookings.csv"
file_location_facilities = "/FileStore/tables/Facilities.csv"
file_location_members = "/FileStore/tables/Members.csv"

file_type = "csv"

# CSV options
infer_schema = "true"
first_row_is_header = "true"
delimiter = ","


def read_table_file(file_location):
    """Load one source file into a Spark DataFrame with the shared reader settings.

    The applied options are for CSV files. For other file types, these will be ignored.

    Args:
        file_location: Path of the file to load.

    Returns:
        The DataFrame produced by ``spark.read`` for that file.
    """
    # One reader chain for all three inputs, so the options cannot drift apart
    # between the bookings, facilities and members loads.
    return (spark.read.format(file_type)
                 .option("inferSchema", infer_schema)
                 .option("header", first_row_is_header)
                 .option("sep", delimiter)
                 .load(file_location))


bookings_df = read_table_file(file_location_bookings)
facilities_df = read_table_file(file_location_facilities)
members_df = read_table_file(file_location_members)

In [0]:
# Print the schema of each loaded DataFrame under its own heading.
for heading, frame in (
    ('Bookings Schema', bookings_df),
    ('Facilities Schema', facilities_df),
    ('Members Schema', members_df),
):
    print(heading)
    frame.printSchema()

In [0]:
# Persist each DataFrame as a Parquet table in the country_club database.
# mode("overwrite") replaces a table that is already there; with the default
# save mode, saveAsTable raises when the table exists, so re-running this cell
# on its own used to fail.
permanent_table_name_bookings = "country_club.Bookings"
bookings_df.write.format("parquet").mode("overwrite").saveAsTable(permanent_table_name_bookings)

permanent_table_name_facilities = "country_club.Facilities"
facilities_df.write.format("parquet").mode("overwrite").saveAsTable(permanent_table_name_facilities)

permanent_table_name_members = "country_club.Members"
members_df.write.format("parquet").mode("overwrite").saveAsTable(permanent_table_name_members)

In [0]:
%sql
-- Make country_club the current database so later queries can use bare table names.
USE country_club;
-- Invalidate any cached entries for the three tables before querying them.
REFRESH TABLE bookings;
REFRESH TABLE facilities;
REFRESH TABLE members;
-- List the tables in the current database.
SHOW TABLES;


database,tableName,isTemporary
country_club,bookings,False
country_club,facilities,False
country_club,members,False


In [0]:
%sql
-- Preview three rows of the bookings table.
SELECT *
FROM bookings
LIMIT 3

bookid,facid,memid,starttime,slots
0,3,1,2012-07-03 11:00:00,2
1,4,1,2012-07-03 08:00:00,2
2,6,0,2012-07-03 18:00:00,2


In [0]:
#Q1: Some of the facilities charge a fee to members, but some do not. Please list the names of the facilities that do.

In [0]:
%sql
-- Q1 asks only for the facility names, so project just the name column
-- of the facilities whose member fee is above zero.
select name from facilities where membercost > 0

name
Tennis Court 1
Tennis Court 2
Massage Room 1
Massage Room 2
Squash Court


In [0]:
#Q2: How many facilities do not charge a fee to members?

In [0]:
%sql
-- Count the facilities whose member fee is zero.
SELECT COUNT(*)
FROM facilities
WHERE membercost = 0

count(1)
4


In [0]:
#Q3: How can you produce a list of facilities that charge a fee to members, where the fee is less than 20% of the facility's monthly maintenance cost?
#Return the facid, facility name, member cost, and monthly maintenance of the facilities in question.

In [0]:
%sql
-- Facilities that charge members a fee smaller than 20% of their monthly maintenance.
SELECT facid,
       name,
       membercost,
       monthlymaintenance
FROM facilities
WHERE membercost > 0
  AND membercost < 0.20 * monthlymaintenance

facid,name,membercost,monthlymaintenance
0,Tennis Court 1,5.0,200
1,Tennis Court 2,5.0,200
4,Massage Room 1,9.9,3000
5,Massage Room 2,9.9,3000
6,Squash Court,3.5,80


In [0]:
#Q4: How can you retrieve the details of facilities with ID 1 and 5? Write the query without using the OR operator.

In [0]:
%sql
-- IN stands in for an OR of two equality tests on facid.
SELECT *
FROM facilities
WHERE facid IN (1, 5)


facid,name,membercost,guestcost,initialoutlay,monthlymaintenance
1,Tennis Court 2,5.0,25.0,8000,200
5,Massage Room 2,9.9,80.0,4000,3000


In [0]:
#Q5: How can you produce a list of facilities, with each labelled as 'cheap' or 'expensive', depending on whether their monthly maintenance cost is more than $100?
#Return the name and monthly maintenance of the facilities in question.

In [0]:
%sql
-- Label each facility by comparing its monthly maintenance with 100.
SELECT name,
       monthlymaintenance,
       CASE
           WHEN monthlymaintenance > 100 THEN 'expensive'
           ELSE 'cheap'
       END AS label
FROM facilities

name,monthlymaintenance,label
Tennis Court 1,200,expensive
Tennis Court 2,200,expensive
Badminton Court,50,cheap
Table Tennis,10,cheap
Massage Room 1,3000,expensive
Massage Room 2,3000,expensive
Squash Court,80,cheap
Snooker Table,15,cheap
Pool Table,15,cheap


In [0]:
#Q6: You'd like to get the first and last name of the last member(s) who signed up. Do not use the LIMIT clause for your solution.

In [0]:
%sql
-- The scalar subquery finds the latest join date; every member who joined on it is returned.
SELECT firstname,
       surname
FROM members
WHERE joindate = (SELECT MAX(joindate) FROM members)
                    

firstname,surname
Darren,Smith


In [0]:
#Q7: How can you produce a list of all members who have used a tennis court?
#Include in your output the name of the court, and the name of the member formatted as a single column.
#Ensure there are no duplicate rows in the output.
#Also order by the member name.

In [0]:
%sql
-- DISTINCT collapses repeat bookings into one row per (member, court) pair.
SELECT DISTINCT m.firstname || ' ' || m.surname AS fullname,
                f.name
FROM members AS m
JOIN bookings AS b
    ON b.memid = m.memid
JOIN facilities AS f
    ON f.facid = b.facid
WHERE f.name IN ('Tennis Court 1', 'Tennis Court 2')
  AND m.firstname <> 'GUEST'   -- leave out rows whose first name is GUEST
ORDER BY fullname

fullname,name
Anne Baker,Tennis Court 1
Anne Baker,Tennis Court 2
Burton Tracy,Tennis Court 2
Burton Tracy,Tennis Court 1
Charles Owen,Tennis Court 2
Charles Owen,Tennis Court 1
Darren Smith,Tennis Court 2
David Farrell,Tennis Court 2
David Farrell,Tennis Court 1
David Jones,Tennis Court 2


In [0]:
#Q8: How can you produce a list of bookings on the day of 2012-09-14 which will cost the member (or guest) more than $30?
#Remember that guests have different costs to members (the listed costs are per half-hour 'slot')
#The guest user's ID is always 0.
#Include in your output the name of the facility, the name of the member formatted as a single column, and the cost.
#Order by descending cost, and do not use any subqueries.

In [0]:
%sql
-- Bookings on 2012-09-14 that cost the member (or guest) more than $30,
-- most expensive first. The cost rule is repeated in WHERE because the
-- bookingcost alias cannot be referenced there and Q8 forbids subqueries.
select m.firstname || ' ' || m.surname as fullname,
       b.starttime,
       f.name,
       b.slots,
       (case
            when m.memid = 0 then b.slots * f.guestcost   -- memid 0 is the guest user
            else b.slots * f.membercost
        end) as bookingcost
from members as m
inner join bookings as b
    on m.memid = b.memid
inner join facilities as f
    on f.facid = b.facid
where left(b.starttime, 10) = '2012-09-14'
  and ( (m.memid = 0 and b.slots * f.guestcost > 30)
        -- The member-rate test applies to real members only. Without the
        -- memid check, a guest booking could qualify on the member price,
        -- which is not the price used for its bookingcost.
        or (m.memid != 0 and b.slots * f.membercost > 30) )
order by bookingcost desc

fullname,starttime,name,slots,bookingcost
GUEST GUEST,2012-09-14 11:00:00,Massage Room 2,4,320.0
GUEST GUEST,2012-09-14 09:00:00,Massage Room 1,2,160.0
GUEST GUEST,2012-09-14 16:00:00,Massage Room 1,2,160.0
GUEST GUEST,2012-09-14 13:00:00,Massage Room 1,2,160.0
GUEST GUEST,2012-09-14 17:00:00,Tennis Court 2,6,150.0
GUEST GUEST,2012-09-14 14:00:00,Tennis Court 2,3,75.0
GUEST GUEST,2012-09-14 19:00:00,Tennis Court 1,3,75.0
GUEST GUEST,2012-09-14 16:00:00,Tennis Court 1,3,75.0
GUEST GUEST,2012-09-14 09:30:00,Squash Court,4,70.0
Jemima Farrell,2012-09-14 14:00:00,Massage Room 1,4,39.6


In [0]:
#Q9: This time, produce the same result as in Q8, but using a subquery.

In [0]:
%sql
-- Same report as Q8, but the per-booking cost is computed once in a derived
-- table so the outer query can filter on the bookingcost alias directly.
SELECT fullname, starttime, slots, facility, bookingcost
FROM (
    SELECT m.firstname || ' ' || m.surname AS fullname,
           b.starttime AS starttime,
           f.name AS facility,
           b.slots AS slots,
           CASE
               WHEN m.memid = 0 THEN b.slots * f.guestcost   -- memid 0 is the guest user
               ELSE b.slots * f.membercost
           END AS bookingcost
    FROM members AS m
    JOIN bookings AS b
        ON m.memid = b.memid
    JOIN facilities AS f
        ON f.facid = b.facid
) AS priced_bookings
WHERE bookingcost > 30
  AND left(starttime, 10) = '2012-09-14'
ORDER BY bookingcost DESC

fullname,starttime,slots,facility,bookingcost
GUEST GUEST,2012-09-14 11:00:00,4,Massage Room 2,320.0
GUEST GUEST,2012-09-14 09:00:00,2,Massage Room 1,160.0
GUEST GUEST,2012-09-14 16:00:00,2,Massage Room 1,160.0
GUEST GUEST,2012-09-14 13:00:00,2,Massage Room 1,160.0
GUEST GUEST,2012-09-14 17:00:00,6,Tennis Court 2,150.0
GUEST GUEST,2012-09-14 14:00:00,3,Tennis Court 2,75.0
GUEST GUEST,2012-09-14 19:00:00,3,Tennis Court 1,75.0
GUEST GUEST,2012-09-14 16:00:00,3,Tennis Court 1,75.0
GUEST GUEST,2012-09-14 09:30:00,4,Squash Court,70.0
Jemima Farrell,2012-09-14 14:00:00,4,Massage Room 1,39.6


In [0]:
#Q10: Produce a list of facilities with a total revenue less than 1000.
#The output should have facility name and total revenue, sorted by revenue.
#Remember that there's a different cost for guests and members!

In [0]:
%sql
-- Total revenue per facility, keeping only facilities that earned less than 1000.
-- The derived table prices every booking (guest rate for memid 0, member rate
-- otherwise); the outer query sums those prices per facility name.
SELECT facility,
       SUM(bookingcost) AS totalrevenue
FROM (
    SELECT f.name AS facility,
           CASE
               WHEN m.memid = 0 THEN b.slots * f.guestcost
               ELSE b.slots * f.membercost
           END AS bookingcost
    FROM facilities AS f
    JOIN bookings AS b
        ON f.facid = b.facid
    JOIN members AS m
        ON m.memid = b.memid
) AS booking_costs
GROUP BY facility
HAVING SUM(bookingcost) < 1000
ORDER BY totalrevenue DESC

facility,totalrevenue
Pool Table,270.0
Snooker Table,240.0
Table Tennis,180.0
