### Intro
This is a Python notebook to explain and try out more advanced Python SQL interactions

In [5]:
#1. commands beginning with '%' are sent to the terminal (Cool!)
#2. ipython-sql allows you to specify sql commands in-line (not as strings) by using '%' or '%%' (see below)
%pip install ipython-sql


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
#The percent operator allows you to load extensions to make the Python shell interpret commands differently.
#More info: https://ipython.readthedocs.io/en/stable/config/extensions/index.html
%load_ext sql

In [2]:
# Note: port 5432 is the default used by postgres

db_host = '127.0.0.1'
db = 'pagila'
db_user = 'basic_user'
db_password = 'password'
db_port = '5432'

connection = f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db}'
print(connection)

postgresql://basic_user:password@127.0.0.1:5432/pagila


In [11]:
#Connecting to Database
%sql $connection

In [14]:
'''
Notes on '%' operator when using sql commands:
1. Each command preceded by '%' will be interpreted by the terminal
2. You can use '$' to insert Python variables into these single-line commands
3. You can also write multi-line commands with '%%', but these do not support inserting Python variables with '$'
'''
%sql SELECT * FROM store;

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
2 rows affected.


store_id,manager_staff_id,address_id,last_update
1,1,1,2006-02-15 09:57:12
2,2,2,2006-02-15 09:57:12


In [34]:
# Showing an overview of how much data we have:

#%sql SELECT * FROM information_schema.tables WHERE table_schema = 'public';
tables = ['film','store','customer','rental','payment','staff','city','country']
for tbl in tables:
    data = %sql SELECT COUNT(*) FROM $tbl;
    print(f'{tbl}\t' + str(data[0][0]))

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
film	1000
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
store	2
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
customer	599
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
rental	16044
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
payment	32098
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
staff	2
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
city	600
 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.
country	109


In [35]:
#Get range of all payment dates:
%%sql
SELECT min(payment_date) as start, max(payment_date) as end from payment;

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
1 rows affected.


start,end
2007-01-24 21:21:56.996577,2007-05-14 13:44:29.996577


#### See all districts where customers are living

In [40]:
%%sql
SELECT district, SUM(city_id) as count
FROM address
GROUP BY district
ORDER BY count DESC
LIMIT 20;

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
20 rows affected.


district,count
Shandong,3237
England,2974
So Paulo,2952
West Bengali,2623
Buenos Aires,2572
Uttar Pradesh,2462
California,2444
Southern Tagalog,1931
Tamil Nadu,1807
Hubei,1790


#### What are the top-grossing movies in this database?
So what are the movies which, in aggregate, have earned the most money?

In [54]:
%%sql
SELECT SUM(payment.amount) as income, film.film_id, film.title
FROM payment
JOIN rental ON payment.rental_id = rental.rental_id
JOIN inventory ON rental.inventory_id = inventory.inventory_id
JOIN film ON inventory.film_id = film.film_id
GROUP BY film.film_id, film.title
ORDER BY income DESC
LIMIT 20;

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
20 rows affected.


income,film_id,title
463.46,879,TELEGRAPH VOYAGE
447.38,973,WIFE TURN
429.38,1000,ZORRO ARK
419.38,369,GOODFELLAS SALUTE
409.44,764,SATURDAY LAMBS
403.42,893,TITANS JERK
397.44,897,TORQUE BOUND
391.4,403,HARRY IDAHO
383.48,460,INNOCENT USUAL
381.56,444,HUSTLER PARTY


#### What are the cities whose customers provide the most income in this database?
So what are the cities where customers live which, when aggregated, have paid the most?

In [58]:
%%sql
SELECT SUM(payment.amount) as income, city.city_id, city.city
FROM payment
JOIN rental ON payment.rental_id = rental.rental_id
JOIN customer ON rental.customer_id = customer.customer_id
JOIN address ON customer.address_id = address.address_id
JOIN city on address.city_id = city.city_id
GROUP BY city.city_id, city.city
ORDER BY income DESC
LIMIT 10;

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
10 rows affected.


income,city_id,city
443.1,101,Cape Coral
433.08,442,Saint-Denis
397.0,42,Aurora
391.16,340,Molodetno
389.22,29,Apeldoorn
389.22,456,Santa Brbara dOeste
373.24,423,Qomsheh
361.04,312,London
355.2,388,Ourense (Orense)
351.22,78,Bijapur


#### Revenue of Movie by Customer City and by Month

In [72]:
%%sql
SELECT film.title, 
    SUM(payment.amount) AS income,
    city.city,
    EXTRACT(month FROM payment.payment_date) as month
FROM payment
JOIN rental ON payment.rental_id = rental.rental_id
JOIN customer ON rental.customer_id = customer.customer_id
JOIN address ON customer.address_id = address.address_id
JOIN city on address.city_id = city.city_id
JOIN inventory ON rental.inventory_id = inventory.inventory_id
JOIN film ON inventory.film_id = film.film_id
GROUP BY city.city, month, film.title
ORDER BY month, income DESC
LIMIT 10;

 * postgresql://basic_user:***@127.0.0.1:5432/pagila
10 rows affected.


title,income,city,month
SHOW LORD,23.98,Mannheim,1
AMERICAN CIRCUS,21.98,Callao,1
KISSING DOLLS,21.98,Toulon,1
CASUALTIES ENCINO,21.98,Warren,1
TELEGRAPH VOYAGE,21.98,Naala-Porto,1
MOONSHINE CABIN,19.98,Balaiha,1
MILLION ACE,19.98,Bergamo,1
DARKO DORADO,19.98,Bhilwara,1
MINE TITANS,19.98,Bradford,1
AUTUMN CROW,19.98,Ashgabat,1
