### Set up the connection

In [10]:
import os
# Tell the Google client libraries which service-account key file to use.
# setdefault() only applies this author-specific path when the variable is not
# already exported, so a machine that has its own credentials configured is not
# overridden by a path that exists only on the original author's laptop.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/Users/Myrto.Setzi/Ironhack Data Analytics.json",
)

In [11]:
from google.cloud import bigquery

In [12]:
# Create the BigQuery client with no explicit arguments; every query in the
# cells below is submitted through this object.
client = bigquery.Client()

### Challenge 1 - Who Has Published What, and Where?

In [51]:
# Challenge 1: one row per author/title pairing together with the title's publisher.
# Walks authors -> titleauthor -> titles -> publishers using INNER JOINs only, so an
# author with no titleauthor row (or a title with no matching publisher) is left out.
# No ORDER BY is given, so the row order of the result is not guaranteed.
query_1='''
SELECT 
    a.au_id AS author_id
    , au_lname AS last_name
    , au_fname AS first_name
    , t.title AS title
    , p.pub_name AS publisher
FROM  
    `ironhack-data-analytics.publications.authors` a
INNER JOIN 
    `ironhack-data-analytics.publications.titleauthor` ta 
ON
    a.au_id=ta.au_id
INNER JOIN 
    `ironhack-data-analytics.publications.titles` t 
ON 
    ta.title_id=t.title_id
INNER JOIN
    `ironhack-data-analytics.publications.publishers` p 
ON 
    t.pub_id=p.pub_id
'''

In [52]:
# Submit the Challenge 1 SQL to BigQuery and keep the job handle for later reading.
query_job_1 = client.query(query_1)

In [53]:
# Pull the Challenge 1 job's rows into a DataFrame.
df_1 = query_job_1.to_dataframe()

In [54]:
# Preview the first ten author / title / publisher rows.
df_1.head(10)

Unnamed: 0,author_id,last_name,first_name,title,publisher
0,807-91-6654,Panteley,Sylvia,"Onions, Leeks, and Garlic: Cooking Secrets of ...",Binnet & Hardley
1,722-51-5454,DeFrance,Michel,The Gourmet Microwave,Binnet & Hardley
2,712-45-1867,del Castillo,Innes,Silicon Valley Gastronomic Treats,Binnet & Hardley
3,899-46-2035,Ringer,Anne,Is Anger the Enemy?,New Moon Books
4,899-46-2035,Ringer,Anne,The Gourmet Microwave,Binnet & Hardley
5,998-72-3567,Ringer,Albert,Is Anger the Enemy?,New Moon Books
6,998-72-3567,Ringer,Albert,Life Without Fear,New Moon Books
7,172-32-1176,White,Johnson,Prolonged Data Deprivation: Four Case Studies,New Moon Books
8,486-29-1786,Locksley,Charlene,Net Etiquette,Algodata Infosystems
9,486-29-1786,Locksley,Charlene,Emotional Security: A New Algorithm,New Moon Books


In [55]:
# Row count of the titleauthor table, returned as a single column named `vol`.
# Used as the reference number for the Challenge 1 result size.
query_2='''
SELECT 
    COUNT(*) AS vol
FROM  
     `ironhack-data-analytics.publications.titleauthor`
'''

In [56]:
# Submit the titleauthor row-count SQL to BigQuery.
query_job_2 = client.query(query_2)

In [57]:
# Load the single-row count result and show it as the cell output.
df_2 = query_job_2.to_dataframe()
df_2

Unnamed: 0,vol
0,25


In [58]:
# Non-null count for each column of the Challenge 1 result. Every column should
# match the `vol` value from the titleauthor count above if the joins neither
# dropped nor duplicated rows.
df_1.count()

author_id     25
last_name     25
first_name    25
title         25
publisher     25
dtype: int64

### Challenge 2 - Who Has Published How Many, and Where?

In [59]:
# Challenge 2: number of titles per author per publisher.
# Same INNER JOIN chain as Challenge 1 (authors -> titleauthor -> titles -> publishers),
# grouped by the first four SELECT columns (GROUP BY 1,2,3,4 is positional) and
# counting title_id within each group.
# NOTE(review): there is no ORDER BY, so the result's row order is not guaranteed;
# add one if the output is expected to be sorted by title_count.
query_3='''
SELECT
    a.au_id AS author_id
    , au_lname AS last_name
    , au_fname AS first_name
    , p.pub_name AS publisher
    , COUNT(t.title_id) AS title_count
FROM
    `ironhack-data-analytics.publications.authors` a
INNER JOIN
    `ironhack-data-analytics.publications.titleauthor` ta 
ON
    a.au_id=ta.au_id
INNER JOIN 
    `ironhack-data-analytics.publications.titles` t 
ON
    ta.title_id=t.title_id
INNER JOIN
    `ironhack-data-analytics.publications.publishers` p 
ON
    t.pub_id=p.pub_id
GROUP BY
    1,2,3,4
    '''

In [60]:
# Submit the Challenge 2 SQL to BigQuery.
query_job_3 = client.query(query_3)

In [61]:
# Load the per-author, per-publisher title counts and preview ten rows.
df_3 = query_job_3.to_dataframe()
df_3.head(10)

Unnamed: 0,author_id,last_name,first_name,publisher,title_count
0,712-45-1867,del Castillo,Innes,Binnet & Hardley,1
1,899-46-2035,Ringer,Anne,New Moon Books,1
2,724-80-9391,MacFeather,Stearns,Binnet & Hardley,1
3,238-95-7766,Carson,Cheryl,Algodata Infosystems,1
4,724-80-9391,MacFeather,Stearns,Algodata Infosystems,1
5,472-27-2349,Gringlesby,Burt,Binnet & Hardley,1
6,998-72-3567,Ringer,Albert,New Moon Books,2
7,486-29-1786,Locksley,Charlene,New Moon Books,1
8,756-30-7391,Karsen,Livia,Binnet & Hardley,1
9,722-51-5454,DeFrance,Michel,Binnet & Hardley,1


### Challenge 3 - Best Selling Authors

In [62]:
# Challenge 3: the three authors with the highest total quantity sold.
# Joins authors -> titleauthor -> titles -> sales with INNER JOINs (authors without
# sales are excluded), sums sales.qty per author and keeps the top three rows.
# Several authors share the same total (the full ranking in Challenge 4 shows a
# three-way tie at 50), and LIMIT picks an unspecified subset of tied rows unless
# ORDER BY fully determines the order, so author_id is used as a tie-breaker to
# make the result reproducible across runs.
query_4='''
SELECT
    a.au_id AS author_id
    , au_lname AS last_name
    , au_fname AS first_name
    , SUM(s.qty) AS total
FROM
    `ironhack-data-analytics.publications.authors` a
INNER JOIN
    `ironhack-data-analytics.publications.titleauthor` ta
ON
    ta.au_id = a.au_id
INNER JOIN
    `ironhack-data-analytics.publications.titles` t
ON
    t.title_id = ta.title_id
INNER JOIN
    `ironhack-data-analytics.publications.sales` s
ON
    s.title_id = t.title_id
GROUP BY
    1,2,3
ORDER BY
    total DESC
    , author_id
LIMIT 3
'''

In [63]:
# Submit the Challenge 3 SQL to BigQuery.
query_job_4 = client.query(query_4)

In [64]:
# Load the top-three result and show it as the cell output.
df_4 = query_job_4.to_dataframe()
df_4

Unnamed: 0,author_id,last_name,first_name,total
0,899-46-2035,Ringer,Anne,148
1,998-72-3567,Ringer,Albert,133
2,213-46-8915,Green,Marjorie,50


### Challenge 4 - Best Selling Authors Ranking

In [65]:
# Challenge 4: every author ranked by total quantity sold.
# Uses LEFT JOINs from authors outward so authors with no titleauthor/sales rows
# are kept; COALESCE turns their NULL sum into 0.
# Rows are sorted by total descending only, so authors with equal totals may
# appear in any relative order.
query_5='''
SELECT
    a.au_id AS author_id
    , au_lname AS last_name
    , au_fname AS first_name
    , COALESCE(SUM(s.qty), 0) AS total
FROM
    `ironhack-data-analytics.publications.authors` a
LEFT JOIN 
    `ironhack-data-analytics.publications.titleauthor` ta 
ON
    ta.au_id = a.au_id
LEFT JOIN
    `ironhack-data-analytics.publications.titles` t 
ON
    t.title_id = ta.title_id
LEFT JOIN
    `ironhack-data-analytics.publications.sales` s 
ON
    s.title_id = t.title_id
GROUP BY
    1,2,3
ORDER BY
    total DESC;
'''

In [66]:
# Run the full author ranking, load it into a DataFrame and display it.
query_job_5 = client.query(query_5)
df_5 = query_job_5.to_dataframe()
df_5

Unnamed: 0,author_id,last_name,first_name,total
0,899-46-2035,Ringer,Anne,148
1,998-72-3567,Ringer,Albert,133
2,213-46-8915,Green,Marjorie,50
3,427-17-2319,Dull,Ann,50
4,846-92-7186,Hunter,Sheryl,50
5,724-80-9391,MacFeather,Stearns,45
6,267-41-2394,O'Leary,Michael,45
7,807-91-6654,Panteley,Sylvia,40
8,722-51-5454,DeFrance,Michel,40
9,238-95-7766,Carson,Cheryl,30


### Bonus Challenge - Most Profiting Authors

In [1]:
# Bonus challenge: the three authors with the highest "profits" (advance + royalties).
#
# Innermost subquery: INNER JOINs titles -> sales -> titleauthor -> authors, giving
#   one row per sale per author of the sold title, and computes
#   royalties = price * qty * royalty * royaltyper / 10000.
#   (The division by 10000 presumably treats royalty and royaltyper as percentages
#   -- TODO confirm against the table definitions.)
#   The derived table is aliased `a`, the same alias the authors table has inside it.
# royalties_table CTE: sums those per-sale royalties for each
#   (title_id, au_id, au_lname, au_fname, advance) group.
# Final SELECT: sums advance + royalties per author, sorts by profits descending
#   and keeps three rows.
#
# NOTE(review): the title's advance is added in full for every author of that title;
# unlike royalties it is not scaled by royaltyper. Confirm this is the intended
# definition of profit before relying on the ranking.
query_6='''
WITH royalties_table AS(
SELECT
    title_id
    , au_id
    , au_lname
    , au_fname
    , advance
    , SUM(royalties) AS royalties 
FROM 
    (SELECT
        t.title_id
        , t.price
        , t.advance
        , t.royalty
        , s.qty
        , a.au_id
        , au_lname
        , au_fname
        , ta.royaltyper
        , (t.price * s.qty * t.royalty * ta.royaltyper / 10000) AS royalties
    FROM 
        `ironhack-data-analytics.publications.titles` t
    INNER JOIN
        `ironhack-data-analytics.publications.sales` s 
    ON
        s.title_id = t.title_id
    INNER JOIN
        `ironhack-data-analytics.publications.titleauthor` ta 
    ON
        ta.title_id = s.title_id
    INNER JOIN
        `ironhack-data-analytics.publications.authors` a 
    ON
        a.au_id = ta.au_id)a
GROUP BY
    1,2,3,4,5)
            SELECT
                au_id AS author_id
                , au_lname AS last_name
                , au_fname AS first_name
                , sum(advance + royalties) AS profits
            FROM
                royalties_table
            GROUP BY
                1,2,3
            ORDER BY
                profits DESC
            LIMIT 3
'''

In [68]:
# Run the profits query, load the three rows into a DataFrame and display them.
query_job_6 = client.query(query_6)
df_6 = query_job_6.to_dataframe()
df_6

Unnamed: 0,author_id,last_name,first_name,profits
0,899-46-2035,Ringer,Anne,17353.132
1,213-46-8915,Green,Marjorie,15162.11
2,722-51-5454,DeFrance,Michel,15021.528
