# From SQL to Pandas Challenge 2

First, we'll need to set up the tables again, so we can work with them.

In [2]:
# import libraries
import pandas as pd

# load data
# This code is made to load our data stored on Google Drive
def gd_path(file_id):
    """Build the direct-download URL for a Google Drive file.

    Parameters
    ----------
    file_id
        The Google Drive file id; it is interpolated into the URL as-is.

    Returns
    -------
    str
        A ``drive.google.com/uc`` URL with ``export=download`` set for the file.
    """
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    return download_url

# Lookup table: table name -> Google Drive file id of the matching CSV file
files_id = dict(
    titleauthor="1F1JOiYXStWacOBca6coNVfyVtoST7ZgD",
    titles="1PLdn50N9GRa53ZbuVWo0l47F_IXdvlEm",
    stores="1f-GCgip7O93CpbAkYvOsc21eKnSOSHsQ",
    sales="1fzFc9rwYmVIPaGOFmhLVxCi3kg19vNU2",
    roysched="1zPRZPoFPEMKyrNR5VSENeYFHGCBZmxbs",
    publishers="1s9E8_AVOziTrowb3wyh2jg3PV763VOyq",
    pub_info="1OEgogcGKy--EpuVj0kqq7lyBZNGW6YSv",
    jobs="1V1Za8hUdXD-vJOyRdX4aQV5wanIff2eM",
    employee="1h9mUjsVqpP74b1w0x7KOw37n_n9Ulkt5",
    discounts="111dvSxMcCsTgOuV1wDSKFJxO1Xcxd9VS",
    authors="1fEF89Nhe61EebAljKlwFwfEuokK0o6aJ",
)

# Download the three tables used in this challenge.
# The CSV files use ";" as the field separator.
sales = pd.read_csv(
    gd_path(files_id["sales"]),
    delimiter=";",
)
authors = pd.read_csv(
    gd_path(files_id["authors"]),
    delimiter=";",
)
publishers = pd.read_csv(
    gd_path(files_id["publishers"]),
    delimiter=";",
)

## 1. Select first and last name from authors who have the last name "Ringer"

In [None]:
# Row mask keeps the "Ringer" rows; the column list plays the role of SELECT
authors.loc[authors["au_lname"].eq("Ringer"), ["au_fname", "au_lname"]]

Unnamed: 0,au_fname,au_lname
21,Anne,Ringer
22,Albert,Ringer


### Hint:


In SQL the syntax is:

```sql
SELECT au_fname, au_lname
FROM authors
WHERE au_lname="Ringer";
```

In [3]:
# Same filter written with `query`, followed by the column selection
authors.query('au_lname == "Ringer"')[["au_fname", "au_lname"]]

Unnamed: 0,au_fname,au_lname
21,Anne,Ringer
22,Albert,Ringer


## 2. Select the order number and order date of all sales where *exactly* 20 books were sold.

In [None]:
# The callable receives the frame and returns the row mask (qty equal to 20)
sales.loc[lambda frame: frame["qty"] == 20, ["ord_num", "ord_date"]]

Unnamed: 0,ord_num,ord_date
6,P2121,1992-06-15 00:00:00
7,P2121,1992-06-15 00:00:00
8,N914008,1994-09-14 00:00:00
10,P3087a,1993-05-29 00:00:00


### Hint:

In SQL the syntax is: 

```sql
SELECT ord_num, ord_date, qty 
FROM sales 
WHERE qty = 20;
```

## 3. Select first and last name from authors whose last name is "Ringer" and first name is "Anne"

In [None]:
# Both conditions must hold, so the two masks are combined with `&`
authors.loc[
    authors["au_lname"].eq("Ringer") & authors["au_fname"].eq("Anne"),
    ["au_fname", "au_lname"],
]

Unnamed: 0,au_fname,au_lname
21,Anne,Ringer


### Hint:

In SQL the syntax is:

```sql
SELECT au_fname, au_lname
FROM authors
WHERE au_fname = "Anne"
AND au_lname = "Ringer";
```

## 4. Select the name and city of publishers based in either Germany or France

In [4]:
# `isin` replaces the chain of OR-ed equality checks with one membership test
publishers.loc[
    publishers["country"].isin(["Germany", "France"]),
    ["pub_name", "city"],
]

Unnamed: 0,pub_name,city
5,GGG&G,Mnchen
7,Lucerne Publishing,Paris


### Hint:

In SQL the syntax is:

```sql
SELECT pub_name, city 
FROM publishers 
WHERE country='Germany' 
OR country='France';
```

## 5. Select first name, last name and city from authors whose first name is "Dean" and whose city is either "Oakland" or "Berkeley"

In [6]:
# The OR-ed city checks sit in their own parentheses so they are evaluated as
# one unit before being AND-ed with the first-name check.
(
    authors.loc[
        (authors["au_fname"] == "Dean")
        & (
            (authors["city"] == "Oakland")
            | (authors["city"] == "Berkeley")
        ),
        ["au_fname", "au_lname", "city"],
    ]
)

Unnamed: 0,au_fname,au_lname,city
4,Dean,Straight,Oakland


Please look closely at the brackets above: we have an extra set of parentheses `()` surrounding the Oakland or Berkeley clause. Why do we need these? What difference do they make?

### Hint:

In SQL the syntax is:

```sql
SELECT au_fname,au_lname,city
FROM authors
WHERE au_fname ="Dean" 
AND (city ="Oakland" OR city = "Berkeley");
```

## 6. Select the name, city and country of publishers not based in the USA

In [8]:
# `~` inverts the mask, mirroring SQL's NOT in front of the comparison
publishers.loc[
    ~publishers["country"].eq("USA"),
    ["pub_name", "city", "country"],
]

Unnamed: 0,pub_name,city,country
5,GGG&G,Mnchen,Germany
7,Lucerne Publishing,Paris,France


### Hint:

The SQL syntax is:
```sql
SELECT pub_name, city,country
FROM publishers
WHERE NOT country="USA";
```

In [7]:
# Filter the rows with `query`, then keep only the three requested columns
publishers.query('country != "USA"')[["pub_name", "city", "country"]]

Unnamed: 0,pub_name,city,country
5,GGG&G,Mnchen,Germany
7,Lucerne Publishing,Paris,France


Alternative solution, using `!=` instead of negating the comparison with `~`:

In [None]:
# Compare with `!=` directly instead of negating an `==` mask with `~`.
# Previously this cell held only commented-out code, so it displayed nothing.
publishers.loc[publishers["country"] != "USA", ["pub_name", "city", "country"]]

## 7. Select first name, last name and city from authors whose city is "Oakland" or "Berkeley", and last name is **not** "Straight"

In [None]:
# (Oakland OR Berkeley) AND last name is not "Straight"
authors.loc[
    (authors["city"].eq("Oakland") | authors["city"].eq("Berkeley"))
    & authors["au_lname"].ne("Straight"),
    ["au_fname", "au_lname", "city"],
]

Unnamed: 0,au_fname,au_lname,city
1,Marjorie,Green,Oakland
2,Cheryl,Carson,Berkeley
6,Abraham,Bennet,Berkeley
15,Dirk,Stringer,Oakland
16,Stearns,MacFeather,Oakland
17,Livia,Karsen,Oakland


### Hint:

In SQL the syntax is:
```sql
SELECT au_fname, au_lname, city 
FROM authors
WHERE (city = "Oakland" OR city = "Berkeley") 
AND NOT au_lname = "Straight";
```

In [9]:
# Subin
# Filter the rows with a query string first, then show the requested columns.
authors_name_city = authors.query(
    'au_lname != "Straight" and city in ("Oakland","Berkeley")'
)
authors_name_city.loc[:, ["au_fname", "au_lname", "city"]]

Unnamed: 0,au_fname,au_lname,city
1,Marjorie,Green,Oakland
2,Cheryl,Carson,Berkeley
6,Abraham,Bennet,Berkeley
15,Dirk,Stringer,Oakland
16,Stearns,MacFeather,Oakland
17,Livia,Karsen,Oakland


In [11]:
# Andreas
# Attribute-style column access; the row mask is applied first, then the columns.
authors[
    authors.city.isin(["Oakland", "Berkeley"]) & (authors.au_lname != "Straight")
][["au_fname", "au_lname", "city"]]

Unnamed: 0,au_fname,au_lname,city
1,Marjorie,Green,Oakland
2,Cheryl,Carson,Berkeley
6,Abraham,Bennet,Berkeley
15,Dirk,Stringer,Oakland
16,Stearns,MacFeather,Oakland
17,Livia,Karsen,Oakland
