# COURSE 6: Database and SQL for DS
# DB & SQL for DS

## Prerequisite: 

TODO: Preform API calls using the sodapy library to interact with the Socrata API (*API docs at: https://dev.socrata.com/foundry/data.sfgov.org/yitu-d5am* )

TODO: create and store Film Locations in San Francisco data in a db wtih sqlite3 from datasette

1. Install datasette
2. Import bs4, requests 

In [1]:
# !pip install datasette 
# !pip install datasette requests
# !pip install sodapy

In [2]:
import bs4 as bs 
import datasette 
import requests
import sqlite3

In [3]:
import pandas as pd
from sodapy import Socrata

In [4]:
def fetch_data(endpoint,limit=1000):
    all_response = []
    offset = 0
    client = Socrata("data.sfgov.org", None)
    while True:
        # driver code using sodapy
        try:
            response = client.get(endpoint, limit=1000, offset=offset)
        
        except Exception as e:
            print(f'Failed to retrieve data: Reason: {e}')
            break
        
        if not response:
            print(f'No data left to retrieve after offset: {offset}')
            break
        
        # Append the data to all_response
        all_response.extend(response)
        offset += limit
        
    results_df = pd.DataFrame.from_records(response)

    
    return results_df

endpoint = "yitu-d5am"

# Fetch the data
data = fetch_data(endpoint)

# Convert the data to a pandas DataFrame
df = pd.DataFrame(data)



No data left to retrieve after offset: 3000


In [8]:

# Create a SQLite database
conn = sqlite3.connect('FilmLocations.db')
cursor = conn.cursor()


In [10]:

# Create a table
cursor.execute('''
    CREATE TABLE IF NOT EXISTS FilmLocations (
        title TEXT,
        release_year TEXT,
        locations TEXT,
        fun_facts TEXT,
        production_company TEXT,
        distributor TEXT,
        director TEXT,
        writer TEXT,
        actor_1 TEXT,
        actor_2 TEXT,
        actor_3 TEXT
    )
''')

# Insert the data into the table
df.to_sql('FilmLocations', conn, if_exists='replace', index=False)

# Commit and close the connection
conn.commit()
conn.close()
print("Loaded and stored in FilmLocations.db")


Loaded and stored in FilmLocations.db


## THE LABS 

### Select statement:

Suppose we want to retrieve details of all the films from the FilmLocations table. The details of each film record should contain all the columns. The query statement for this is:

**SELECT * FROM FileLocations LIMIT 10**


In [11]:
# Connect to the SQLite database
conn = sqlite3.connect('FilmLocations.db')

# Define the SQL query
query = "SELECT * FROM FilmLocations LIMIT 10"  # Example query to select the first 10 rows
# Execute the query and load the result into a DataFrame
df = pd.read_sql_query(query, conn)

# Close the connection
conn.close()

# Display the DataFrame
df

Unnamed: 0,title,release_year,locations,production_company,distributor,director,writer,actor_1,actor_2,actor_3,:@computed_region_6qbp_sg9q,:@computed_region_ajp5_b2md,:@computed_region_26cr_cadq,fun_facts
0,Experiment in Terror,1962,The Sea Captain's Chest (Fisherman's Wharf),Columbia Pictures Corporation,Columbia Pictures,Blake Edwards,The Gordons,Glenn Ford,Lee Remick,Stefanie Powers,99.0,23.0,3.0,
1,Experiment in Terror,1962,100 St. Germain Avenue,Columbia Pictures Corporation,Columbia Pictures,Blake Edwards,The Gordons,Glenn Ford,Lee Remick,Stefanie Powers,47.0,38.0,8.0,
2,Chan is Missing,1982,"Li Po (916 Grant Avenue at Washington, Chinatown)",New Yorker Films,New Yorker Films,Wayne Wang,Wayne Wang,Wood Moy,Marc Hayashi,Lauren Chew,104.0,6.0,3.0,
3,A View to a Kill,1985,Taylor and Jefferson Streets (Fisherman's Wharf),Metro-Goldwyn Mayer,MGM/UA Entertainment Company,John Glen,Richard Maibaum,Roger Moore,Christopher Walken,Tanya Roberts,99.0,23.0,3.0,
4,The Californians,2005,,Parker Film Company,Fabrication Films,Jonathan Parker,Jonathan Parker & Catherine DiNapoli,Noah Wyle,,,21.0,36.0,10.0,
5,Babies,2010,,Canal+,Focus Features,Thomas Balmes,Thomas Balmes,Bayar,Hattie,,21.0,36.0,10.0,
6,I's,2011,1 Post Street,Banshee Cinema,,Chris Edgette,Kyle Tuck,,,,19.0,8.0,3.0,
7,When We Rise,2017,Bay Bridge,Film 49 Productions,Amercian Broadcasting Company,Gus Van Sant,Dustin Lance Black,Guy Pierce,Mary-Louise Parker,Michael Kenneth Williams,,,,
8,Nash Bridges,2021,California Street at Davis,"Village NB Productions, LLC",USA Nework,Greg Beeman,"Carlton Cuse, Bill Chais",Don Johnson,Cheech Marin,Joe Dinicol,108.0,8.0,3.0,
9,This Is Us,2022,Alamo Square Park,20th Television,NBC,Mandy Moore,"Dan Fogelman, Casey Johnson, David Windsor, Ch...",Milo Ventimiglia,Mandy Moore,Sterling K. Brown,22.0,9.0,11.0,


**SELECT Title, Director, Writer FROM FilmLocations;**

In [19]:
conn = sqlite3.connect('FilmLocations.db')

query2 = "SELECT title, director, writer FROM FilmLocations"

df2 = pd.read_sql_query(query2, conn)
conn.close()
df2.head()

Unnamed: 0,title,director,writer
0,Experiment in Terror,Blake Edwards,The Gordons
1,Experiment in Terror,Blake Edwards,The Gordons
2,Chan is Missing,Wayne Wang,Wayne Wang
3,A View to a Kill,John Glen,Richard Maibaum
4,The Californians,Jonathan Parker,Jonathan Parker & Catherine DiNapoli


In [21]:
conn = sqlite3.connect('FilmLocations.db')

query3 = "SELECT title, release_year, locations FROM FilmLocations WHERE release_year>=2001;"

df3 = pd.read_sql_query(query3,conn)
conn.close()
df3.head()

Unnamed: 0,title,release_year,locations
0,The Californians,2005,
1,Babies,2010,
2,I's,2011,1 Post Street
3,When We Rise,2017,Bay Bridge
4,Nash Bridges,2021,California Street at Davis


### Practice

1. Retrieve the fun facts and filming locations of all films.


In [26]:
conn = sqlite3.connect('FilmLocations.db')

prac1_query = "SELECT fun_facts, locations FROM FilmLocations"

df_ans1 = pd.read_sql_query(prac1_query,conn)

conn.close()

df_ans1

Unnamed: 0,fun_facts,locations
0,,The Sea Captain's Chest (Fisherman's Wharf)
1,,100 St. Germain Avenue
2,,"Li Po (916 Grant Avenue at Washington, Chinatown)"
3,,Taylor and Jefferson Streets (Fisherman's Wharf)
4,,
5,,
6,,1 Post Street
7,,Bay Bridge
8,,California Street at Davis
9,,Alamo Square Park


2. Retrieve the names of all films released in the 20th century and before (release years before 2000 including 2000), along with filming locations and release years.


In [30]:
conn = sqlite3.connect('FilmLocations.db')

prac2_query = "SELECT title, locations, release_year FROM FilmLocations WHERE release_year >= 2000;"
df_ans2 = pd.read_sql_query(prac2_query, conn)
conn.close()

df_ans2

Unnamed: 0,title,locations,release_year
0,The Californians,,2005
1,Babies,,2010
2,I's,1 Post Street,2011
3,When We Rise,Bay Bridge,2017
4,Nash Bridges,California Street at Davis,2021
5,This Is Us,Alamo Square Park,2022


3. Retrieve the names, production company names, filming locations, and release years of the films not written by James Cameron.


In [36]:
conn = sqlite3.connect('FilmLocations.db')

prac3_query = "SELECT title, production_company, locations, release_year FROM FilmLocations WHERE writer != 'James Cameron';" 
df_ans3 = pd.read_sql_query(prac3_query, conn)
conn.close()

df_ans3

Unnamed: 0,title,production_company,locations,release_year
0,Experiment in Terror,Columbia Pictures Corporation,The Sea Captain's Chest (Fisherman's Wharf),1962
1,Experiment in Terror,Columbia Pictures Corporation,100 St. Germain Avenue,1962
2,Chan is Missing,New Yorker Films,"Li Po (916 Grant Avenue at Washington, Chinatown)",1982
3,A View to a Kill,Metro-Goldwyn Mayer,Taylor and Jefferson Streets (Fisherman's Wharf),1985
4,The Californians,Parker Film Company,,2005
5,Babies,Canal+,,2010
6,I's,Banshee Cinema,1 Post Street,2011
7,When We Rise,Film 49 Productions,Bay Bridge,2017
8,Nash Bridges,"Village NB Productions, LLC",California Street at Davis,2021
9,This Is Us,20th Television,Alamo Square Park,2022
