<a href="https://colab.research.google.com/github/matthewpecsok/data_engineering/blob/main/tutorials/de_api_tutorial_northwind.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# get the database

In [1]:
# Download the Northwind SQLite database and save it as northwind.db in the working directory.
!wget -O northwind.db https://github.com/matthewpecsok/data_engineering/raw/main/data/northwind.db

--2024-08-02 20:00:47--  https://github.com/matthewpecsok/data_engineering/raw/main/data/northwind.db
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/matthewpecsok/data_engineering/main/data/northwind.db [following]
--2024-08-02 20:00:47--  https://raw.githubusercontent.com/matthewpecsok/data_engineering/main/data/northwind.db
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 602112 (588K) [application/octet-stream]
Saving to: ‘northwind.db’


2024-08-02 20:00:48 (11.8 MB/s) - ‘northwind.db’ saved [602112/602112]



# install required libraries

In [2]:
!pip install flask # webserver
!pip install faker # fake data generator

Collecting faker
  Downloading Faker-26.1.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-26.1.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-26.1.0


In [3]:
import requests

# create the API app in flask

Use multiprocessing to run Flask in a separate process so the notebook is free to continue executing other code.

In [43]:
from flask import Flask, request, jsonify
import sqlite3
import threading
import multiprocessing, time
import pandas as pd

# The Flask application object; the @app.route decorators below register handlers on it.
app = Flask(__name__)

# SQLite database file opened by every handler (the file saved by the wget cell above).
DATABASE = 'northwind.db'

@app.route("/")
def home():
    """Serve the plain-text landing page at the API root."""
    greeting = "Hello World! This is the API homepage. No swagger."
    return greeting



@app.route('/customers', methods=['GET'], strict_slashes=False)
def get_customers():
    """Return every row of the ``customers`` table as JSON.

    Returns:
        A ``(response, 200)`` tuple whose JSON body is
        ``{"customers": [...]}``, with one object per row keyed by column name.
    """
    conn = sqlite3.connect(DATABASE)
    try:
        df = pd.read_sql_query("SELECT * FROM customers", conn)
    finally:
        # Close the connection even when the query raises, so a failed
        # request does not leak an open SQLite handle.
        conn.close()

    return jsonify({'customers': df.to_dict(orient='records')}), 200

@app.route('/products', methods=['GET'], strict_slashes=False)
def get_products():
    """Return every row of the ``products`` table as JSON.

    Returns:
        A ``(response, 200)`` tuple whose JSON body is
        ``{"products": [...]}``, with one object per row keyed by column name.
    """
    conn = sqlite3.connect(DATABASE)
    try:
        df = pd.read_sql_query("SELECT * FROM products", conn)
    finally:
        # Close the connection even when the query raises, so a failed
        # request does not leak an open SQLite handle.
        conn.close()

    return jsonify({'products': df.to_dict(orient='records')}), 200

@app.route('/customers/<string:customer_id>', methods=['GET'], strict_slashes=False)
def get_customer_by_id(customer_id):
    """Return the customer whose ``CustomerID`` equals ``customer_id``.

    Args:
        customer_id: The ID taken from the URL path. It is client-controlled,
            so it is bound as a query parameter and never formatted into the
            SQL text.

    Returns:
        The first matching row as a JSON object (default 200 status), or
        ``({"error": "Customer not found"}, 404)`` when no row matches.
    """
    conn = sqlite3.connect(DATABASE)
    try:
        # `?` placeholder + params: the driver handles quoting, which closes
        # the SQL-injection hole left by interpolating the path segment.
        df = pd.read_sql_query(
            "SELECT * FROM customers where CustomerID = ?",
            conn,
            params=(customer_id,),
        )
    finally:
        # Runs on every path, including the 404 return below and query errors.
        conn.close()

    if df.empty:
        return jsonify({'error': 'Customer not found'}), 404

    return jsonify(df.to_dict(orient='records')[0])


if __name__ == '__main__':
    # Re-running this cell would otherwise leave the previous server alive and
    # still bound to the port, so the new one dies with "Address already in use".
    # Stop any server process left over from an earlier run of this cell first.
    _previous_server = globals().get('process')
    if isinstance(_previous_server, multiprocessing.Process) and _previous_server.is_alive():
        _previous_server.terminate()
        _previous_server.join(timeout=5)

    process = multiprocessing.Process(target=app.run) # this is required for background running so the cell can be released.
    process.start()
    # The PID can be used to inspect or stop the server from a shell cell.
    print(f"Process ID: {process.pid}")



Process ID: 5233
 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.


In [48]:
# Run the single-customer lookup directly against SQLite (Flask is not involved)
# and display the resulting DataFrame.
# NOTE(review): `con` is not closed in this cell.
con = sqlite3.connect(DATABASE)
matt = pd.read_sql_query(f"SELECT * FROM customers where CustomerID = 'ALFKI'",con)
matt

Unnamed: 0,CustomerID,CompanyName,ContactName,ContactTitle,Address,City,Region,PostalCode,Country,Phone,Fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,Western Europe,12209,Germany,030-0074321,030-0076545


In [5]:
# Same kind of lookup through a raw cursor, using a bound `?` parameter instead of
# string formatting; the customer ID is supplied separately as a one-element tuple.
conn = sqlite3.connect(DATABASE)
cursor = conn.cursor()
cursor.execute("SELECT * FROM customers where CustomerID = ? ",('BLAUS',)).fetchall()

[('BLAUS',
  'Blauer See Delikatessen',
  'Hanna Moos',
  'Sales Representative',
  'Forsterstr. 57',
  'Mannheim',
  'Western Europe',
  '68306',
  'Germany',
  '0621-08460',
  '0621-08924')]

In [6]:
# List running processes whose `ps` line contains 1810.
# NOTE(review): 1810 is hard-coded, presumably a server PID from an earlier run — use the "Process ID" printed when the server starts.
!ps -ef | grep 1810

root         436     179  0 20:01 ?        00:00:00 /bin/bash -c ps -ef | grep 1810
root         440     436  0 20:01 ?        00:00:00 grep 1810


In [40]:
# Force-kill (signal 9) process 4369.
# NOTE(review): hard-coded PID, presumably the Flask server from an earlier run — substitute the current "Process ID" before running.
!kill -9 4369

# get public url

Use the URL printed below to make sure Flask is running. You should get a web page that says '*Hello World! This is the API homepage. No swagger.*'

In [42]:
# Print the public proxy URL for port 5000; open it in a browser to check that the website/API is running.
# That URL cannot be used from Python code in this notebook; for that, use 127.0.0.1:5000 instead.

from google.colab.output import eval_js
http_url = eval_js("google.colab.kernel.proxyPort(5000)")
print(http_url)

https://ytlr8kiwha-496ff2e9c6d22116-5000-colab.googleusercontent.com/


In [28]:
# Base URL that the request cells below prepend to each endpoint path.
localhost = 'http://127.0.0.1:5000'

In [37]:
# List all customers through the API.
url = f"{localhost}/customers/"
print(url)
# Bounded wait: without a timeout this call blocks the kernel forever when the
# server is down or stuck.
resp = requests.get(url, timeout=10)
resp.json()

http://127.0.0.1:5000/customers/


{'customers': [{'Address': 'Obere Str. 57',
   'City': 'Berlin',
   'CompanyName': 'Alfreds Futterkiste',
   'ContactName': 'Maria Anders',
   'ContactTitle': 'Sales Representative',
   'Country': 'Germany',
   'CustomerID': 'ALFKI',
   'Fax': '030-0076545',
   'Phone': '030-0074321',
   'PostalCode': '12209',
   'Region': 'Western Europe'},
  {'Address': 'Avda. de la Constitución 2222',
   'City': 'México D.F.',
   'CompanyName': 'Ana Trujillo Emparedados y helados',
   'ContactName': 'Ana Trujillo',
   'ContactTitle': 'Owner',
   'Country': 'Mexico',
   'CustomerID': 'ANATR',
   'Fax': '(5) 555-3745',
   'Phone': '(5) 555-4729',
   'PostalCode': '05021',
   'Region': 'Central America'},
  {'Address': 'Mataderos  2312',
   'City': 'México D.F.',
   'CompanyName': 'Antonio Moreno Taquería',
   'ContactName': 'Antonio Moreno',
   'ContactTitle': 'Owner',
   'Country': 'Mexico',
   'CustomerID': 'ANTON',
   'Fax': None,
   'Phone': '(5) 555-3932',
   'PostalCode': '05023',
   'Region': '

In [59]:
import requests
# Look up a customer ID that exists; the API should answer 200 with one JSON object.
customerid = 'BLAUS'
url = f"{localhost}/customers/{customerid}"
print(url)
# Bounded wait: without a timeout this call blocks the kernel forever when the
# server is down or stuck.
resp = requests.get(url, timeout=10)
print(resp)
resp.json()

http://127.0.0.1:5000/customers/BLAUS
<Response [200]>


{'Address': 'Forsterstr. 57',
 'City': 'Mannheim',
 'CompanyName': 'Blauer See Delikatessen',
 'ContactName': 'Hanna Moos',
 'ContactTitle': 'Sales Representative',
 'Country': 'Germany',
 'CustomerID': 'BLAUS',
 'Fax': '0621-08924',
 'Phone': '0621-08460',
 'PostalCode': '68306',
 'Region': 'Western Europe'}

In [60]:
import requests
# Look up a customer ID that does not exist; the API should answer 404 with an error body.
customerid = 'Hello'
url = f"{localhost}/customers/{customerid}"
print(url)
# Bounded wait: without a timeout this call blocks the kernel forever when the
# server is down or stuck.
resp = requests.get(url, timeout=10)
print(resp)
resp.json()

http://127.0.0.1:5000/customers/Hello
<Response [404]>


{'error': 'Customer not found'}

In [39]:
# List all products through the API.
url = f"{localhost}/products/"
print(url)
# Bounded wait: without a timeout this call blocks the kernel forever when the
# server is down or stuck.
resp = requests.get(url, timeout=10)
resp.json()

http://127.0.0.1:5000/products/


{'products': [{'CategoryID': 1,
   'Discontinued': '0',
   'ProductID': 1,
   'ProductName': 'Chai',
   'QuantityPerUnit': '10 boxes x 20 bags',
   'ReorderLevel': 10,
   'SupplierID': 1,
   'UnitPrice': 18.0,
   'UnitsInStock': 39,
   'UnitsOnOrder': 0},
  {'CategoryID': 1,
   'Discontinued': '0',
   'ProductID': 2,
   'ProductName': 'Chang',
   'QuantityPerUnit': '24 - 12 oz bottles',
   'ReorderLevel': 25,
   'SupplierID': 1,
   'UnitPrice': 19.0,
   'UnitsInStock': 17,
   'UnitsOnOrder': 40},
  {'CategoryID': 2,
   'Discontinued': '0',
   'ProductID': 3,
   'ProductName': 'Aniseed Syrup',
   'QuantityPerUnit': '12 - 550 ml bottles',
   'ReorderLevel': 25,
   'SupplierID': 1,
   'UnitPrice': 10.0,
   'UnitsInStock': 13,
   'UnitsOnOrder': 70},
  {'CategoryID': 2,
   'Discontinued': '0',
   'ProductID': 4,
   'ProductName': "Chef Anton's Cajun Seasoning",
   'QuantityPerUnit': '48 - 6 oz jars',
   'ReorderLevel': 0,
   'SupplierID': 2,
   'UnitPrice': 22.0,
   'UnitsInStock': 53,
   

In [10]:
# Fetch a single customer by ID. The timeout keeps the cell from hanging
# indefinitely when the server is down or stuck.
response = requests.get('http://127.0.0.1:5000/customers/WELLI', timeout=10)
response.json()

[{'Address': 'Rua do Mercado, 12',
  'City': 'Resende',
  'CompanyName': 'Wellington Importadora',
  'ContactName': 'Paula Parente',
  'ContactTitle': 'Sales Manager',
  'Country': 'Brazil',
  'CustomerID': 'WELLI',
  'Fax': None,
  'Phone': '(14) 555-8122',
  'PostalCode': '08737-363',
  'Region': 'South America'}]

In [11]:
# Fetch the full product list. The timeout keeps the cell from hanging
# indefinitely when the server is down or stuck.
response = requests.get('http://127.0.0.1:5000/products/', timeout=10)
response.json()

[{'CategoryID': 1,
  'Discontinued': '0',
  'ProductID': 1,
  'ProductName': 'Chai',
  'QuantityPerUnit': '10 boxes x 20 bags',
  'ReorderLevel': 10,
  'SupplierID': 1,
  'UnitPrice': 18.0,
  'UnitsInStock': 39,
  'UnitsOnOrder': 0},
 {'CategoryID': 1,
  'Discontinued': '0',
  'ProductID': 2,
  'ProductName': 'Chang',
  'QuantityPerUnit': '24 - 12 oz bottles',
  'ReorderLevel': 25,
  'SupplierID': 1,
  'UnitPrice': 19.0,
  'UnitsInStock': 17,
  'UnitsOnOrder': 40},
 {'CategoryID': 2,
  'Discontinued': '0',
  'ProductID': 3,
  'ProductName': 'Aniseed Syrup',
  'QuantityPerUnit': '12 - 550 ml bottles',
  'ReorderLevel': 25,
  'SupplierID': 1,
  'UnitPrice': 10.0,
  'UnitsInStock': 13,
  'UnitsOnOrder': 70},
 {'CategoryID': 2,
  'Discontinued': '0',
  'ProductID': 4,
  'ProductName': "Chef Anton's Cajun Seasoning",
  'QuantityPerUnit': '48 - 6 oz jars',
  'ReorderLevel': 0,
  'SupplierID': 2,
  'UnitPrice': 22.0,
  'UnitsInStock': 53,
  'UnitsOnOrder': 0},
 {'CategoryID': 2,
  'Discontinu