#### Request data from our REST API application

We have developed a REST API application for grants. Use the requests package in a new application to get the data from our app and save it in a separate database.

In [None]:
from flask import Flask, request, make_response, render_template
import json
# Flask application object that the route decorators below register against.
app = Flask(__name__)

# In-memory list of grants; /delete_grant treats a grant's index in this list
# as its id.
grants = []

# Leftover sketch of a grant record built from request values (not executed):
#    {"name": request.values["name"],
#     "description": request.values["description"],
#     "price": request.values["price"]}


@app.route('/add_grant', methods=['POST'])
def add_grant():
    """Append the grant sent in the request body to the in-memory list.

    The body may be a JSON object, or a JSON string that itself contains an
    encoded object (which is what a client produces when it passes the
    output of ``json.dumps`` as the JSON payload).

    Returns:
        A response whose body is a JSON document with ``message`` and
        ``error_code`` keys (200 when the grant was stored, 500 otherwise).
    """
    grant_name = None
    try:
        grant_json = request.get_json()
        # A doubly-encoded payload arrives as a str; decode it once more.
        if isinstance(grant_json, str):
            grant_json = json.loads(grant_json)
        # Looked up before appending so a grant without a name is rejected.
        grant_name = grant_json["name"]
        grants.append(grant_json)
        response = {"message": 'Grant ' + str(grant_name) + " is added", "error_code": 200}
    except (TypeError, KeyError, ValueError):
        # TypeError: body is not a mapping; KeyError: no "name";
        # ValueError: the inner string is not valid JSON.
        response = {"message": 'Grant ' + str(grant_name) + " is NOT added", "error_code": 500}
    return make_response(json.dumps(response))


@app.route('/delete_grant', methods=["DELETE"])
def delete_grant():
    """Delete the grant whose list position is given by ``grant_id``.

    The request body must be a JSON object with a ``grant_id`` entry that
    converts to a non-negative integer.

    Returns:
        A response whose body is a JSON document with ``message`` and
        ``error_code`` keys (200 when deleted, 500 when the id is missing,
        malformed or out of range).
    """
    grant_id = None
    try:
        grant_id = int(request.get_json()["grant_id"])
        # A negative index would silently delete from the end of the list.
        if grant_id < 0:
            raise IndexError(grant_id)
        del grants[grant_id]
        response = {"message": 'Grant ' + str(grant_id) + " has been deleted", "error_code": 200}
    except (TypeError, KeyError, ValueError, IndexError):
        response = {"message": 'Grant ' + str(grant_id) + " doesn't exist", "error_code": 500}
    return make_response(json.dumps(response))


@app.route('/grants', methods=["GET"])
def grants_list():
    """Return every stored grant, serialised as one JSON array."""
    payload = json.dumps(grants)
    return make_response(payload)


@app.route('/grant/<int:id>', methods=["GET"])
def grant(id):
    """Return the grant stored at list position ``id`` as JSON.

    Args:
        id: Index into the ``grants`` list, taken from the URL.

    Returns:
        A response containing the JSON-encoded grant, or a JSON document
        with ``message`` and ``error_code`` 500 when no grant has that index.
    """
    try:
        # Index the grants list; ``grant`` is this view function itself.
        found = grants[id]
    except IndexError:
        response = {"message": 'Grant ' + str(id) + " doesn't exist", "error_code": 500}
        return make_response(json.dumps(response))
    return make_response(json.dumps(found))


@app.route('/', methods=["GET"])
def index():
    """Echo the caller's User-Agent header inside a small HTML paragraph."""
    return '<p>Your browser is {}</p>'.format(request.headers.get('User-Agent'))

if __name__ == "__main__":
    # Start Flask's built-in server on port 7001 when run directly.
    app.run(port=7001)

In [None]:
!curl -X POST -d '{"name":"sample","description":"description1","price":"1"}' -H "Content-Type: application/json" http://localhost:7001/add_grant

In [None]:
!curl -X DELETE -d '{"grant_id":"0"}' -H "Content-Type: application/json" http://localhost:7001/delete_grant

In [None]:
import requests
import sys
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import sessionmaker
import json

# Port of the grants REST service; kept as a string because it is
# concatenated into the request URLs below.
port = "7001"

# Declarative base class that the Grant model below inherits from.
Base = declarative_base()
# SQLite engine on a file path; echo=True enables SQLAlchemy's statement logging.
engine = create_engine('sqlite:////Users/kprzystalski/data.sqlite', echo=True)
# Session factory bound to the engine, and the single module-level session
# that add_new_grant writes through.
Session = sessionmaker(bind=engine)
session = Session()

class Grant(Base):
    """ORM model for one row of the ``Grants`` table."""

    __tablename__ = 'Grants'

    # Integer primary key followed by the three grant fields.
    id = Column(Integer, primary_key=True)
    name = Column(String(80))
    description = Column(String(150))
    price = Column(Integer)

    def __init__(self, name, description, price):
        """Store the three grant fields on the new instance."""
        self.name, self.description, self.price = name, description, price

    def get_name(self):
        """Return this grant's name."""
        grant_name = self.name
        return grant_name

def add_new_grant(name, description, price):
    """Insert one grant row through the module-level session and commit it.

    Args:
        name: Grant name; stored as ``str(name)``.
        description: Grant description; stored as ``str(description)``.
        price: Grant price; stored as ``int(price)``.

    Raises:
        ValueError, TypeError: If ``price`` cannot be converted to an int.
        Exception: Whatever the database layer raises; the session is rolled
            back before the error is re-raised.
    """
    grant = Grant(name=str(name), description=str(description), price=int(price))
    try:
        session.add(grant)
        session.commit()
    except Exception:
        # Without a rollback the shared session stays in a failed state and
        # every later add/commit on it fails as well.
        session.rollback()
        raise

def save_grants():
    """Fetch all grants from the REST service and store each in the database.

    A grant that cannot be stored is reported on stdout and skipped, so one
    bad record does not stop the rest. A non-200 reply is reported and
    nothing is stored.
    """
    response = requests.get('http://localhost:'+port+'/grants')
    if response.status_code != 200:
        print("Couldn't fetch grants, status " + str(response.status_code))
        return
    for grant in response.json():
        try:
            add_new_grant(grant["name"], grant["description"], grant["price"])
        except Exception:
            # The record may be the thing that is malformed, so the name is
            # looked up defensively instead of indexing it again.
            grant_name = grant.get("name") if isinstance(grant, dict) else grant
            print("Couldn't add grant " + str(grant_name))

def add_grants():
    """POST three sample grants to the service and print each accepted one."""
    endpoint = 'http://localhost:' + port + '/add_grant'
    samples = [
        {"name": "sample1", "description": "desc1", "price": "1"},
        {"name": "sample2", "description": "desc2", "price": "2"},
        {"name": "sample3", "description": "desc3", "price": "3"},
    ]

    for sample in samples:
        # The dict is serialised first and then passed as ``json=``, so the
        # request body is a JSON string wrapping the encoded object.
        reply = requests.post(endpoint, json=json.dumps(sample))
        if reply.status_code != 200:
            continue
        print("Added: " + str(sample["name"]))

def delete_grant(id):
    """Ask the service to delete the grant at list position ``id``.

    Args:
        id: Index of the grant to delete; sent as ``grant_id`` in a JSON body.

    Returns:
        The ``requests`` response, so the caller can inspect the service's
        ``message`` / ``error_code`` reply.
    """
    # The delete route is /delete_grant; /grants only accepts GET.
    response = requests.delete('http://localhost:' + port + '/delete_grant', json={"grant_id": id})
    return response

def setup_db():
    """Create the tables of all models registered on ``Base`` in the engine's database."""
    metadata = Base.metadata
    metadata.create_all(engine)

if __name__ == '__main__':
    # With no command-line argument, fall through to the default action
    # instead of failing on sys.argv[1].
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command == "add":
        add_grants()
    elif command == "setupdb":
        setup_db()
    else:
        save_grants()


#### Parse an Amazon webpage with BeautifulSoup

Using Requests and BeautifulSoup, find all prices on each subpage for the keyword "Flask development". Find all prices (including the fractional part) and print them as a list.

In [None]:
import bs4
import requests

# Browser-like User-Agent sent with every request below.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0'}


# Search results for "flask development".
result = requests.get("https://www.amazon.com/s?i=aps&k=flask%20development", headers=headers)

amazon_page = result.text  # alternative offline source: open('amazon_flask.html','r')
soup = bs4.BeautifulSoup(amazon_page, features="lxml")

# Every span carrying exactly this class string; presumably the product
# titles on the result page - confirm against the current markup.
titles = soup.findAll('span',{"class": "a-size-base-plus a-color-base a-text-normal"})

for title in titles:
    print(title.text)


# Same steps for "django development"; result, soup and titles are rebound.
result = requests.get("https://www.amazon.com/s?i=aps&k=django%20development", headers=headers)
amazon_django_page = result.text
soup = bs4.BeautifulSoup(amazon_django_page, features="lxml")
titles = soup.findAll('span',{"class": "a-size-base-plus a-color-base a-text-normal"})

for title in titles:
    print(title.text)

In [None]:
import bs4
import requests

# Browser-like User-Agent sent with the request below.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0'}

result = requests.get("https://www.amazon.com/s?i=aps&k=flask%20development", headers=headers)
amazon_page = result.text
soup = bs4.BeautifulSoup(amazon_page, features="lxml")


# NOTE: titles is collected here but not referenced by the loop below.
titles = soup.findAll('span',{"class":"a-size-base-plus a-color-base a-text-normal"})

rows = soup.findAll('div',{"class":"sg-row"})

# all_rows stays True until the first qualifying title is met; that first
# match is skipped without being recorded in previous_title.
all_rows = True
# Text of the most recently processed title, used to skip consecutive repeats.
previous_title = None
for row in rows:
    inners = row.findAll('div',{"class":"sg-col-inner"})
    for inner in inners:
        title = inner.findAll('span',{"class":"a-size-base-plus a-color-base a-text-normal"})
        # Only containers holding exactly one title span are considered;
        # presumably wider containers wrap several products - TODO confirm.
        if len(title) == 1 and previous_title!=title[0].text:
            if all_rows:
                all_rows = False
                continue
            previous_title=title[0].text
            price = inner.findAll('span',{"class": "a-price", "data-a-size":"l"})
            # Items without a matching price span are not printed.
            if len(price) > 0:
                print(title[0].text)
                # next_element is the node that directly follows the a-price
                # tag in document order; presumably it carries the full
                # price text - confirm against the page markup.
                print(price[0].next_element.text)

####  Save all requested data into a database using SQLAlchemy

Extend the previous example by saving the data into a database using SQLAlchemy. Prepare a model for the items found on Amazon with fields such as title, price, and review.

Server and client scripts are given below.

In [None]:
import requests
import sys
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.orm import sessionmaker
import json
import bs4
import requests


# NOTE: port is not referenced by the code visible in this cell.
port = "7001"

# Declarative base class that the Book model below inherits from.
Base = declarative_base()
# SQLite engine on a file path; echo=True enables SQLAlchemy's statement logging.
engine = create_engine('sqlite:////Users/kprzystalski/data.sqlite', echo=True)
# Session factory bound to the engine, and the single module-level session
# that add_new_book writes through.
Session = sessionmaker(bind=engine)
session = Session()

class Book(Base):
    """ORM model for one row of the ``Books`` table."""

    __tablename__ = 'Books'

    # Integer primary key followed by the three scraped fields.
    id = Column(Integer, primary_key=True)
    title = Column(String(80))
    price = Column(Float)
    review = Column(String(30))

    def __init__(self, title, price, review):
        """Store the three book fields on the new instance."""
        self.title, self.price, self.review = title, price, review

    def get_name(self):
        """Return this book's title."""
        book_title = self.title
        return book_title

def add_new_book(title, price, review):
    """Insert one book row through the module-level session and commit it.

    Args:
        title: Book title; stored as ``str(title)``.
        price: Book price; stored as ``float(price)``.
        review: Review text; stored as ``str(review)``.

    Raises:
        ValueError, TypeError: If ``price`` cannot be converted to a float.
        Exception: Whatever the database layer raises; the session is rolled
            back before the error is re-raised.
    """
    book = Book(title=str(title), price=float(price), review=str(review))
    try:
        session.add(book)
        session.commit()
    except Exception:
        # Without a rollback the shared session stays in a failed state and
        # every later add/commit on it fails as well.
        session.rollback()
        raise


def save_books():
    """Scrape the Amazon results for "flask development" and store priced books.

    For every ``sg-col-inner`` container holding exactly one title span, the
    title, the price text and the first ``a-icon-alt`` text (or
    "Not rated yet") are passed to ``add_new_book``. The first qualifying
    container is skipped, as are consecutive repeats of the same title and
    items without a price span. A book that cannot be stored is reported on
    stdout and skipped.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0'}

    result = requests.get("https://www.amazon.com/s?i=aps&k=flask%20development", headers=headers)
    soup = bs4.BeautifulSoup(result.text, features="lxml")

    # True until the first qualifying title is met; that match is skipped.
    all_rows = True
    previous_title = None
    for row in soup.findAll('div', {"class": "sg-row"}):
        for inner in row.findAll('div', {"class": "sg-col-inner"}):
            title = inner.findAll('span', {"class": "a-size-base-plus a-color-base a-text-normal"})
            if len(title) != 1 or previous_title == title[0].text:
                continue
            if all_rows:
                all_rows = False
                continue
            previous_title = title[0].text
            price = inner.findAll('span', {"class": "a-price", "data-a-size": "l"})
            if not price:
                continue
            ratings = inner.findAll('span', {"class": "a-icon-alt"})
            review = ratings[0].text if ratings else "Not rated yet"
            try:
                # [1:] drops the first character of the price text,
                # presumably the currency symbol - confirm against the markup.
                add_new_book(title[0].text, price[0].next_element.text[1:], review)
            except Exception:
                print("Couldn't add book " + str(title[0].text))

def setup_db():
    """Create the tables of all models registered on ``Base`` in the engine's database."""
    metadata = Base.metadata
    metadata.create_all(engine)

if __name__ == '__main__':
    # "setupdb" anywhere on the command line creates the tables; any other
    # invocation scrapes and stores the books.
    if "setupdb" not in sys.argv:
        save_books()
    else:
        setup_db()

#### Convert the Amazon scraping script

Scrape all book titles for the "Python development" search term on Amazon using concurrency.

In [None]:
import sys
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy.orm import sessionmaker
import concurrent.futures
import threading
import bs4
import requests


# NOTE: port is not referenced by the code visible in this cell.
port = "7001"

# Declarative base class that the Book model below inherits from.
Base = declarative_base()
# SQLite engine on a file path; echo=True enables statement logging, and
# check_same_thread=False is passed through to the sqlite3 driver so a
# connection may be used from a thread other than the one that created it.
engine = create_engine('sqlite:////Users/kprzystalski/data.sqlite', echo=True, connect_args={'check_same_thread': False})
# Session factory bound to the engine, and one module-level session.
Session = sessionmaker(bind=engine)
session = Session()

# Per-thread storage used by get_session to keep one requests.Session per thread.
thread_local = threading.local()

def get_session():
    """Return the calling thread's ``requests.Session``, creating it on first use.

    Returns:
        The session stored on ``thread_local`` for the current thread.
    """
    if not hasattr(thread_local, "session"):
        thread_local.session = requests.Session()
    # Returned outside the ``if`` so that later calls on the same thread get
    # the cached session instead of None.
    return thread_local.session


class Book(Base):
    """ORM model for one row of the ``Books`` table."""

    __tablename__ = 'Books'

    # Integer primary key followed by the three scraped fields.
    id = Column(Integer, primary_key=True)
    title = Column(String(80))
    price = Column(Float)
    review = Column(String(30))

    def __init__(self, title, price, review):
        """Store the three book fields on the new instance."""
        self.title, self.price, self.review = title, price, review

    def get_name(self):
        """Return this book's title."""
        book_title = self.title
        return book_title

def add_new_book(title, price, review):
    """Insert one book row using a database session private to this call.

    ``save_books`` is run on several worker threads at once and a SQLAlchemy
    ``Session`` is not meant to be shared between threads, so a fresh
    session is taken from the ``Session`` factory for every insert instead
    of using the module-level ``session``.

    Args:
        title: Book title; stored as ``str(title)``.
        price: Book price; stored as ``float(price)``.
        review: Review text; stored as ``str(review)``.

    Raises:
        ValueError, TypeError: If ``price`` cannot be converted to a float.
        Exception: Whatever the database layer raises; the session is rolled
            back before the error is re-raised.
    """
    book = Book(title=str(title), price=float(price), review=str(review))
    db_session = Session()
    try:
        db_session.add(book)
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        # Always hand the connection back, on success and on failure.
        db_session.close()



def save_books(url):
    """Scrape one Amazon result page and store each priced book found on it.

    For every ``sg-col-inner`` container holding exactly one title span, the
    title, the price text and the first ``a-icon-alt`` text (or
    "Not rated yet") are passed to ``add_new_book``. The first qualifying
    container is skipped, as are consecutive repeats of the same title and
    items without a price span. A book that cannot be stored is reported on
    stdout and skipped.

    Args:
        url: Address of the result page to download.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0'}

    result = requests.get(url, headers=headers)
    soup = bs4.BeautifulSoup(result.text, features="lxml")

    # True until the first qualifying title is met; that match is skipped.
    all_rows = True
    previous_title = None
    for row in soup.findAll('div', {"class": "sg-row"}):
        for inner in row.findAll('div', {"class": "sg-col-inner"}):
            title = inner.findAll('span', {"class": "a-size-base-plus a-color-base a-text-normal"})
            if len(title) != 1 or previous_title == title[0].text:
                continue
            if all_rows:
                all_rows = False
                continue
            previous_title = title[0].text
            price = inner.findAll('span', {"class": "a-price", "data-a-size": "l"})
            if not price:
                continue
            ratings = inner.findAll('span', {"class": "a-icon-alt"})
            review = ratings[0].text if ratings else "Not rated yet"
            try:
                # [1:] drops the first character of the price text,
                # presumably the currency symbol - confirm against the markup.
                add_new_book(title[0].text, price[0].next_element.text[1:], review)
            except Exception:
                print("Couldn't add book " + str(title[0].text))


def save_books_from_url():
    """Scrape the Flask and Django result pages concurrently.

    Each URL is handed to ``save_books`` on a worker thread. A page whose
    worker raises is reported on stdout; the remaining pages still run.
    """
    urls = ["https://www.amazon.com/s?i=aps&k=flask%20development", "https://www.amazon.com/s?i=aps&k=django%20development"]
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        pending = {executor.submit(save_books, url): url for url in urls}
        for future in concurrent.futures.as_completed(pending):
            try:
                # result() re-raises whatever the worker raised; without this
                # call a failed download or parse would pass unnoticed.
                future.result()
            except Exception as error:
                print("Couldn't save books from " + pending[future] + ": " + str(error))


def setup_db():
    """Create the tables of all models registered on ``Base`` in the engine's database."""
    metadata = Base.metadata
    metadata.create_all(engine)

if __name__ == '__main__':
    # "setupdb" anywhere on the command line creates the tables; any other
    # invocation scrapes both result pages.
    if "setupdb" not in sys.argv:
        save_books_from_url()
    else:
        setup_db()