# Get All persons in GTR

This script gathers all of the data exposed by the persons endpoint of the GtR database.
The results are stored in a Postgres database as JSON entries.

First, import the relevant packages (gtr can be found at https://github.com/nestauk/gtr).

In [None]:
import functools
import json
import time

import gtr
import sqlalchemy
from sqlalchemy import Column, Integer, schema, Text
from sqlalchemy.dialects.postgresql import JSON, JSONB
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

In [None]:
# Declarative base class that ORM models in this notebook inherit from.
Base = declarative_base()


class Person(Base):
    """ORM model for a single row of the ``persons`` table in the ``gtr`` schema."""

    __tablename__ = "persons"
    __table_args__ = dict(schema="gtr")

    # Primary key, stored as text.
    id = Column(Text, primary_key=True)
    # Free-text name of the person.
    name = Column(Text)
    # Arbitrary JSON document stored as Postgres JSONB.
    doc = Column(JSONB)

In [None]:
def rate_limited(maxPerSecond):
    """Create a decorator that limits a function to ``maxPerSecond`` calls per second.

    A call made less than ``1 / maxPerSecond`` seconds after the previous
    call finished is delayed with ``time.sleep`` until that minimum interval
    has elapsed. The very first call is never delayed.

    Args:
        maxPerSecond: Maximum number of calls allowed per second. A value of
            zero raises ``ZeroDivisionError``.

    Returns:
        A decorator that wraps a function with the rate limit while
        preserving its name and docstring.
    """
    min_interval = 1.0 / float(maxPerSecond)

    def decorate(func):
        # -inf guarantees the first call sees an "infinitely old" previous call
        # and therefore does not sleep.
        last_called = float("-inf")

        @functools.wraps(func)
        def rate_limited_function(*args, **kwargs):
            nonlocal last_called
            # time.monotonic() is a wall clock that keeps advancing during
            # sleep and never goes backwards. The previously used time.clock()
            # was removed in Python 3.8 and measured CPU time on Unix.
            remaining = min_interval - (time.monotonic() - last_called)
            if remaining > 0:
                time.sleep(remaining)
            try:
                return func(*args, **kwargs)
            finally:
                # Record the time even when the call raised: the attempt still
                # counts against the rate limit.
                last_called = time.monotonic()

        return rate_limited_function

    return decorate

In [None]:
@rate_limited(2)
def add_persons_to_db(data):
    """Convert person JSON records into ``Person`` rows and commit them.

    Each record becomes one ``Person`` whose ``name`` is the first name and
    surname joined by a space and whose ``doc`` holds the record's ``href``
    and ``links`` under a top-level ``"data"`` key.

    Args:
        data: Iterable of dicts, each containing the keys ``"id"``,
            ``"firstName"``, ``"surname"``, ``"href"`` and ``"links"``.

    Raises:
        KeyError: If a record is missing one of the required keys.
        Exception: Any error raised while adding or committing is re-raised
            after the transaction has been rolled back.
    """
    # Build every row before opening a session so that a malformed record
    # fails without touching the database.
    people = [
        Person(
            id=person["id"],
            name=" ".join([person["firstName"], person["surname"]]),
            doc={"data": {"href": person["href"], "links": person["links"]}},
        )
        for person in data
    ]

    session = SessionFactory()
    try:
        session.add_all(people)
        session.commit()
    except Exception:
        # Leave the connection in a clean state before propagating the error.
        session.rollback()
        raise
    finally:
        # Always release the session's resources; one is opened per call.
        session.close()

In [None]:
# Connection settings for the Postgres database that stores the results.
# NOTE(review): user, host, port and database name are hard-coded and the
# password is empty — confirm this is only meant for local use.
connection_string = 'postgresql://james:@localhost:5432/arloesiadur'
db = sqlalchemy.create_engine(connection_string)
# NOTE: despite its name, `engine` holds the result of db.connect() (an open
# connection); the Engine object itself is `db`.
engine = db.connect()
# Metadata collection tied to that connection, with "gtr" as its schema.
# NOTE(review): passing a bind as the first MetaData argument presumably
# requires SQLAlchemy < 2.0 — confirm the installed version.
meta = sqlalchemy.MetaData(engine, schema="gtr")

In [None]:
# Register a "persons" table on `meta` with the same three columns that the
# Person class declares (id, name, doc).
sqlalchemy.Table("persons", meta,
                Column('id', Text, primary_key=True),
                Column('name', Text),
                Column('doc', JSONB))


# Emit CREATE TABLE for the tables registered on `meta`; no bind is passed
# here, so this relies on `meta` having been constructed with one.
meta.create_all()

In [None]:
# Session factory bound to `engine`; add_persons_to_db() calls it to obtain a
# session for each batch.
SessionFactory = sessionmaker(engine)
# Create the tables registered on the declarative Base (which is aware of the
# Person class), using the same bind.
Base.metadata.create_all(engine)

In [None]:
# Request the first page of persons with 100 items per page, then read the
# page count and the person records from the parsed response.
s = gtr.Persons()
results = s.persons("", s=100)
first_page = results.json()
total_pages = first_page["totalPages"]
data = first_page["person"]

# Store the first page of persons in the database.
add_persons_to_db(data)

In [None]:
# The first page has already been fetched and stored by the previous cell, so
# continue from page 2 through the last page (inclusive). Starting from 0 or 1
# would request the first page again and try to insert rows whose primary
# keys already exist.
# NOTE(review): assumes GtR page numbers are 1-based — confirm against the API.
for page in range(2, total_pages + 1):
    results = s.persons("", s=100, p=page)
    data = results.json()["person"]
    add_persons_to_db(data)