# Get All projects in GTR

This script gathers all of the projects endpoint data in the GtR database.
Results are stored in a Postgres database as JSON entries.

First, import the relevant packages (gtr can be found at https://github.com/nestauk/gtr).

In [10]:
import functools
import json
import time
from urllib.parse import quote

import gtr
import sqlalchemy
from sqlalchemy import Column, Integer, schema, Text
from sqlalchemy.dialects.postgresql import JSON, JSONB
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.schema import CreateSchema

In [11]:
# Read the database connection settings from the JSON file next to the notebook.
with open('config.json') as f:
    conf = json.load(f)

# Unpack the settings in one pass; a missing key raises KeyError.
# NOTE: `schema` rebinds the name imported from sqlalchemy at the top of the
# file; from here on it is the configured schema name.
user, host, port, passw, schema, database = (
    conf[setting]
    for setting in ('user', 'host', 'port', 'passw', 'schema', 'database')
)

In [12]:
Base = declarative_base()


class Project(Base):
    """Declarative mapping for a row of the ``projects`` table."""

    __tablename__ = 'projects'
    # Place the table in the schema named by the `schema` variable.
    __table_args__ = dict(schema=schema)

    id = Column(Text, primary_key=True)  # text primary key
    title = Column(Text)
    doc = Column(JSONB)  # JSON document stored in a JSONB column

In [13]:
def rate_limited(maxPerSecond):
    """Create a decorator that limits a function to ``maxPerSecond`` calls per second.

    The decorated function sleeps before running whenever less than
    ``1 / maxPerSecond`` seconds have passed since its previous call
    finished. The very first call never waits.

    Args:
        maxPerSecond: Maximum number of calls allowed per second.

    Returns:
        A decorator that wraps a function with the rate limit while keeping
        its name, docstring and return value.

    Raises:
        ZeroDivisionError: If ``maxPerSecond`` is zero.
    """
    minInterval = 1.0 / float(maxPerSecond)

    def decorate(func):
        # Monotonic timestamp of the end of the previous call (None = never called).
        lastTimeCalled = None

        @functools.wraps(func)
        def rateLimitedFunction(*args, **kargs):
            nonlocal lastTimeCalled
            if lastTimeCalled is not None:
                # time.monotonic() measures wall-clock intervals and cannot go
                # backwards; time.clock() was removed in Python 3.8 and counted
                # CPU time on Unix, so sleeping never advanced it.
                leftToWait = minInterval - (time.monotonic() - lastTimeCalled)
                if leftToWait > 0:
                    time.sleep(leftToWait)
            try:
                return func(*args, **kargs)
            finally:
                # Record the call even when it raised, so a failing call still
                # counts against the rate limit.
                lastTimeCalled = time.monotonic()

        return rateLimitedFunction

    return decorate

In [14]:
@rate_limited(2)
def add_projects_to_db(data):
    """Store one page of project records as ``Project`` rows.

    For every record, ``id`` and ``title`` are written to their own columns
    and all remaining fields whose name is not listed in ``keys`` are stored
    together in the ``doc`` column. The whole page is committed in a single
    transaction; if the commit fails the session is rolled back and the
    error is re-raised.

    NOTE(review): this function relies on two module-level names, ``session``
    (an open SQLAlchemy session) and ``keys`` (field names to leave out of
    ``doc``). ``keys`` is not defined in the notebook cells shown here;
    presumably it should be something like ``("id", "title")`` - confirm.

    Args:
        data: Iterable of dicts, each containing at least ``"id"`` and
            ``"title"``.

    Raises:
        KeyError: If a record has no ``"id"`` or ``"title"`` entry.
    """
    project_list = [
        Project(
            id=project["id"],
            title=project["title"],
            doc={field: value
                 for field, value in project.items()
                 if field not in keys},
        )
        for project in data
    ]

    try:
        session.add_all(project_list)
        session.commit()
    except Exception:
        # Leave the session usable for the next page instead of stuck in a
        # failed transaction.
        session.rollback()
        raise

In [15]:
# Build the connection URL from the values read from config.json.
# The password is percent-encoded so characters such as "@", "/" or ":" in it
# cannot break the URL; an empty or missing password yields "user:@host".
conn_str = 'postgresql://{}:{}@{}:{}/{}'.format(user,
                                                quote(str(passw or ''), safe=''),
                                                host,
                                                port,
                                                database)
db = sqlalchemy.create_engine(conn_str)   # the Engine
engine = db.connect()                     # despite the name, an open Connection

meta = sqlalchemy.MetaData(engine, schema=schema)

# Session through which add_projects_to_db adds and commits Project rows.
session = sessionmaker(bind=db)()

In [None]:
s = gtr.Projects()                        # Create a Projects object
results = s.projects("", s=100)           # Get the first page of results using max items per page
first_page = results.json()               # Parse the response body once and reuse it
total_pages = first_page["totalPages"]    # Total number of pages to loop through
data = first_page["project"]              # Project records returned on this page
add_projects_to_db(data)                  # Add the returned data to the DB

In [None]:
# The first page has already been fetched and stored, so continue from page 2.
# To resume an interrupted run, set `page` to the first page not yet stored.
page = 2
while page <= total_pages:
    results = s.projects("", s=100, p=page)
    data = results.json()["project"]
    add_projects_to_db(data)
    page += 1