# In [None]:
import io
import json
import logging
import os
from typing import Optional

import geopandas as gpd
import pandas as pd
import requests
from dotenv import load_dotenv
from geoalchemy2 import Geometry
from requests.adapters import HTTPAdapter, Retry
from sqlalchemy import Column, Integer, Text, Date, Float
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
from sqlalchemy.orm import declarative_base, sessionmaker

from mappings import EVENT_CODES, EVENT_BASE_CODES, EVENT_ROOT_CODES, MAP_FIPS_TO_ISO2

# Never truncate DataFrame output: no cap on displayed columns or rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Shared logger for this file: the root logger with its threshold at INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Load settings from a .env file; the DB_* values are later read from
# os.environ by DatabaseHandler.
load_dotenv()

class RequestHandler:
    """
    Reusable and extensible request handler.

    Wraps a single `requests.Session` whose ``http://`` and ``https://``
    adapters share one `Retry` policy, so every call made through
    `get_content` uses the same retry/redirect limits.

    Todo
    ----
    Provide more generalized functionality. I.E. the ability to retrieve the
    response object directly, as well as the ability to POST messages.
    """
    def __init__(self,
                 total: int = 3,
                 backoff: float = 0.1,
                 redirect: int = 3,
                 timeout: Optional[float] = None) -> None:
        """RequestHandler init function.


        Parameters
        ----------
        total : int, optional
            Total number of retries allowed per request (passed to `Retry`
            as `total`). The default is 3.
        backoff : float, optional
            Backoff factor passed to `Retry` as `backoff_factor`; it scales
            the delay between successive retry attempts. The default is 0.1.
        redirect : int, optional
            Maximum number of redirects permitted before requests library
            fails. The default is 3.
        timeout : float, optional
            Seconds to wait for the server before giving up, forwarded to
            every `Session.get` call. The default is None, which keeps the
            requests default of waiting indefinitely.

        Returns
        -------
        None

        """
        logger.info('Init request handler')
        self.sess = requests.Session()
        self.timeout = timeout

        # NOTE(review): no status_forcelist is configured, so retries appear
        # to cover connection-level failures rather than arbitrary non-200
        # responses -- confirm against the urllib3 Retry documentation.
        retries = Retry(
            total=total,
            backoff_factor=backoff,
            redirect=redirect
        )

        # Mount the same adapter for both schemes; previously only plain
        # HTTP was covered and HTTPS requests silently got no retry policy.
        adapter = HTTPAdapter(max_retries=retries)
        for prefix in ('http://', 'https://'):
            self.sess.mount(prefix, adapter)
        logger.info('Request handler initialized')

    def get_content(self, url: str) -> Optional[bytes]:
        """Fetch response and return the content of the response object.


        Parameters
        ----------
        url : str
            Path to target endpoint.

        Returns
        -------
        bytes or None
            Raw body of the response when the endpoint answers with status
            200; None for any other status code (the status is logged).

        """
        resp = self.sess.get(url, timeout=self.timeout)

        if resp.status_code == 200:
            return resp.content

        # A non-200 answer is a failure from the caller's point of view, so
        # surface it as a warning and make the None result explicit.
        logger.warning('Endpoint: %s returned status code: %s',
                       url, resp.status_code)
        return None
        
# Name of the database table: used as `Threat.__tablename__` and as the
# target table of `DatabaseHandler.upload`.
TABLE_NAME = "threat_table"
# Declarative base that the ORM model(s) in this file inherit from; its
# metadata is what `DatabaseHandler` passes to `create_all`.
Base = declarative_base()

class Threat(Base):
    """SQLAlchemy declarative model for storing and retrieving objects.

    Maps to the table named by `TABLE_NAME`. Every column except the
    geometry is declared as `Text`, and all columns are NOT NULL.

    The trailing letter comments (A, B, Z, ...) presumably give the
    spreadsheet-style column position of each field in the GDELT export
    file -- TODO confirm against the export schema.

    Todo
    ----
    Decide between Integer and Text for Event Codes.
    OPTIMIZATION Consider indexing for increased read performance.
    OPTIMIZATION String(n) not text to min. space requirements where possible
    """
    __tablename__ = TABLE_NAME
    GLOBALEVENTID = Column(Text, primary_key=True) # A (primary key)
    SQLDATE = Column(Text, nullable=False) # B
    EventCode = Column(Text, nullable=False) # Z
    EventBaseCode = Column(Text, nullable=False) # AA
    EventRootCode = Column(Text, nullable=False) # AB
    ActionGeo_FullName = Column(Text, nullable=False) # BA
    ActionGeo_CountryCode = Column(Text, nullable=False) # BB
    ActionGeo_Lat = Column(Text, nullable=False) # BE (stored as text, not a float)
    ActionGeo_Long = Column(Text, nullable=False) # BF (stored as text, not a float)
    # PostGIS POINT geometry; presumably built from BE + BF (lat/long) -- confirm
    # against the data-prep step.
    ActionGeo_Coords = Column(Geometry('POINT'), nullable=False) # BE + BF
    DATEADDED = Column(Text, nullable=False) # BH
    SOURCEURL = Column(Text, nullable=False) # BI
    
class DatabaseHandler:
    """
    Reusable and extensible handler for database connections.

    Owns one SQLAlchemy engine, one open connection and one ORM session
    against the Postgres + PostGIS database, and exposes helpers to upload
    a GeoDataFrame into `TABLE_NAME` and to read the `Threat` rows back.

    Todo
    ----
    Extend fetch functionality to enable queries.
    Index database for faster searches
    OPTIMIZATION Implement connection pooling functionality for faster
    concurrent read/writes.

    """
    def __init__(self) -> None:
        """DatabaseHandler init function.

        Instantiate connection to Postgres + PostGIS database.

        Connection settings come from the `DB_USER`, `DB_PW`, `DB_HOST` and
        `DB_NAME` environment variables (with development fallbacks); the
        port is fixed at 5432. All tables registered on `Base` are created
        via `create_all`.

        """
        logger.info('Init database handler')

        # Build the URL from components instead of string formatting so that
        # credentials containing characters such as '@', ':' or '/' are
        # escaped correctly.
        db_uri = URL.create(
            drivername='postgresql',
            username=os.environ.get('DB_USER', 'default_user'),
            password=os.environ.get('DB_PW', 'default_pass'),
            host=os.environ.get('DB_HOST', 'localhost'),
            port=5432,
            database=os.environ.get('DB_NAME', 'test_db'),
        )

        self.engine = create_engine(db_uri)

        self.conn = self.engine.connect()
        Base.metadata.create_all(self.engine)
        Session = sessionmaker(bind=self.engine)
        self.sess = Session()

        logger.info('Database handler initialized')

    def test(self) -> None:
        """Retrieve contents of database, count the results, and print a few rows.

        Prints the total row count followed by the attribute dict of at
        most the first 50 rows.

        Returns
        -------
        None.

        """
        res = self.fetch_all()
        print('Num rows in db: {}\n'.format(len(res)))

        # Cap the preview so a large table does not flood the output.
        for row in res[:50]:
            print(vars(row))

    def close(self) -> None:
        """Close the database connection.

        Closes the ORM session and the raw connection, then disposes of the
        engine.

        Returns
        -------
        None.

        """
        self.sess.close()
        self.conn.close()
        self.engine.dispose()
        logger.info('Database connection safely closed.')

    def fetch_all(self) -> list:
        """Return results matching all rows.


        Returns
        -------
        list
            list of `Threat` rows retrieved from database.

        Todo
        ----
        Enable querying for particular data. This is sufficient for demo purposes.
        OPTIMIZATION Enable indexing

        """
        return self.sess.query(Threat).all()

    def upload(self, gdf: gpd.GeoDataFrame, if_exists: str = 'append') -> None:
        """Populate table with the contents of `gdf`.


        Parameters
        ----------
        gdf : gpd.GeoDataFrame
            Geodataframe containing points of interest within the United States
            as well as intel sources, dates, and event codings.
        if_exists : str, optional
            Forwarded to `GeoDataFrame.to_postgis`. The default is 'append':
            `__init__` has already created the table through `create_all`,
            so the geopandas default of 'fail' would raise on every upload.

        Returns
        -------
        None

        Todo
        ----
        OPTIMIZATION Enable caching

        """
        # NOTE(review): appending assumes the frame's columns and geometry
        # column name match the `Threat` table -- confirm in the prep step.
        gdf.to_postgis(
            TABLE_NAME,
            self.engine,
            if_exists=if_exists
        )

        print('Adding {} rows'.format(len(gdf.index)))
        logger.info('GeoDataFrame successfully added to table.')
        
class DataHandler:
    """
    Base handler that owns the database and request helpers used to
    acquire, prep, and store data from `tar_url` and its associated export
    files.
    """
    tar_url = 'http://data.gdeltproject.org/gdeltv2/lastupdate.txt'

    def __init__(self) -> None:
        """DataHandler init function.

        Creates a `DatabaseHandler` (stored as `self.db`) followed by a
        `RequestHandler` (stored as `self.reqs`), logging before and after.

        """
        logger.info('Init data handler')
        self.db = DatabaseHandler()
        self.reqs = RequestHandler()
        logger.info('Data handler initialized')

    def run(self) -> None:
        """Do nothing; placeholder that subclasses override."""
        return None

class RunHandler(DataHandler):
    """Custom runner for use with the `DataHandler` class.

    Executes one fetch -> extract -> prep -> upload cycle against the feed
    listed at `DataHandler.tar_url`.
    """

    def __init__(self) -> None:
        """RunHandler init function.

        Trigger parent class initialization.

        """
        super().__init__()

    def run(self) -> None:
        """Download the latest export, load it into the database and verify.

        Steps: fetch the "last update" listing, pick the export URL from
        it, download and unpack that archive, prepare a GeoDataFrame,
        upload it and print a sample of the table. The database connection
        is closed when the run ends, whether it succeeded or not.

        Returns
        -------
        None

        Raises
        ------
        RuntimeError
            If either download does not return content, or if the listing
            contains no export URL.

        """
        # NOTE(review): `_process_latest`, `_extract` and `_data_prep` are
        # not defined in the visible part of this file; they are assumed to
        # be provided elsewhere on this class -- TODO confirm.
        try:
            listing = self.reqs.get_content(DataHandler.tar_url)
            if listing is None:
                # get_content returns None on a non-200 response.
                raise RuntimeError(
                    'No content returned from {}'.format(DataHandler.tar_url))

            results = listing.decode('utf-8')
            urls_list = self._process_latest(results)
            print(urls_list)

            # Instead of hardcoding (i.e. url = urls_list[0]) search for the
            # desired url. This protects against failures if/when endpoint
            # changes result in the returned list being in an unexpected
            # order.
            url = next((item for item in urls_list if 'export' in item), None)
            if url is None:
                raise RuntimeError(
                    'No export url found in {}'.format(DataHandler.tar_url))
            print(url)

            payload = self.reqs.get_content(url)
            if payload is None:
                raise RuntimeError('No content returned from {}'.format(url))
            results = io.BytesIO(payload)

            # Unpack zip
            df_content = self._extract(results)
            print(len(df_content.index))

            # Prep (this call was commented out, leaving `gdf` undefined at
            # the upload step below).
            gdf = self._data_prep(df_content)

            # Load into db
            self.db.upload(gdf)
            self.db.test()
        finally:
            # Release the connection even when a step above fails.
            self.db.close()
    
# Build the runner (this opens the database connection and the HTTP session)
# and execute one pipeline run. These statements execute whenever the file
# is run or imported.
# NOTE(review): consider an `if __name__ == "__main__":` guard if this file
# is ever imported as a module.
rh = RunHandler()
rh.run()