## Sandbox for thesis code

In [8]:
import pickle as pkl
from pathlib import Path

import pandas as pd
import requests

Step 1: Import sample dataset from VIEWS to use for testing

In [2]:
# Root address of the VIEWS forecasting API.
# NOTE(review): not read by any cell visible here; retrieve_data_from_api builds its own URL.
api_url: str = "https://api.viewsforecasting.org"
# Identifier of the run to download
run: str = "predictors_fatalities002_0000_00"
# Level of analysis passed to the API
loa: str = "cm"

Define a function that downloads all pages of data from the API:

In [3]:
def retrieve_data_from_api(run: str,
                           loa: str,
                           verbose=False,
                           date_start=None,
                           date_end=None,
                           timeout=60
                           ) -> pd.DataFrame:
    """
    Download every page of a run from the VIEWS API and return it as one DataFrame.

    Args:
        run (str): run identifier (e.g. fatalities001_2022_06_t01)
        loa (str): level of analysis; cm or pgm
        verbose (bool): whether to print a progress message for each page
        date_start (str): start date for filtering data (YYYY-MM-DD), default is None (not sent)
        date_end (str): end date for filtering data (YYYY-MM-DD), default is None (not sent)
        timeout (float): timeout in seconds handed to every HTTP request, default is 60

    Returns:
        pd.DataFrame: DataFrame built from the concatenated 'data' entries of all pages

    Raises:
        requests.HTTPError: if any page request comes back with an error status
    """

    # Let requests build and URL-encode the query string. A bound that is
    # supplied on its own is still sent instead of being silently ignored.
    query = {}
    if date_start:
        query['date_start'] = date_start
    if date_end:
        query['date_end'] = date_end

    first_url = f'https://api.viewsforecasting.org/{run}/{loa}'

    # get first page; the timeout keeps a stalled connection from hanging the kernel
    response = requests.get(first_url, params=query, timeout=timeout)

    # check response status: fail loudly rather than parsing an error body
    response.raise_for_status()
    page_data = response.json()

    master_list = list(page_data['data'])

    # loop through pages; an empty string, None or a missing key all mean "no more pages"
    i = 1
    while page_data.get('next_page'):

        # if verbose, print progress (carriage return overwrites the previous message)
        if verbose:
            print(f"Retrieving page {i}/{page_data['page_count']-1} at {loa} level...         ", end='\r', flush=True)

        response = requests.get(page_data['next_page'], timeout=timeout)
        response.raise_for_status()
        page_data = response.json()

        master_list.extend(page_data['data'])
        i += 1

    # convert the accumulated records to a dataframe in one go
    return pd.DataFrame(master_list)

Retrieve the data for the full date range (no date filter applied).

In [4]:
# Download every page for the configured run and level of analysis, without a date filter
views_data = retrieve_data_from_api(run=run, loa=loa, verbose=True)

Retrieving page 81/81 at cm level...         

Save the retrieved data as a pickle file.

In [11]:
# Persist the downloaded frame so later runs can load it from disk
views_data_path = Path("../data/raw/views_data.pkl")
# Create the target directory first; on a fresh checkout it may not exist yet
views_data_path.parent.mkdir(parents=True, exist_ok=True)
with views_data_path.open("wb") as f:
    pkl.dump(views_data, f)