<h1>Matt's eCFR API Demonstrator</h1>
<p>An intro to Jupyter Notebooks</p>
<p><i>This is a very important subtitle, to be updated later</i></p>

In [31]:
# Do my imports - datetime for date operations, pandas for data analysis, requests for HTTPS REST API requests, json for, well, javascript object notation, duh
import datetime as dt
import pandas as pd
import requests as rq
import json as json

# Set my constants
base_url = "https://www.ecfr.gov/api/"
titles_url = base_url + "versioner/v1/titles.json"
agencies_url = base_url + "admin/v1/agencies.json"
data_path = "../data/"

# Define my eCFR API wrapper class
class eCFR_API_Wrapper:
    """
    A wrapper for the eCFR API to fetch and process data.
    """

    def __init__(self, base_url):
        self.base_url = base_url

    def fetch_data(endpoint):
        """
        Fetch data from the eCFR API.
        """
        try:
            response = rq.get(f"{endpoint}")
            if response.status_code == 200:
                return response.json()
            else:
                response.raise_for_status()
        except: 
            print(f"Error fetching data from {endpoint}")
            return None

In [32]:

# initialize variables
titles_json= {'titles':[]}  #the titles_json dictionary to contain data
statusmsg = "initialized"  #use this to track what's going on 
lastbestdate = "1776-07-04"  #when was the eCFR last updated?
today = dt.date.today() #what day is it?
thirtydaysback = today-dt.timedelta(days=30)  #what day was it 30 days ago?

# test successful initialization
assert len(titles_json)>0
assert len(statusmsg)>0
assert str(today)>lastbestdate

print("Welcome to the MRWeCFR with base_url of "+base_url)

# Psuedocode
# 1. Let's see if we have a list of recent titles (less than 30 days old) in our titles.json file
# 2. If we do, let's see if we have the full and amendments of similar age, else, try and refresh titles.json 
# 3. If we are good with source data, skip to analysis, else, try and refresh the eCFR json details 
# 4. Analysis - let's load some basic semantic checks (wordcount, reading level) and some change trackers (count of changes, frequency over time)

try:
    # lets see if we have some files
    print("Let\'s do this - reading the file now")
    with open(data_path+'titles.json') as mytitlesfile:
        titles_json = json.load(mytitlesfile)


except FileNotFoundError:
    # oh dang. no file
    statusmsg = "file not found"
    print("Whoops - file not found. Ain\'t got nothin\' sorry")

else:
    # no exception occurred - woohoo
    statusmsg = "file loaded"
    print("Nothing to see here but these titles \n")


finally:
    # do this always

    if len(titles_json)>1:
        # we have useful data to check - just get the contents of the titles
        titles_dict = titles_json['titles']
        # in the array of titles, extract all of the up_to_date_as_of values
        key_to_find = "up_to_date_as_of"
        up_to_date_values = [t[key_to_find] for t in titles_dict if key_to_find in t]
        #Now, it's possible that this list of up_to_dates contains None aka null dates - filter them out and put the most recent date in lastbestdate
        #lastbestdate = max(list(filter(None,up_to_date_values)))
        if lastbestdate > str(thirtydaysback) :
            statusmsg = statusmsg + " with good data as of " + lastbestdate
        else:     
            statusmsg = statusmsg + " with stale data as of " + lastbestdate + ". We need to refresh"
            #call refresh API here
            new_titles=eCFR_API_Wrapper.fetch_data(titles_url)
            if(new_titles):
                # we got new data - update the titles_json
                titles_json = new_titles
                # and write it to the file
                with open(data_path+'titles.json', 'w') as mytitlesfile:
                    json.dump(titles_json, mytitlesfile)
                statusmsg = statusmsg + " and refreshed titles.json"
        
    else: 
        # failed to load useful data - darn.    
        statusmsg = statusmsg + " but data is no good"
    print("finished risky job with status of "+statusmsg)





Welcome to the MRWeCFR with base_url of https://www.ecfr.gov/api/
Let's do this - reading the file now
Nothing to see here but these titles 

finished risky job with status of file loaded with stale data as of 1776-07-04. We need to refresh and refreshed titles.json
