In [2]:
import warnings
from concurrent.futures import ThreadPoolExecutor
from io import StringIO

import pandas as pd
import requests

from lookups import DataSources

In [10]:
# TODO: make this more dynamic (the PER_CAPITA / SHELTER name prefixes are hard-coded in fetch_data)
def fetch_data(source, limit, timeout=60):
    """Download a single data source and return it as a named DataFrame.

    Parameters
    ----------
    source
        Object exposing ``name`` and ``value`` attributes (presumably a
        ``DataSources`` member; confirm against ``lookups``). ``value`` is
        used as the request URL and ``name`` selects how the body is parsed.
    limit : int
        Sent to the endpoint as the ``$limit`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without it a stalled server would
        block the calling worker thread forever.

    Returns
    -------
    tuple[str, pandas.DataFrame] or None
        ``(source.name.lower(), frame)`` on success. ``None`` (with a
        warning) when the response status is not 200 or when the source name
        starts with neither ``PER_CAPITA`` nor ``SHELTER``.

    Raises
    ------
    requests.RequestException
        Propagated unchanged from ``requests.get`` (connection errors,
        timeouts).
    ValueError
        From pandas if a ``PER_CAPITA`` CSV does not have exactly two columns.
    """
    response = requests.get(source.value, params={'$limit': limit}, timeout=timeout)
    if response.status_code != 200:
        # Keep the best-effort contract (return None) but make the gap visible.
        warnings.warn(
            f"{source.name}: request returned HTTP {response.status_code}; source skipped"
        )
        return None

    key = source.name.lower()
    if source.name.startswith("PER_CAPITA"):
        df = pd.read_csv(StringIO(response.text))
        # The third underscore-separated token of the name becomes the value
        # column header, e.g. PER_CAPITA_SONOMA_INCOME -> "Sonoma".
        df.columns = ['date', source.name.split('_')[2].title()]
        return key, df
    if source.name.startswith("SHELTER"):
        return key, pd.DataFrame(response.json())

    warnings.warn(f"{source.name}: unrecognised source name prefix; source skipped")
    return None

def readData(limit=1):
    """Fetch every entry of ``DataSources`` in parallel and index the results.

    Each source is passed to ``fetch_data`` together with ``limit`` on a pool
    of up to five worker threads. Sources for which ``fetch_data`` returns
    ``None`` are left out.

    Parameters
    ----------
    limit : int, optional
        Forwarded unchanged to ``fetch_data`` for every source (default 1).

    Returns
    -------
    dict
        Maps the first element of each ``fetch_data`` result to its second
        element, in the iteration order of ``DataSources``.
    """
    all_sources = list(DataSources)

    def _fetch_one(src):
        return fetch_data(src, limit)

    with ThreadPoolExecutor(max_workers=5) as pool:
        # map() yields results in input order, regardless of completion order.
        fetched = list(pool.map(_fetch_one, all_sources))

    return {item[0]: item[1] for item in fetched if item is not None}

In [13]:
# Load every source with a row cap of one million.
dfs = readData(limit=1_000_000)

In [14]:
# Show which sources were loaded (one key per DataFrame in dfs).
dfs.keys()

dict_keys(['shelter_sonoma', 'shelter_austin_intakes', 'shelter_austin_outcomes', 'shelter_norfolk', 'shelter_bloomington', 'shelter_dallas_2017_2018', 'shelter_dallas_2018_2019', 'shelter_dallas_2019_2020', 'shelter_dallas_2020_2021', 'shelter_dallas_2021_2022', 'shelter_dallas_2022_2023', 'per_capita_sonoma_income', 'per_capita_austin_income', 'per_capita_norfolk_income', 'per_capita_bloomington_income', 'per_capita_dallas_income'])

In [17]:
# Print a structural summary (columns, non-null counts, dtypes) of each frame.
for key, df in dfs.items():
    print(f"DataFrame: {key}")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" after every report.
    df.info()

DataFrame: shelter_sonoma
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26302 entries, 0 to 26301
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             26302 non-null  object
 1   type           26302 non-null  object
 2   breed          26302 non-null  object
 3   color          26302 non-null  object
 4   sex            26302 non-null  object
 5   date_of_birth  19817 non-null  object
 6   intake_date    26302 non-null  object
 7   outcome_date   26069 non-null  object
 8   intake_type    26302 non-null  object
 9   outcome_type   26064 non-null  object
dtypes: object(10)
memory usage: 2.0+ MB
None
DataFrame: shelter_austin_intakes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156093 entries, 0 to 156092
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   animal_id        156093 non-null  object
 1   animal_type     