In [1]:
import requests
import pandas as pd
from io import StringIO
from concurrent.futures import ThreadPoolExecutor
from lookups import DataSources, Errors
from logging_handler import show_error_msg

In [2]:
# TODO: make this more dynamic (presumably the hard-coded PER_CAPITA / SHELTER prefix dispatch below)
def fetch_data(source, limit, timeout=30):
    """Download one data source and return it as a ``(name, DataFrame)`` pair.

    Parameters
    ----------
    source : DataSources member
        ``source.value`` is the URL that is requested and ``source.name``
        selects how the response is parsed:

        * names starting with ``PER_CAPITA`` are read as CSV and the columns
          are renamed to ``['date', <third underscore-separated token of the
          name, title-cased>]``;
        * names starting with ``SHELTER`` are read as JSON and passed to
          ``pd.DataFrame``.

        Any other name is not parsed and yields ``None``.
    limit : int
        Sent to the endpoint as the ``$limit`` query parameter.
    timeout : float or tuple, optional
        Forwarded to ``requests.get`` so a stalled server cannot block the
        calling worker indefinitely. Defaults to 30 seconds.

    Returns
    -------
    tuple or None
        ``(source.name.lower(), DataFrame)`` on success, ``None`` when the
        request or parsing failed (the failure is reported through
        ``show_error_msg``) or when the source name matches no known prefix.
    """
    result = None
    try:
        response = requests.get(source.value, params={'$limit': limit}, timeout=timeout)
        if response.status_code != 200:
            # Route non-200 replies through the same reporting path as any
            # other failure instead of silently returning None.
            raise RuntimeError(f"unexpected HTTP status {response.status_code}")
        if source.name.startswith("PER_CAPITA"):
            df = pd.read_csv(StringIO(response.text))
            # Raises (and is reported below) if the CSV does not have exactly
            # two columns.
            df.columns = ['date', source.name.split('_')[2].title()]
            result = (source.name.lower(), df)
        elif source.name.startswith("SHELTER"):
            result = (source.name.lower(), pd.DataFrame(response.json()))
    except Exception as e:
        show_error_msg(Errors.FETCHING_DATA_FROM_SOURCE.value + f" {source.name}", str(e))
    # Deliberately a plain return rather than `return` inside `finally`:
    # the latter would also swallow KeyboardInterrupt / SystemExit.
    return result

def readData(limit=1):
    """Fetch every member of ``DataSources`` concurrently and collect the results.

    Parameters
    ----------
    limit : int, optional
        Forwarded to ``fetch_data`` for every source. Defaults to 1.

    Returns
    -------
    dict
        Maps the name returned by ``fetch_data`` to its DataFrame. Sources for
        which ``fetch_data`` returned ``None`` are reported on stdout and left
        out; they do not prevent the remaining sources from being collected.
    """
    income_dict = dict()
    sources = list(DataSources)
    with ThreadPoolExecutor(max_workers=5) as executor:
        # executor.map yields results in the same order as `sources`, which is
        # what makes the zip below line each result up with its source.
        results = list(executor.map(lambda source: fetch_data(source, limit), sources))
    for source, result in zip(sources, results):
        if result is None:
            # A failed fetch has no result[0] to read, so name the source itself.
            print(f"{Errors.FETCHING_DATA_FROM_SOURCE.value}:{source.name.lower()}")
            continue
        try:
            name, frame = result
            income_dict[name] = frame
        except Exception as e:
            # Guard each item separately so one malformed result cannot drop
            # the results that come after it.
            show_error_msg(Errors.READ_DATA_FN_ERROR.value, str(e))
    return income_dict

In [3]:
# Fetch every configured source using readData's default limit of 1 row per request.
# NOTE(review): the Sonoma outputs further down report 26,302 rows, so they were
# presumably produced by a run with a larger limit — TODO confirm and pass the
# limit explicitly so a fresh Restart & Run All reproduces them.
dfs = readData()

In [5]:
# List the datasets that were loaded; keys are the lower-cased source names.
dfs.keys()

dict_keys(['shelter_sonoma', 'shelter_austin_intakes', 'shelter_austin_outcomes', 'shelter_norfolk', 'shelter_bloomington', 'shelter_dallas_2017_2018', 'shelter_dallas_2018_2019', 'shelter_dallas_2019_2020', 'shelter_dallas_2020_2021', 'shelter_dallas_2021_2022', 'shelter_dallas_2022_2023', 'per_capita_sonoma_income', 'per_capita_austin_income', 'per_capita_norfolk_income', 'per_capita_bloomington_income', 'per_capita_dallas_income'])

In [6]:
# Print a column / dtype / non-null summary for every fetched dataset.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() emitted a stray "None" after each report.
for key, df in dfs.items():
    print(f"DataFrame: {key}")
    df.info()

DataFrame: shelter_sonoma
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             1 non-null      object
 1   type           1 non-null      object
 2   breed          1 non-null      object
 3   color          1 non-null      object
 4   sex            1 non-null      object
 5   date_of_birth  1 non-null      object
 6   intake_date    1 non-null      object
 7   outcome_date   1 non-null      object
 8   intake_type    1 non-null      object
 9   outcome_type   1 non-null      object
dtypes: object(10)
memory usage: 208.0+ bytes
None
DataFrame: shelter_austin_intakes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   animal_id        1 non-null      object
 1   animal_type      1 non-null     

In [72]:
# Pull the Sonoma shelter dataset out of the fetched frames for closer
# inspection, then list its column names.
sonoma = dfs['shelter_sonoma']
sonoma.columns

Index(['id', 'type', 'breed', 'color', 'sex', 'date_of_birth', 'intake_date',
       'outcome_date', 'intake_type', 'outcome_type'],
      dtype='object')

In [73]:
# Row count, dtypes and per-column non-null counts for the Sonoma data.
sonoma.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26302 entries, 0 to 26301
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   id             26302 non-null  object
 1   type           26302 non-null  object
 2   breed          26302 non-null  object
 3   color          26302 non-null  object
 4   sex            26302 non-null  object
 5   date_of_birth  19817 non-null  object
 6   intake_date    26302 non-null  object
 7   outcome_date   26069 non-null  object
 8   intake_type    26302 non-null  object
 9   outcome_type   26064 non-null  object
dtypes: object(10)
memory usage: 2.0+ MB


In [82]:
# Show rows that are exact duplicates of another row across all columns;
# keep=False flags every copy rather than only the later repeats.
sonoma[sonoma.duplicated(keep=False)]

Unnamed: 0,id,type,breed,color,sex,date_of_birth,intake_date,outcome_date,intake_type,outcome_type
14503,A411035,CAT,DOMESTIC SH,ORANGE,Neutered,2022-03-15T00:00:00.000,2022-04-16T00:00:00.000,2022-06-25T00:00:00.000,STRAY,ADOPTION
23945,A386296,CAT,DOMESTIC SH,BRN TABBY,Unknown,,2019-04-18T00:00:00.000,2019-04-18T00:00:00.000,STRAY,TRANSFER
25177,A386296,CAT,DOMESTIC SH,BRN TABBY,Unknown,,2019-04-18T00:00:00.000,2019-04-18T00:00:00.000,STRAY,TRANSFER
25919,A411035,CAT,DOMESTIC SH,ORANGE,Neutered,2022-03-15T00:00:00.000,2022-04-16T00:00:00.000,2022-06-25T00:00:00.000,STRAY,ADOPTION


In [92]:
# Collect every record whose id occurs more than once, ordered by id so that
# records sharing an id sit next to each other.
test = (
    sonoma
    .loc[sonoma.duplicated(subset=['id'], keep=False)]
    .sort_values('id')
)
# Of those repeated-id records, display the ones whose outcome was an adoption.
test[test['outcome_type'] == 'ADOPTION']

Unnamed: 0,id,type,breed,color,sex,date_of_birth,intake_date,outcome_date,intake_type,outcome_type
25454,A025557,DOG,CHIHUAHUA SH/BEAGLE,TAN/WHITE,Spayed,2003-01-01T00:00:00.000,2015-10-23T00:00:00.000,2015-11-04T00:00:00.000,ADOPTION RETURN,ADOPTION
23797,A025557,DOG,CHIHUAHUA SH/BEAGLE,TAN/WHITE,Spayed,2003-01-01T00:00:00.000,2015-09-02T00:00:00.000,2015-10-17T00:00:00.000,STRAY,ADOPTION
6689,A040915,DOG,LABRADOR RETR,BLACK,Spayed,2006-03-01T00:00:00.000,2014-01-17T00:00:00.000,2014-02-07T00:00:00.000,OWNER SURRENDER,ADOPTION
2064,A205364,DOG,DOBERMAN PINSCH/MIX,BLACK/TAN,Spayed,2007-04-26T00:00:00.000,2014-06-26T00:00:00.000,2014-07-22T00:00:00.000,STRAY,ADOPTION
23413,A224339,DOG,GERM SHEPHERD,BLACK/TAN,Neutered,2008-05-15T00:00:00.000,2016-01-28T00:00:00.000,2016-03-02T00:00:00.000,STRAY,ADOPTION
...,...,...,...,...,...,...,...,...,...,...
630,A419331,DOG,GERM SHEPHERD/MIX,TAN/WHITE,Spayed,2022-10-27T00:00:00.000,2023-09-06T00:00:00.000,2023-09-15T00:00:00.000,ADOPTION RETURN,ADOPTION
466,A419331,DOG,GERM SHEPHERD/MIX,TAN/WHITE,Spayed,2022-10-27T00:00:00.000,2023-07-27T00:00:00.000,2023-09-01T00:00:00.000,STRAY,ADOPTION
535,A419522,DOG,PARSON RUSS TER/MIX,WHITE/BROWN,Neutered,2021-08-08T00:00:00.000,2023-08-08T00:00:00.000,2023-09-08T00:00:00.000,STRAY,ADOPTION
636,A419522,DOG,PARSON RUSS TER/MIX,WHITE/BROWN,Neutered,2021-08-08T00:00:00.000,2023-09-09T00:00:00.000,2023-09-16T00:00:00.000,ADOPTION RETURN,ADOPTION


In [20]:
# Preview the first rows of the Sonoma data.
sonoma.head()

Unnamed: 0,id,type,breed,color,sex,date_of_birth,intake_date,outcome_date,intake_type,outcome_type
0,A328255,DOG,MALTESE/POODLE TOY,WHITE,Spayed,2014-10-06T00:00:00.000,2023-07-05T00:00:00.000,2023-08-08T00:00:00.000,STRAY,ADOPTION
1,A420799,CAT,DOMESTIC SH,BLACK,Unknown,,2023-09-30T00:00:00.000,2023-09-30T00:00:00.000,STRAY,TRANSFER
2,A420773,CAT,DOMESTIC LH,BLACK/WHITE,Neutered,2013-09-29T00:00:00.000,2023-09-29T00:00:00.000,2023-09-30T00:00:00.000,STRAY,RETURN TO OWNER
3,A420810,CAT,DOMESTIC SH,BRN TABBY,Male,2023-08-15T00:00:00.000,2023-09-30T00:00:00.000,2023-09-30T00:00:00.000,STRAY,TRANSFER
4,A417889,CAT,DOMESTIC SH,ORG TABBY/WHITE,Spayed,2023-05-07T00:00:00.000,2023-05-30T00:00:00.000,2023-08-08T00:00:00.000,STRAY,ADOPTION


In [27]:
# Number of distinct breed values in the Sonoma data.
sonoma['breed'].nunique()

1052

In [28]:
# How many records fall under each value of the `type` column.
sonoma['type'].value_counts()

DOG      14695
CAT       9403
OTHER     2204
Name: type, dtype: int64