In [1]:
import os
import sys

import altair as alt
import pandas as pd
import requests
from outbreak_data import outbreak_common

In [38]:
# Location identifiers of the states to query.
state_list = [
    'USA_US-WA',
    'USA_US-CO',
    'USA_US-MN',
    'USA_US-LA',
]
# Join them into one parenthesised "A OR B OR ..." clause.
states = '({})'.format(' OR '.join(state_list))

In [39]:
# Display the combined query clause to confirm its format.
states

'(USA_US-WA OR USA_US-CO OR USA_US-MN OR USA_US-LA)'

In [None]:
server = 'api.outbreak.info'  # or 'dev.outbreak.info'
# Read the credential from the environment so it never lives in the notebook.
# NOTE(review): the variable name OUTBREAK_API_AUTH is a placeholder — use whatever
# name your environment actually defines.
auth = os.environ.get('OUTBREAK_API_AUTH')  # keep this private!
nopage = 'fetch_all=true&page=0'  # worth verifying that this works with newer ES versions as well

def query_state(state, num_pages = 3, server = server, auth = auth):
    """
    Load case records for one or more admin1 locations.

    Use 'OR' between locations (inside parentheses) to query several at once.
    The first request is made with ``fetch_all`` enabled; subsequent pages are
    retrieved by passing the ``_scroll_id`` of the previous response back to
    the API.

    Arguments:
        state: A string placed after ``q=admin1:`` in the query URL.
        num_pages: Maximum number of result pages to collect (the first
            response counts as page one).
        server: Host name of the API server to query.
        auth: Accepted for signature compatibility; this function does not
            send it with the request.

    Returns:
        A pandas dataframe with one row per hit, with the 'date' column
        converted to datetimes, sorted by ascending date and re-indexed
        from 0. An empty dataframe is returned when there are no hits.

    Raises:
        requests.HTTPError: If the initial request returns an error status.
    """
    query_url = "https://" + server + "/covid19/query"
    # The '&' separator between the query and the paging options is required.
    state_req = requests.get(query_url + "?q=admin1:" + state + "&" + nopage)
    state_req.raise_for_status()

    # Collect raw hit dicts and build the frame once at the end instead of
    # appending row by row.
    records = []
    for curr_page in range(1, num_pages + 1):
        payload = state_req.json()
        hits = payload.get('hits') or []
        if not hits:
            # No hits in this response: the scroll is exhausted.
            break
        records.extend(hits)
        scroll_id = payload.get('_scroll_id')
        if scroll_id is None or curr_page == num_pages:
            # Nothing left to scroll, or the page budget is used up; avoid a
            # request whose result would be discarded.
            break
        state_req = requests.get(
            query_url + "?scroll_id=" + scroll_id
            + "&fetch_all=True&page=" + str(curr_page)
        )

    scroll_df = pd.DataFrame(records)
    if scroll_df.empty:
        return scroll_df
    # Parse the date column and sort in ascending order.
    scroll_df['date'] = pd.to_datetime(scroll_df['date'])
    return scroll_df.sort_values(by='date', ascending=True).reset_index(drop=True)

In [60]:
# Fetch case records for the combined state query.
# NOTE(review): `cases_by_location` is neither defined nor imported in the cells
# visible here (only `query_state` is) — confirm where it comes from before a fresh run.
data = cases_by_location(states)

FGluY2x1ZGVfY29udGV4dF91dWlkDnF1ZXJ5VGhlbkZldGNoAxZoUXBfX2lsVlJHNkFQb0RtY3BKcndRAAAAAADIN7gWV2gwZWVfN2lTOTJ1TEZFNWJuQ0QzURZfLUh5UmtxOFNRLXhJMXFaX09OYWxnAAAAAADIFjcWaU94ME9Wb0NRc3FFSkdiT0lNdmdPURZoUXBfX2lsVlJHNkFQb0RtY3BKcndRAAAAAADIN7kWV2gwZWVfN2lTOTJ1TEZFNWJuQ0QzUQ==


In [70]:
# Repeat the same query under a second name so the two results can be compared below.
# NOTE(review): `cases_by_location` is neither defined nor imported in the cells
# visible here — confirm where it comes from before a fresh run.
data2 = cases_by_location(states)

FGluY2x1ZGVfY29udGV4dF91dWlkDnF1ZXJ5VGhlbkZldGNoAxZfLUh5UmtxOFNRLXhJMXFaX09OYWxnAAAAAADIGAAWaU94ME9Wb0NRc3FFSkdiT0lNdmdPURZBa3JlSUZsRlJBaUR2YnYtek85T0xRAAAAAABoBDoWMGFpWWh2OVRUUnlXZVlHZjRoVkRhZxZfLUh5UmtxOFNRLXhJMXFaX09OYWxnAAAAAADIGAEWaU94ME9Wb0NRc3FFSkdiT0lNdmdPUQ==


In [71]:
# Number of rows returned per state (built-in group size instead of apply(len)).
data.groupby('admin1').size()

admin1
Colorado     277
Louisiana    273
Minnesota    450
dtype: int64

In [72]:
# Number of rows returned per state in the second fetch (built-in group size instead of apply(len)).
data2.groupby('admin1').size()

admin1
Colorado     277
Louisiana    273
Minnesota    450
dtype: int64

In [76]:
# Show the last five rows of the first fetch.
data.tail(n=5)

Unnamed: 0,_id,_score,admin1,confirmed_numIncrease,date
995,USA_Minnesota_None2021-07-18,8.446903,Minnesota,0,2021-07-18
996,USA_Minnesota_None2021-07-19,8.446903,Minnesota,260,2021-07-19
997,USA_Minnesota_None2021-07-23,8.446903,Minnesota,371,2021-07-23
998,USA_Minnesota_None2021-07-28,8.446903,Minnesota,619,2021-07-28
999,USA_Minnesota_None2021-07-29,8.446903,Minnesota,543,2021-07-29


In [75]:
# Show the last five rows of the second fetch.
data2.tail(n=5)

Unnamed: 0,_id,_score,admin1,confirmed_numIncrease,date
995,USA_Minnesota_None2021-07-18,8.446903,Minnesota,0,2021-07-18
996,USA_Minnesota_None2021-07-19,8.446903,Minnesota,260,2021-07-19
997,USA_Minnesota_None2021-07-23,8.446903,Minnesota,371,2021-07-23
998,USA_Minnesota_None2021-07-28,8.446903,Minnesota,619,2021-07-28
999,USA_Minnesota_None2021-07-29,8.446903,Minnesota,543,2021-07-29


In [42]:
# Distinct admin1 values present in the fetched rows.
data['admin1'].unique()

array(['Minnesota', 'Louisiana', 'Colorado'], dtype=object)

In [13]:
# base feature viz // amount of new covid cases
# `states` is the query string built earlier; Altair would treat a string as a
# data URL. Plot the fetched records in `data` instead, which carry the
# date / confirmed_numIncrease / admin1 columns encoded below.
base = alt.Chart(data).mark_line().encode(
    x='date:T',
    y='confirmed_numIncrease:Q',
    color='admin1:N'
).interactive()

In [None]:
# 7-day rolling average of new cases, one line per state.
# Note: `url` is assigned in a later cell of this notebook, so that cell must run first.
rolling = (
    alt.Chart(url)
    .mark_line()
    .encode(
        x='date:T',
        y='confirmed_numIncrease_7dayRolling:Q',
        color='admin1:N',
    )
    .interactive()
)

In [7]:
# Dump the rolling data as a JSON array of records so the charts can load it by path.
url = 'rolling_viz.json'
rolling_states.to_json(path_or_buf=url, orient='records')

In [14]:
# 7-day rolling average of new cases, one line per state, read from the JSON file at `url`.
rolling = (
    alt.Chart(url)
    .mark_line()
    .encode(
        x='date:T',
        y='confirmed_numIncrease_7dayRolling:Q',
        color='admin1:N',
    )
    .interactive()
)

In [9]:
# Daily new cases, one line per state, read from the JSON file at `url`.
base = (
    alt.Chart(url)
    .mark_line()
    .encode(
        x='date:T',
        y='confirmed_numIncrease:Q',
        color='admin1:N',
    )
    .interactive()
)

In [15]:
# Overlay the date rules on the daily-cases chart.
alt.layer(rules, base)

In [10]:
# rules // added dates of popular tourism dates
# Every date is written as zero-padded ISO 8601 (YYYY-MM-DD) so all three rules
# are parsed the same way by the temporal encoding.
rules = alt.Chart(pd.DataFrame({
  'date': ['2022-09-01', '2021-09-01', '2020-09-01'],
  'color': ['black', 'black', 'black']
})).mark_rule().encode(
  x='date:T',
  # scale=None makes the 'color' column values be used as literal colors.
  color=alt.Color('color:N', scale=None)
)

#### Plot of numIncrease vs. numIncrease_7dayRolling (confirmed cases)

In [11]:
# Daily cases (left) next to the 7-day rolling average (right), each with the date rules overlaid.
alt.hconcat(alt.layer(rules, base), alt.layer(rules, rolling))