# Data Preparation

In [18]:
# Load the "autoreload" extension
%load_ext autoreload

# always reload modules marked with "%aimport"
%autoreload 1

In [19]:
import os
import sys

# add the 'src' directory as one where we can import modules
src_dir = os.path.join(os.getcwd(), os.pardir, 'src')
sys.path.append(src_dir)

# Definiton of an epiweek :

Epiweeks use the U.S. definition. That is, the first epiweek each year is the week, starting on a Sunday, containing January 4. 
ref : [epiweek](http://www.cmmcp.org/epiweek.htm)

In [28]:
#import labels
%autoreload
def states():
    """return state label"""
    #create empty dic
    s = [] 
    try:
        # read the data
        with open("../src/labels/states.txt") as f :
            lines = f.readlines()
            #remove new lines at the end
            s = [ line.strip('\n') for line in lines ]
    except FileNotFoundError:
        return None
    else:
        return s

In [29]:
## Testing Delphi epidata

# import my method from the source code
%aimport data.delphi_epidata
from data.delphi_epidata import Epidata

In [30]:
# Fetch State data from 2010 to 2015 in the US 
import pandas as pd

In [31]:
states = states()
print(states)

['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY']


In [51]:
ili_states = []
for state in states:
    print("State {}".format(state))
    res = Epidata.ilinet(
         locations = state, #source
         epiweeks = [Epidata.range(200901,201601)]) #range
    if res['result'] == 1:
        print(res['result'], res['message'], len(res['epidata']))
        df = pd.DataFrame(res['epidata'])
        ili_states.append(df)
    else:
        print("(-2, u'no success')")

State AK
(1, u'success', 60)
State AL
(1, u'success', 171)
State AR
(1, u'success', 119)
State AZ
(1, u'success', 275)
State CA
(1, u'success', 275)
State CO
(1, u'success', 179)
State CT
(1, u'success', 280)
State DC
(-2, u'no success')
State DE
(1, u'success', 180)
State FL
(1, u'success', 275)
State GA
(1, u'success', 114)
State HI
(1, u'success', 14)
State IA
(1, u'success', 245)
State ID
(1, u'success', 145)
State IL
(1, u'success', 160)
State IN
(1, u'success', 150)
State KS
(1, u'success', 170)
State KY
(-2, u'no success')
State LA
(1, u'success', 223)
State MA
(1, u'success', 269)
State MD
(1, u'success', 176)
State ME
(1, u'success', 147)
State MI
(1, u'success', 221)
State MN
(1, u'success', 181)
State MO
(1, u'success', 180)
State MS
(1, u'success', 248)
State MT
(1, u'success', 181)
State NC
(1, u'success', 180)
State ND
(1, u'success', 24)
State NE
(1, u'success', 113)
State NH
(1, u'success', 190)
State NJ
(1, u'success', 279)
State NM
(1, u'success', 48)
State NV
(1, u's

In [49]:
delphi_data = pd.concat(ili_states)
delphi_data.head(60)

Unnamed: 0,epiweek,ili,ili_estimate,location,num_age_0,num_age_1,num_age_2,num_age_3,num_age_4,num_age_5,num_ili,num_patients,num_providers
0,201440,0.38,0.38,AK,,,,,,,,,
1,201441,0.38,0.38,AK,,,,,,,,,
2,201442,0.35,0.35,AK,,,,,,,,,
3,201443,1.39,1.39,AK,,,,,,,,,
4,201444,1.66,1.66,AK,,,,,,,,,
5,201445,2.78,2.78,AK,,,,,,,,,
6,201446,2.78,2.78,AK,,,,,,,,,
7,201447,2.63,2.63,AK,,,,,,,,,
8,201448,2.12,2.12,AK,,,,,,,,,
9,201449,1.89,1.89,AK,,,,,,,,,


In [46]:
delphi_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8159 entries, 0 to 169
Data columns (total 13 columns):
epiweek          8159 non-null int64
ili              8159 non-null float64
ili_estimate     8159 non-null float64
location         8159 non-null object
num_age_0        0 non-null object
num_age_1        0 non-null object
num_age_2        0 non-null object
num_age_3        0 non-null object
num_age_4        0 non-null object
num_age_5        0 non-null object
num_ili          0 non-null object
num_patients     0 non-null object
num_providers    0 non-null object
dtypes: float64(2), int64(1), object(10)
memory usage: 892.4+ KB


In [47]:
%store delphi_data

Stored 'delphi_data' (DataFrame)
