In [6]:
import os
import numpy as np
import pandas as pd
import datetime

def smushed_notation(a):
    '''
    Rewrite every entry of `a` into the compact "smushed" lookup form.

    Each entry is upper-cased; then spaces, '.', '/', '-' and apostrophes are
    removed, followed by the words 'COUNTY' and 'PARISH' (in that order).

    input
    a = indexable, mutable sequence of strings (list or 1-D array)

    output
    the same object `a`, whose entries have been replaced in place
    '''
    # Removal order matters: punctuation goes first so that e.g. 'COUN TY'
    # collapses to 'COUNTY' before the word itself is stripped.
    removable = (' ', '.', '/', '-', '\'', 'COUNTY', 'PARISH')
    for position in range(len(a)):
        cleaned = a[position].upper()
        for token in removable:
            cleaned = cleaned.replace(token, '')
        a[position] = cleaned
    return a


def fips_finder(smushed,key):
    '''
    Look up the FIPS code for every smushed 'COUNTY,STATE' entry.

    input
    smushed = 1-D array in format of 'COUNTY,STATE'
    key = 2-D array in format col1:'FIPS' , col2:'COUNTY,STATE'

    output
    1-D array of fips values matching to smushed: exactly one value per
    entry, in the same order
    if no fips value exists, FIPS='.'
    if a 'COUNTY,STATE' name occurs in several rows of key, the first row wins

    Prints a warning when one fourth or more of the entries are unmatched.
    '''
    # Build the name -> FIPS table once instead of rescanning key for every
    # entry. setdefault keeps the first row for a duplicated name, so the
    # result always stays aligned one-to-one with `smushed`.
    lookup = {}
    for row in key:
        lookup.setdefault(row[1], str(row[0]))

    fipslist = np.array([lookup.get(name, '.') for name in smushed], dtype=str)

    # Catch errors: a high unmatched share usually means a formatting problem.
    # Skipped for empty input, where the ratio is undefined.
    if len(fipslist) > 0:
        unmatched = np.count_nonzero(fipslist == '.')
        if (unmatched / len(fipslist)) >= 0.25:
            print('Error likely, more than one fourth unmatched')
    return fipslist


# Abbreviation -> full name lookup. Besides the states it also carries
# 'D.C.', several territories (AS, GU, MP, PR, VI) and a 'NA' -> 'National'
# entry. Note that the District of Columbia key is spelled 'D.C.'.
subs = {
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AS": "American Samoa",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "D.C.": "District of Columbia",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "GU": "Guam",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MP": "Northern Mariana Islands",
    "MS": "Mississippi",
    "MT": "Montana",
    "NA": "National",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "PR": "Puerto Rico",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VI": "Virgin Islands",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}

######################################################################
######################################################################
######################################################################
######################################################################
######################################################################

In [79]:
# Load the FIPS lookup table. dtype=object keeps every cell as read from the
# file (no numeric conversion of the codes).
# Every path in this cell starts with '../', i.e. it is relative to the parent
# directory, so it resolves the same before and after the os.chdir below.
code_COUNTYSTATE = pd.read_csv('../FIPS_tags/code_COUNTYSTATE.csv', dtype=object)
code_COUNTYSTATE_np = np.array(code_COUNTYSTATE)
all_counties = np.array(code_COUNTYSTATE['COUNTY,STATE'])



# Read in all un-analyzed daily updates
# finished.txt lists one already-processed file name per line. It is created
# by the processing loop on its first append, so a missing file simply means
# nothing has been processed yet.
finished_path = '../Daily Updates/finished.txt'
if os.path.exists(finished_path):
    with open(finished_path, 'r') as f:
        finished = f.read()
else:
    finished = ''

# NOTE: this changes the working directory for the rest of the session; the
# daily CSVs are later opened by bare file name relative to this directory.
os.chdir('../Daily Updates/')
dir_contents = np.array(os.listdir())
cond = [name.endswith('.csv') for name in dir_contents]
csv_files1 = dir_contents[cond]

# Sorted so files are processed (and appended to the output) in a
# reproducible order; a bare set difference has arbitrary ordering.
csv_files = np.array(sorted(set(csv_files1) - set(finished.splitlines())), dtype=str)



######################################################################

# Two-column table; the loop below reads column 0 as the file name and
# column 1 as the date string to stamp on that file's rows.
datetime_key = pd.read_csv('../Daily Updates/key/File_date_key.csv')
datetime_key = np.array(datetime_key)


# Convert each un-analyzed daily update into (FIPS, Datetime, Cases) rows and
# append them to the formatted time-series file.

# Full state name -> abbreviation; built once since it does not change per file.
rev_subs = {v: k for k, v in subs.items()}

# Regions that are mapped to one fixed 'COUNTY,STATE' entry regardless of the
# county column.
special_combos = {
    'D.C.': 'Washington,DC',
    'GU': 'Guam,GU',
    'PR': 'SANJUAN,PR',
    'VI': 'STTHOMAS,VI',
}

# File name -> date value. If a file name is listed more than once, the last
# row wins.
date_by_file = {row[0]: row[1] for row in datetime_key}

# newline='' is what pandas asks for when to_csv is handed an open file object;
# the with-block guarantees the handle is closed even if a file fails midway.
with open('../DAILY_timeseries_formatted.csv', 'a', newline='') as g:
    for i in np.arange(0,len(csv_files)):
        current_file = csv_files[i]

        ##########    PART 1:  Get Datetime     ############

        # Without a date the rows cannot be stamped. Skip the file and do NOT
        # mark it finished, so it is picked up again once the key is fixed.
        if current_file not in date_by_file:
            print('Error:  File_date_key does not reflect file name', current_file)
            continue

        ##########    PART 2:  Get FIPS Code     ############

        # Columns are read by position: 0 = state, 1 = county, 2 = cases
        # (assumed layout of the daily CSVs -- TODO confirm against the files).
        file = np.array(pd.read_csv(current_file))
        county = file[:,1]

        # Convert states to abbreviated format; unknown values pass through.
        long_states = file[:,0]
        state = [rev_subs.get(item,item) for item in long_states]

        # Handle exceptions
        combo = []
        for county_name, state_abbr in zip(county, state):
            if state_abbr in special_combos:
                combo.append(special_combos[state_abbr])
            else:
                # str() so a missing (NaN) county or state yields an unmatched
                # name, which is filtered out below, instead of a TypeError.
                combo.append(str(county_name)+','+str(state_abbr))

        # Make it smushed
        daily_cases = np.array(combo)
        daily_cases = smushed_notation(daily_cases)
        # Find FIPS
        fips_indicator = fips_finder(daily_cases,code_COUNTYSTATE_np)

        ##########    PART 3:  Combine & Clean     ############

        # One copy of the file's date per row.
        datetimes = np.full(len(file), str(date_by_file[current_file]))

        check = np.column_stack((fips_indicator,datetimes,file[:,2]))

        # Keep only rows whose county could be matched to a FIPS code.
        cond_hasFIPS = (check[:,0]!='.')
        good_data = check[cond_hasFIPS]

        good_data_pd = pd.DataFrame(good_data, columns=['FIPS','Datetime','Cases'])

        ##########    PART 4:  Track information     ############

        # Send to data file first ...
        good_data_pd.to_csv(g,header=False, index=False)
        g.flush()

        # ... and only then record that this file has been analyzed, so a
        # failed write never leaves a file marked as finished.
        with open('../Daily Updates/finished.txt','a') as f:
            f.write(current_file+'\n')