# In [1]:
import os
import numpy as np
import pandas as pd
import datetime

# Single characters dropped from every name: space, '.', '/', '-', apostrophe.
_SMUSH_DROP_TABLE = str.maketrans('', '', " ./-'")


def smushed_notation(a):
    '''
    Rewrite every entry of `a` in compact ("smushed") upper-case form.

    input
    a = indexable, mutable sequence of strings (list, 1-D object array, ...)

    output
    the same object `a`, modified in place: each entry is upper-cased,
    stripped of spaces, '.', '/', '-' and apostrophes, and then has the
    substrings 'COUNTY' and 'PARISH' removed (in that order)
    '''
    for idx in range(len(a)):
        # Punctuation goes first so that e.g. 'Coun ty' also loses 'COUNTY'.
        compact = a[idx].upper().translate(_SMUSH_DROP_TABLE)
        a[idx] = compact.replace('COUNTY', '').replace('PARISH', '')
    return a


def fips_finder(smushed,key):
    '''
    Look up the FIPS code for each 'COUNTY,STATE' name.

    input
    smushed = 1-D array in format of 'COUNTY,STATE'
    key = 2-D array in format col1:'FIPS' , col2:'COUNTY,STATE'

    output
    1-D array of fips values (as strings) matching to smushed:
    exactly one value per entry of smushed, in the same order
    if no fips value exists, FIPS='.'
    if a name appears in several rows of key, the first row is used

    Prints a warning when at least one fourth of the entries are unmatched.
    '''
    # Build the name -> FIPS table once. setdefault keeps the first row for a
    # duplicated name, so a repeated key can never add extra output entries
    # and shift the result out of alignment with `smushed`.
    lookup = {}
    for row in key:
        lookup.setdefault(row[1], str(row[0]))

    # dtype=str keeps an empty result a string array as well.
    fipslist = np.array([lookup.get(name, '.') for name in smushed], dtype=str)

    # Catch errors? Skip the ratio for empty input to avoid dividing by zero.
    if len(fipslist) > 0:
        unmatched = np.count_nonzero(fipslist == '.')
        if unmatched / len(fipslist) >= 0.25:
            print('Error likely, more than one fourth unmatched')
    return fipslist


######################################################################
######################################################################
######################################################################
######################################################################
######################################################################

# Read in the JHU time series and the FIPS <-> 'COUNTY,STATE' key table.
# NOTE(review): both paths are relative to the current working directory.
JHU_data =  pd.read_csv('../JHU dataset/JHU_filtered_timeseries.csv')
# dtype=object turns off type inference for the key table
# (presumably so FIPS codes keep their leading zeros - TODO confirm).
code_COUNTYSTATE = pd.read_csv('../FIPS_tags/code_COUNTYSTATE.csv', dtype=object)

# Plain 2-D array form of the key table, as expected by fips_finder.
code_COUNTYSTATE_np = np.array(code_COUNTYSTATE)

######################################################################
#  JHU Data
######################################################################

# Convert the JHU table to an array and normalise column 0 (the location
# strings that are matched against the key) to smushed notation.
# smushed_notation edits its argument in place and JHU_data_np[:,0] is a
# view, so column 0 of JHU_data_np itself is overwritten by this call.
JHU_data_np = np.array(JHU_data)
JHU_locs_with_cases = smushed_notation(JHU_data_np[:,0])

# Look up the FIPS code for each location; '.' marks a name with no match.
fipsvalues = fips_finder(JHU_locs_with_cases,code_COUNTYSTATE_np)

######################################################################
#  Create header row with datetimes
######################################################################

# First output column is the FIPS code; every later column is one date.
header_row = ['FIPS']

# Columns from index 4 onward are labelled with dates, presumably written as
# month/day/two-digit-year without zero padding (e.g. '1/22/20').
# strptime's %m and %d accept unpadded numbers and %y expands the two-digit
# year, so labels from any year parse correctly. Rebuilding the year by
# appending '20' to it is only right for 2020 ('21' would become 2120).
# Each label is stored as text in the form 'YYYY-MM-DD HH:MM:SS'.
for date in JHU_data.columns[4:]:
    header_row.append(str(datetime.datetime.strptime(date, '%m/%d/%y')))

######################################################################
#  Reshape to long format (FIPS, Datetime, Cases) and write to CSV
######################################################################

# Wide table: FIPS code followed by the per-date columns (index 4 onward),
# with the header row stacked on top.
full_pic = np.column_stack((fipsvalues,JHU_data_np[:,4:]))
important_info = np.vstack((header_row,full_pic))

fips_codes = important_info[1:,0]
date_labels = important_info[0,1:]
case_counts = important_info[1:,1:]
n_locations, n_dates = case_counts.shape

# One output row per (date, location) pair, ordered date by date with every
# location listed inside each date. ravel(order='F') walks case_counts column
# by column, which lines up with the tiled FIPS codes and repeated dates.
# astype(str) converts every cell to text directly, so nothing passes through
# a fixed-width byte buffer that could truncate or reject a value.
JHU_3col_np = np.column_stack((
    np.tile(fips_codes, n_dates),
    np.repeat(date_labels, n_locations),
    case_counts.ravel(order='F'),
)).astype(str)

JHU_3col_pd = pd.DataFrame(JHU_3col_np, columns=['FIPS','Datetime','Cases'])
JHU_3col_pd.to_csv('../JHU_FIPS_datetime_cases.csv',index=False)
