In [1]:
import pandas as pd

In [2]:
# Read in the original data: cumulative cases per Texas county per day.
# A few extra provisions are needed to skip extraneous rows and to shape the column names.

# Spreadsheet rows that are not per-county data (0, 1 and 257 through 267).
extraneous_rows = [0, 1, *range(257, 268)]

data_file_df = pd.read_excel(
    'Texas COVID-19 Case Count Data by County.xlsx',
    skiprows=extraneous_rows,
)

# Known fixes for headers whose last four characters are not a clean date.
# (presumably the trailing asterisks are footnote markers in the spreadsheet - confirm)
header_fixes = {'-15*': '7-15', '17**': '7-17'}

# rename() and drop() return new frames, so the results must be assigned; the earlier
# attempt at the header fix "didn't work" only because its return value was discarded.
data_file_df = (
    data_file_df
    # Keep just the last four characters of every header.
    # This will need adjustment come October, when the month takes two digits.
    .rename(columns=lambda label: label[-4:])
    # Data for 3-07 and 3-08 are missing, so pretend that anything before that is
    # missing as well.  These are mostly zero anyway.
    .drop(columns=["3-04", "3-05", "3-06"])
    # Labels absent from the frame are ignored by rename(), just as the old loop did.
    .rename(columns=header_fixes)
)

In [3]:
def assign_grade(value):
    """Return the text label of the bin that ``value`` falls into.

    Bins are checked from the highest lower bound down, so the first bound
    that ``value`` reaches decides the label.  Values between 0 and 5 get
    ">0", exactly zero gets "none", and anything below zero gets "negative".
    A value for which no comparison holds (NaN, for instance) yields a
    single blank.

    Historical note: the original binning used bounds 20/10/8/6/4/2/>0 with
    letter labels A-G, and N for negative values.
    """
    # (inclusive lower bound, label), highest bound first.
    bins = (
        (40, ">40"),
        (28, ">28"),
        (21, ">21"),
        (15, ">15"),
        (10, ">10"),
        (5, ">5"),
    )

    for lower_bound, label in bins:
        if value >= lower_bound:
            return label

    if value > 0:
        return ">0"
    if value == 0:
        return "none"
    if value < 0:
        return "negative"

    # Nothing matched: keep the blank placeholder.
    return " "


In [4]:
# Per-county reference information that the main case-count file does not carry.
# Only the first five rows are displayed as a quick sanity check.

codes_df = pd.read_csv(filepath_or_buffer='codes2.csv')
codes_df.head(n=5)

Unnamed: 0,Name,Section,Number,Population,FIPS
0,Anderson,J,11,58199,48001
1,Andrews,C,11,22269,48003
2,Angelina,J,23,90437,48005
3,Aransas,N,29,27699,48007
4,Archer,F,13,8344,48009


In [5]:
# Attach the per-county reference columns to the daily counts, matching rows on "Name".
# merge() defaults to an inner join, so only names found in both frames are kept.
covid19_df = codes_df.merge(data_file_df, on="Name")
covid19_df

Unnamed: 0,Name,Section,Number,Population,FIPS,3-09,3-10,3-11,3-12,3-13,...,8-02,8-03,8-04,8-05,8-06,8-07,8-08,8-09,8-10,8-11
0,Anderson,J,11,58199,48001,0,0,0,0,0,...,2209,2307,2329,2355,2353,2379,2402,2402,2403,2406
1,Andrews,C,11,22269,48003,0,0,0,0,0,...,266,267,268,273,277,279,297,297,304,304
2,Angelina,J,23,90437,48005,0,0,0,0,0,...,1647,1647,1745,1755,1769,1796,1796,1796,1796,1828
3,Aransas,N,29,27699,48007,0,0,0,0,0,...,157,158,158,162,165,169,172,173,177,182
4,Archer,F,13,8344,48009,0,0,0,0,0,...,20,20,20,20,21,21,21,21,21,27
5,Armstrong,E,33,1948,48011,0,0,0,0,0,...,8,8,9,7,7,8,8,8,8,8
6,Atascosa,N,14,51831,48013,0,0,0,0,0,...,405,405,413,419,425,434,437,437,437,456
7,Austin,K,12,30402,48015,0,0,0,0,0,...,213,213,216,218,223,230,230,242,249,269
8,Bailey,D,11,7692,48017,0,0,0,0,0,...,158,158,159,166,168,170,171,171,171,175
9,Bandera,B,25,21246,48019,0,0,0,0,0,...,83,83,83,89,89,89,91,91,91,92


In [6]:
# Create a parallel dataframe with cumulative cases per 10,000 people in each county.
# The first five columns are county metadata; every later column holds one day's count.
first_date_col = 5

# Each county's population expressed in units of 10,000 residents.
population_per_10k = covid19_df["Population"] / 10000

# Divide every day column by the row's population in one vectorised step.  This avoids
# writing floats one cell at a time into integer-typed columns, and the metadata columns
# are carried over unchanged.
covid19rate_df = pd.concat(
    [
        covid19_df.iloc[:, :first_date_col],
        covid19_df.iloc[:, first_date_col:].div(population_per_10k, axis=0),
    ],
    axis=1,
)

In [7]:
 # Display the per-10,000 rate table as this cell's output.
 covid19rate_df

Unnamed: 0,Name,Section,Number,Population,FIPS,3-09,3-10,3-11,3-12,3-13,...,8-02,8-03,8-04,8-05,8-06,8-07,8-08,8-09,8-10,8-11
0,Anderson,J,11,58199,48001,0.0,0.0,0.0,0.0,0.0,...,379.559786,396.398564,400.178697,404.646128,404.302479,408.769910,412.721868,412.721868,412.893692,413.409165
1,Andrews,C,11,22269,48003,0.0,0.0,0.0,0.0,0.0,...,119.448561,119.897616,120.346670,122.591944,124.388163,125.286272,133.369258,133.369258,136.512641,136.512641
2,Angelina,J,23,90437,48005,0.0,0.0,0.0,0.0,0.0,...,182.115727,182.115727,192.952000,194.057742,195.605781,198.591285,198.591285,198.591285,198.591285,202.129659
3,Aransas,N,29,27699,48007,0.0,0.0,0.0,0.0,0.0,...,56.680747,57.041770,57.041770,58.485866,59.568938,61.013033,62.096105,62.457128,63.901224,65.706343
4,Archer,F,13,8344,48009,0.0,0.0,0.0,0.0,0.0,...,23.969319,23.969319,23.969319,23.969319,25.167785,25.167785,25.167785,25.167785,25.167785,32.358581
5,Armstrong,E,33,1948,48011,0.0,0.0,0.0,0.0,0.0,...,41.067762,41.067762,46.201232,35.934292,35.934292,41.067762,41.067762,41.067762,41.067762,41.067762
6,Atascosa,N,14,51831,48013,0.0,0.0,0.0,0.0,0.0,...,78.138566,78.138566,79.682044,80.839652,81.997260,83.733673,84.312477,84.312477,84.312477,87.978237
7,Austin,K,12,30402,48015,0.0,0.0,0.0,0.0,0.0,...,70.061180,70.061180,71.047957,71.705809,73.350437,75.652918,75.652918,79.600026,81.902506,88.481021
8,Bailey,D,11,7692,48017,0.0,0.0,0.0,0.0,0.0,...,205.408216,205.408216,206.708268,215.808632,218.408736,221.008840,222.308892,222.308892,222.308892,227.509100
9,Bandera,B,25,21246,48019,0.0,0.0,0.0,0.0,0.0,...,39.066177,39.066177,39.066177,41.890238,41.890238,41.890238,42.831592,42.831592,42.831592,43.302269


In [8]:
# Create a parallel dataframe showing each day's increment of cases per county.
# The first five columns are county metadata; every later column holds one day's
# cumulative count.
first_date_col = 5

cumulative = covid19_df.iloc[:, first_date_col:]

# Pair every day column with the column just before it.  The "earlier" slice is
# relabelled so the subtraction lines up column by column and keeps each column's dtype.
later_days = cumulative.iloc[:, 1:]
earlier_days = cumulative.iloc[:, :-1].set_axis(later_days.columns, axis=1)

# The first day column has no predecessor, so it is carried over as-is together with
# the metadata.
# NOTE: columns are differenced in file order; where a calendar date is missing from
# the data, the increment that follows it spans the gap.
covid19incr_df = pd.concat(
    [covid19_df.iloc[:, : first_date_col + 1], later_days - earlier_days],
    axis=1,
)

In [9]:
# Compute the seven-day rolling average of the daily increments, expressed per
# 100,000 residents of each county.

first_date_col = 5   # columns before this one are county metadata
window_days = 7
# A full window needs `window_days` increments, so averages start this many columns in.
first_avg_col = first_date_col + window_days - 1

increments = covid19incr_df.iloc[:, first_date_col:].to_numpy()
n_windows = max(increments.shape[1] - window_days + 1, 0)

# window_totals[:, j] is the sum of increments[:, j : j + window_days].  It is built by
# adding seven shifted slices, newest day first, instead of visiting every cell.
window_totals = sum(
    increments[:, offset : offset + n_windows]
    for offset in reversed(range(window_days))
)

# Population comes straight from covid19_df; the per-10,000 rate frame holds the same
# values, but it is not otherwise needed for this step.
population_per_100k = (covid19_df["Population"] / 100000).to_numpy()[:, None]
rolling_avg = (window_totals / window_days) / population_per_100k

# Columns before first_avg_col (metadata plus the days without a full window) are
# carried over from covid19_df unchanged, so those day columns still hold cumulative counts.
covid19ravg_df = pd.concat(
    [
        covid19_df.iloc[:, :first_avg_col],
        pd.DataFrame(
            rolling_avg,
            index=covid19_df.index,
            columns=covid19_df.columns[first_avg_col:],
        ),
    ],
    axis=1,
)

In [10]:
# These days come before a seven-day rolling average can be computed, and so are not wanted.
# 3-14 is also missing from the data, which is why the list jumps from 3-13 to 3-15.
warmup_days = ["3-09", "3-10", "3-11", "3-12", "3-13", "3-15"]

# A single drop() is all-or-nothing: if any label is absent it raises KeyError before
# removing anything, so a failed run never leaves the frame half-trimmed.
covid19ravg_df = covid19ravg_df.drop(columns=warmup_days)

covid19ravg_df

Unnamed: 0,Name,Section,Number,Population,FIPS,3-16,3-17,3-18,3-19,3-20,...,8-02,8-03,8-04,8-05,8-06,8-07,8-08,8-09,8-10,8-11
0,Anderson,J,11,58199,48001,0.000000,0.000000,0.000000,0.000000,0.000000,...,16.936963,40.501432,40.992359,44.919770,43.937917,44.919770,47.374403,47.374403,23.564470,18.900668
1,Andrews,C,11,22269,48003,0.000000,0.000000,0.000000,0.000000,0.000000,...,26.301778,25.018764,23.094244,22.452737,15.396163,15.396163,26.943284,19.886710,23.735751,23.094244
2,Angelina,J,23,90437,48005,0.000000,0.000000,0.000000,0.000000,0.000000,...,38.227085,38.227085,40.754495,29.855037,25.747995,30.013000,23.536511,23.536511,23.536511,13.110942
3,Aransas,N,29,27699,48007,0.000000,0.000000,0.000000,0.000000,0.000000,...,15.988200,13.925206,11.862213,13.409458,8.251974,8.251974,9.799219,8.251974,9.799219,12.377961
4,Archer,F,13,8344,48009,0.000000,0.000000,0.000000,0.000000,0.000000,...,8.560471,8.560471,8.560471,8.560471,6.848377,6.848377,1.712094,1.712094,1.712094,11.984660
5,Armstrong,E,33,1948,48011,0.000000,0.000000,0.000000,0.000000,0.000000,...,29.334116,29.334116,29.334116,14.667058,14.667058,22.000587,0.000000,0.000000,0.000000,-7.333529
6,Atascosa,N,14,51831,48013,0.000000,0.000000,0.000000,0.000000,0.000000,...,3.307453,3.307453,4.685558,6.339284,5.512421,7.993010,8.819873,8.819873,8.819873,11.851705
7,Austin,K,12,30402,48015,0.000000,0.000000,0.000000,0.000000,0.000000,...,14.566711,9.397878,6.578515,4.229045,5.638727,8.458090,7.988196,13.626923,16.916180,24.904377
8,Bailey,D,11,7692,48017,0.000000,0.000000,0.000000,0.000000,0.000000,...,48.287646,48.287646,33.429909,33.429909,31.572691,26.001040,24.143823,24.143823,24.143823,29.715474
9,Bandera,B,25,21246,48019,0.000000,0.000000,0.000000,0.000000,0.000000,...,8.068746,8.068746,3.361977,6.723955,4.034373,4.034373,5.379164,5.379164,5.379164,6.051559


In [11]:
# Assign grades to the rates of occurrence.
# The first five columns are county metadata; every later column holds one day's
# rolling-average rate.
first_date_col = 5

# Grade one whole day column at a time with assign_grade.  This builds the label
# columns directly instead of overwriting numeric cells with strings one by one.
graded_days = covid19ravg_df.iloc[:, first_date_col:].apply(
    lambda day: day.map(assign_grade)
)

covid19grad_df = pd.concat(
    [covid19ravg_df.iloc[:, :first_date_col], graded_days],
    axis=1,
)

covid19grad_df
            

Unnamed: 0,Name,Section,Number,Population,FIPS,3-16,3-17,3-18,3-19,3-20,...,8-02,8-03,8-04,8-05,8-06,8-07,8-08,8-09,8-10,8-11
0,Anderson,J,11,58199,48001,none,none,none,none,none,...,>15,>40,>40,>40,>40,>40,>40,>40,>21,>15
1,Andrews,C,11,22269,48003,none,none,none,none,none,...,>21,>21,>21,>21,>15,>15,>21,>15,>21,>21
2,Angelina,J,23,90437,48005,none,none,none,none,none,...,>28,>28,>40,>28,>21,>28,>21,>21,>21,>10
3,Aransas,N,29,27699,48007,none,none,none,none,none,...,>15,>10,>10,>10,>5,>5,>5,>5,>5,>10
4,Archer,F,13,8344,48009,none,none,none,none,none,...,>5,>5,>5,>5,>5,>5,>0,>0,>0,>10
5,Armstrong,E,33,1948,48011,none,none,none,none,none,...,>28,>28,>28,>10,>10,>21,none,none,none,negative
6,Atascosa,N,14,51831,48013,none,none,none,none,none,...,>0,>0,>0,>5,>5,>5,>5,>5,>5,>10
7,Austin,K,12,30402,48015,none,none,none,none,none,...,>10,>5,>5,>0,>5,>5,>5,>10,>15,>21
8,Bailey,D,11,7692,48017,none,none,none,none,none,...,>40,>40,>28,>28,>28,>21,>21,>21,>21,>28
9,Bandera,B,25,21246,48019,none,none,none,none,none,...,>5,>5,>0,>5,>0,>0,>5,>5,>5,>5


In [12]:
# Write each derived table to its own CSV file (header row included, index omitted).
for frame, filename in (
    (covid19rate_df, "covid19rate.csv"),
    (covid19ravg_df, "covid19ravg.csv"),
    (covid19grad_df, "covid19grad.csv"),
    (covid19incr_df, "covid19incr.csv"),
):
    frame.to_csv(filename, header=True, index=False)

In [13]:
# This puts the data in a format that's friendlier to Tableau: one row per
# (county, date) pair instead of one column per date.

first_date_col = 5   # columns before this one are county metadata

day_values = covid19ravg_df.iloc[:, first_date_col:]
n_counties, n_days = day_values.shape

# Flatten row by row: every date of the first county, then every date of the next, ...
flat_values = day_values.to_numpy().ravel()

# Per-county fields are repeated once per date and the date labels are cycled once
# per county, so all six columns follow that same row-by-row order.
covid19ravg2_df = pd.DataFrame({
    "Counties": covid19ravg_df["Name"].repeat(n_days).to_numpy(),
    "Populations": covid19ravg_df["Population"].repeat(n_days).to_numpy(),
    "FIPS": covid19ravg_df["FIPS"].repeat(n_days).to_numpy(),
    "Dates": list(day_values.columns) * n_counties,
    "Values": flat_values,
    "Grades": [assign_grade(rate) for rate in flat_values],
    })

covid19ravg2_df

Unnamed: 0,Counties,Populations,FIPS,Dates,Values,Grades
0,Anderson,58199,48001,3-16,0.000000,none
1,Anderson,58199,48001,3-17,0.000000,none
2,Anderson,58199,48001,3-18,0.000000,none
3,Anderson,58199,48001,3-19,0.000000,none
4,Anderson,58199,48001,3-20,0.000000,none
5,Anderson,58199,48001,3-21,0.000000,none
6,Anderson,58199,48001,3-22,0.000000,none
7,Anderson,58199,48001,3-23,0.000000,none
8,Anderson,58199,48001,3-24,0.000000,none
9,Anderson,58199,48001,3-25,0.000000,none


In [14]:
# Save the long-format table for Tableau (header row included, index omitted).
covid19ravg2_df.to_csv("covid19ravg2.csv", header=True, index=False)