### NYT COVID
https://github.com/nytimes/covid-19-data

### Download selected data and create graphs

In [17]:
using CSV, DataFrames, Plots, Dates #, Interact

In [18]:
gr()
#plotly()

Plots.GRBackend()

### Utilities

In [19]:
function percent_change(df, field, day_idx=0, offset=14) 
    ## Assumes df selected for state or state/county
    ## day_idx is most current day
    r,_ = size(df)
    r > offset + day_idx || return(NaN)
    a,b = df[!,field][[end - day_idx - offset, end - day_idx]]
    (b/a -1) *100
end

percent_change (generic function with 3 methods)

In [20]:
function percent_change_2(df, day::Date, delta::Integer)
    d2 = day
    d1 = day - Dates.Day(delta)
    a,b=filter(r-> r.date == d2 || r.date == d1 ,[:date, field])[:,2]
    return (b/a -1) *100
end

percent_change_2 (generic function with 1 method)

In [21]:
function moving_avg(col, days)
    total = copy(col)
    days <= length(col) + 1 || (total = NaN; return(total))
    for i in 1:days-1
        new = col[1:i]
        append!(new,col[1:end-i])
        total = total .+ new
    end
    total./days
end

moving_avg (generic function with 1 method)

In [22]:
function augment_cols!(df,days_ma=7) # Assumes {:cases, :deaths} exist
    df[:,:casesIncrease]   = append!([0],df.cases[2:end] - df.cases[1:end-1])
    df[:,:deathsIncrease]  = append!([0],df.deaths[2:end] - df.deaths[1:end-1])
    df[:,:casesMA]         = moving_avg(df.cases,days_ma)
    df[:,:deathsMA]        = moving_avg(df.deaths,days_ma)
    df[:,:casesIncreaseMA] = moving_avg(df.casesIncrease,days_ma)
    df[:,:deathsIncreaseMA]= moving_avg(df.deathsIncrease,days_ma)
    df
end

augment_cols! (generic function with 2 methods)

In [23]:
function get_state_population(state::String)::Integer
    # Use state abbreviations, i.e., "IL"
    state = "US_$(state)"
    url = "https://open-covid-19.github.io/data/metadata.csv"
    df = CSV.read(download(url),silencewarnings=true)
    first(df[df.Key.==state,:Population])
end;

### Get NYT State COVID Data

All data for NYT is ordered from earliest to lastest date

#### Download the NYT COVID Data for each State

In [24]:
function get_nyt_state_covid_data!(ma_days::Integer=7) # All states
    url = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
    df = CSV.read(download(url),silencewarnings=true)
    df = sort(df)
    df[!,:casesIncrease]   .= 0
    df[!,:deathsIncrease]  .= 0
    df[!,:casesMA]         .= 0.0
    df[!,:deathsMA]        .= 0.0
    df[!,:casesIncreaseMA] .= 0.0
    df[!,:deathsIncreaseMA].= 0.0
    for s in groupby(df,:state)
        augment_cols!(s,ma_days)
    end
    df
end

get_nyt_state_covid_data! (generic function with 2 methods)

dfs is a DataFrame of $State$ Data

In [25]:
dfs = get_nyt_state_covid_data!();

#### Select data for a specific $State$

In [26]:
select_region(dfs,state::String) = filter(r-> r.state .== state,dfs) # dfs should be for states only

select_region (generic function with 1 method)

In [27]:
last(select_region(dfs,"Illinois"))

Unnamed: 0_level_0,date,state,fips,cases,deaths,casesIncrease,deathsIncrease,casesMA
Unnamed: 0_level_1,Date,String,Int64,Int64,Int64,Int64,Int64,Float64
114,2020-05-16,Illinois,17,92669,4149,2141,74,85157.9


In [28]:
st = "Illinois"
for r in eachrow(filter(r->r.state == st,dfs)[:,[:date,:cases,:casesIncrease,:casesIncreaseMA]])
    println("$(r.date)\t$(r.cases)\t$(r.casesIncrease)\t$(r.casesIncreaseMA)")
end

2020-01-24	1	0	0.0
2020-01-25	1	0	0.0
2020-01-26	1	0	0.0
2020-01-27	1	0	0.0
2020-01-28	1	0	0.0
2020-01-29	1	0	0.0
2020-01-30	2	1	0.14285714285714285
2020-01-31	2	0	0.14285714285714285
2020-02-01	2	0	0.14285714285714285
2020-02-02	2	0	0.14285714285714285
2020-02-03	2	0	0.14285714285714285
2020-02-04	2	0	0.14285714285714285
2020-02-05	2	0	0.14285714285714285
2020-02-06	2	0	0.0
2020-02-07	2	0	0.0
2020-02-08	2	0	0.0
2020-02-09	2	0	0.0
2020-02-10	2	0	0.0
2020-02-11	2	0	0.0
2020-02-12	2	0	0.0
2020-02-13	2	0	0.0
2020-02-14	2	0	0.0
2020-02-15	2	0	0.0
2020-02-16	2	0	0.0
2020-02-17	2	0	0.0
2020-02-18	2	0	0.0
2020-02-19	2	0	0.0
2020-02-20	2	0	0.0
2020-02-21	2	0	0.0
2020-02-22	2	0	0.0
2020-02-23	2	0	0.0
2020-02-24	2	0	0.0
2020-02-25	2	0	0.0
2020-02-26	2	0	0.0
2020-02-27	2	0	0.0
2020-02-28	2	0	0.0
2020-02-29	3	1	0.14285714285714285
2020-03-01	3	0	0.14285714285714285
2020-03-02	4	1	0.2857142857142857
2020-03-03	4	0	0.2857142857142857
2020-03-04	4	0	0.2857142857142857
2020-03-05	5	1	0.428571428571428

## Percentage Change in New Daily Cases By State

In [29]:
function percent_change_cases_by_state(df_region,day_index=0,delta=14)
    state, cases, pch_casesIncMA = [],[],[]
    for k in keys(groupby(df_region,[:state]))
        region = select_region(df_region,k.state)
        push!(state,k.state)
        push!(cases, region[end,:cases])
        push!(pch_casesIncMA, round(percent_change(region,:casesIncreaseMA,day_index,delta)))    
    end
    sort(DataFrame(state=state,cases=cases,pch_casesIncMA=pch_casesIncMA),[:state]) 
end

percent_change_cases_by_state (generic function with 3 methods)

In [30]:
# Using dfs, returning df_state_delta
dfsd = percent_change_cases_by_state(dfs,0,3)

Unnamed: 0_level_0,state,cases,pch_casesIncMA
Unnamed: 0_level_1,Any,Any,Any
1,Alabama,11674,-0.0
2,Alaska,392,15.0
3,Arizona,13631,8.0
4,Arkansas,4578,33.0
5,California,78933,-3.0
6,Colorado,21604,-14.0
7,Connecticut,36703,-4.0
8,Delaware,7547,8.0
9,District of Columbia,7042,-16.0
10,Florida,44803,9.0


In [31]:
# Using dfs
days_ma = 7
@manipulate for idx = 3:-1:0
    index = idx*7
    dfsd = sort(percent_change_cases_by_state(dfs,index,days_ma),[:pch_casesIncMA])
    regions,deltas = dfsd.state, dfsd.pch_casesIncMA
    bar(regions,deltas,ylimits=(-100,100),
        xrotation=45,size=(1000,800),color=:orange,
        xticks=(1:1:length(regions), regions),yticks=-100:10:100)
    xlabel!("States")
    ylabel!("Change (%) In Daily Cases")
    title!("Change in New Cases (% Change over Two Weeks)")
end

LoadError: UndefVarError: @manipulate not defined

In [16]:
] add Interact

[32m[1m   Updating[22m[39m registry at `~/.julia/registries/General`


[?25l[2K

[32m[1m   Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`


[?25h

[32m[1m  Resolving[22m[39m package versions...
[32m[1m  Installed[22m[39m AssetRegistry ───────── v0.1.0
[32m[1m  Installed[22m[39m FunctionalCollections ─ v0.5.0
[32m[1m  Installed[22m[39m Pidfile ─────────────── v1.1.0
[32m[1m  Installed[22m[39m Observables ─────────── v0.2.3
[32m[1m  Installed[22m[39m Interact ────────────── v0.10.3
[32m[1m  Installed[22m[39m WebSockets ──────────── v1.5.2
[32m[1m  Installed[22m[39m InteractBase ────────── v0.10.3
[32m[1m  Installed[22m[39m WebIO ───────────────── v0.8.14
[32m[1m  Installed[22m[39m IniFile ─────────────── v0.5.0
[32m[1m  Installed[22m[39m Widgets ─────────────── v0.6.2
[32m[1m  Installed[22m[39m Knockout ────────────── v0.2.4
[32m[1m  Installed[22m[39m JSExpr ──────────────── v0.5.2
[32m[1m  Installed[22m[39m CSSUtil ─────────────── v0.1.1
[32m[1m  Installed[22m[39m HTTP ────────────────── v0.8.14
[32m[1m   Updating[22m[39m `~/.julia/environments/v1.4/Project.toml`
 [9