Skip to content

Commit

Permalink
Update from JHU data regularly
Browse files Browse the repository at this point in the history
  • Loading branch information
kennygrant committed Apr 5, 2020
1 parent 9fa4fb6 commit 4deb210
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 148 deletions.
6 changes: 2 additions & 4 deletions main.go
Expand Up @@ -43,7 +43,7 @@ func main() {
}

// Schedule a regular data update/reload
//ScheduleUpdates()
ScheduleUpdates()

// Load our template files into memory
loadTemplates()
Expand Down Expand Up @@ -202,9 +202,7 @@ func handleReload(w http.ResponseWriter, r *http.Request) {
log.Printf("reload error:%s", err)
http.Error(w, err.Error(), 500)
} else {
w.Header().Set("Content-Type", "text/html; charset=utf-8")
w.WriteHeader(200)
w.Write([]byte("reloaded"))
http.Redirect(w, r, "/", 302)
}

}
Expand Down
42 changes: 42 additions & 0 deletions series/series.go
Expand Up @@ -124,6 +124,14 @@ func (d *Data) Title() string {
return fmt.Sprintf("%s (%s)", d.Province, d.Country)
}

// UpdatedAtDisplay returns a human-readable "last updated" label for this
// series. When UpdatedAt is the zero time (no update recorded) it returns an
// empty string so nothing is displayed.
func (d *Data) UpdatedAtDisplay() string {
	// Date, 24h time and timezone abbreviation.
	const layout = "2006-01-02 15:04 MST"

	if !d.UpdatedAt.IsZero() {
		stamp := d.UpdatedAt.Format(layout)
		return fmt.Sprintf("Data last updated at %s", stamp)
	}

	return ""
}

// SetUpdated updates UpdatedAt if it is before this new time
func (d *Data) SetUpdated(updated time.Time) {
if d.UpdatedAt.Before(updated) {
Expand Down Expand Up @@ -575,6 +583,13 @@ func (d *Data) UpdateToday(updated time.Time, deaths, confirmed, recovered, test
}
}

// ResetDays discards every day currently stored on this series and then calls
// AddDays with the number of days that were stored before the reset.
// NOTE(review): presumably AddDays appends that many blank days, so the series
// keeps its length but loses its values - confirm against AddDays.
func (d *Data) ResetDays() {
	// Remember the length before the slice is replaced.
	previous := len(d.Days)

	d.Days = make([]*Day, 0)
	d.AddDays(previous)
}

// FIXME - I think this won't be required

// AddDay adds a day to this series
Expand Down Expand Up @@ -606,3 +621,30 @@ func (d *Data) AddDay(date time.Time, deaths, confirmed, recovered, tested int)
d.Days = append(d.Days, day)
return nil
}

// ShouldIncludeInGlobal reports whether this series should be merged into the
// global total. It returns false for series that would otherwise be counted
// twice, and true for everything else.
func (d *Data) ShouldIncludeInGlobal() bool {
	// The global series is never added to itself.
	if d.IsGlobal() {
		return false
	}

	// Country-level entries for these countries are our own extra series,
	// so they are left out of the global total.
	if d.IsCountry() {
		switch d.Country {
		case "China", "Australia", "Canada":
			return false
		}
	}

	// US provinces are left out because the US already has its own
	// top-level entry in the data.
	if d.IsProvince() && d.Country == "US" {
		return false
	}

	// Our extra UK nations are left out of the global total because JHU
	// supplies a United Kingdom entry.
	if d.Country == "United Kingdom" {
		switch d.Province {
		case "England", "Scotland", "Wales", "Northern Ireland":
			return false
		}
	}

	// Anything not excluded above counts towards the global total.
	return true
}
165 changes: 139 additions & 26 deletions series/update.go
Expand Up @@ -3,47 +3,87 @@ package series
import (
"fmt"
"log"
"sort"
"strconv"
"time"
)

// UpdateFromJHUCases updates from JHU case data
// Cols: FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
func UpdateFromJHUCases(rows [][]string) error {
// UpdateFromJHUCountryCases updates from JHU country cases data files
// several files are required to get all data, all with different formats
// Cols: Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active
func UpdateFromJHUCountryCases(rows [][]string) error {

log.Printf("series: update from JHU %d rows", len(rows))
log.Printf("series: update from JHU country cases %d rows", len(rows))

// For each row in the input data, reject if admin2 completed
for i, row := range rows {
// Check format on row 0
if i == 0 {
if row[0] != "FIPS" || row[2] != "Province_State" || row[11] != "Combined_Key" {
return fmt.Errorf("error reading JHU cases - format invalid for row:%s", row)
if row[0] != "Country_Region" || row[1] != "Last_Update" || row[7] != "Active" {
return fmt.Errorf("error reading JHU country cases - format invalid for row:%s", row)
}
continue
}

// Reject rows with Admin2 completed
if row[1] != "" {
country := row[0]
province := ""

// Find the series for this row
series, err := dataset.FetchSeries(country, province)
if err != nil || series == nil {
continue
}

// If we reach here we have a valid row and series - NB shuffled cols to match our default
updated, deaths, confirmed, recovered, err := readJHURowData(row[1], row[5], row[4], row[6])
if err != nil {
continue
}

// We don't have tested data from JHU so leave it unchanged
series.UpdateToday(updated, deaths, confirmed, recovered, 0)

log.Printf("update: %s u:%v d:%d c:%d r:%d", series, updated, deaths, confirmed, recovered)

}

return nil
}

// UpdateFromJHUStatesCases updates from JHU states cases data files
// several files are required to get all data, all with different formats
// 0 1 2 3 4 5 6 7 8 9
// FIPS,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active
func UpdateFromJHUStatesCases(rows [][]string) error {

log.Printf("series: update from JHU states cases %d rows", len(rows))

// For each row in the input data, reject if admin2 completed
for i, row := range rows {
// Check format on row 0
if i == 0 {
if row[0] != "FIPS" || row[3] != "Last_Update" || row[9] != "Active" {
return fmt.Errorf("error reading JHU states cases - format invalid for row:%s", row)
}
continue
}

province := row[2]
country := row[3]
country := row[2]
province := row[1]

// Read other rows which are are interested in, and ask series to update last day if changed
// Find the series concerned
series, err := dataset.FetchSeries(country, province)
if err != nil || series == nil {
continue
}

// If we reach here we have a valid row and series
updated, deaths, confirmed, recovered, err := readJHURow(row)
// If we reach here we have a valid row and series - NB shuffled cols to match our default
updated, deaths, confirmed, recovered, err := readJHURowData(row[3], row[7], row[6], row[8])
if err != nil {
continue
}

// We don't hav etested data from JHU so leave it unchanged
// We don't have tested data from JHU so leave it unchanged
series.UpdateToday(updated, deaths, confirmed, recovered, 0)

log.Printf("update: %s u:%v d:%d c:%d r:%d", series, updated, deaths, confirmed, recovered)
Expand All @@ -53,32 +93,105 @@ func UpdateFromJHUCases(rows [][]string) error {
return nil
}

// Note row order is different from our standard order
// 0 1 2 3 4 5 6 7 8 9 10 11
// Cols: FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
func readJHURow(row []string) (time.Time, int, int, int, error) {
// Note csv col order is different from our standard order
func readJHURowData(updatedstr, deathsstr, confirmedstr, recoveredstr string) (time.Time, int, int, int, error) {

// Dates are, remarkably, in two different formats in one file
// Try first in the one true format
updated, err := time.Parse("2006-01-02 15:04:05", row[4])
updated, err := time.Parse("2006-01-02 15:04:05", updatedstr)
if err != nil {
return updated, 0, 0, 0, fmt.Errorf("load: error reading updated at series:%s error:%s", row[0], err)
return updated, 0, 0, 0, fmt.Errorf("load: error reading updated at series:%s error:%s", updatedstr, err)
}

confirmed, err := strconv.Atoi(row[4])
deaths, err := strconv.Atoi(deathsstr)
if err != nil {
return updated, 0, 0, 0, fmt.Errorf("load: error reading confirmed series:%s error:%s", row[0], err)
return updated, 0, 0, 0, fmt.Errorf("load: error reading deaths series:%s error:%s", deathsstr, err)
}

deaths, err := strconv.Atoi(row[5])
confirmed, err := strconv.Atoi(confirmedstr)
if err != nil {
return updated, 0, 0, 0, fmt.Errorf("load: error reading deaths series:%s error:%s", row[0], err)
return updated, 0, 0, 0, fmt.Errorf("load: error reading confirmed series:%s error:%s", confirmedstr, err)
}

recovered, err := strconv.Atoi(row[6])
recovered, err := strconv.Atoi(recoveredstr)
if err != nil {
return updated, 0, 0, 0, fmt.Errorf("load: error reading recovered series:%s error:%s", row[0], err)
return updated, 0, 0, 0, fmt.Errorf("load: error reading recovered series:%s error:%s", recoveredstr, err)
}

return updated, deaths, confirmed, recovered, nil
}

// CalculateGlobalSeriesData rebuilds the aggregate series from scratch: the
// country-level series for China, Australia and Canada (which the JHU datasets
// only provide broken down by province) and the global series.
//
// The four aggregate series are fetched and reset, every series in the dataset
// is merged into the aggregates it belongs to, and the dataset is then
// re-sorted with sort.Stable.
//
// It returns the first error reported by FetchSeries or MergeSeries, or an
// error if one of the aggregate series does not exist.
func CalculateGlobalSeriesData() error {

	// Fetch the aggregate series. A nil series is treated as an error here
	// (other callers of FetchSeries in this package also guard against nil)
	// rather than being dereferenced below.
	china, err := dataset.FetchSeries("China", "")
	if err != nil {
		return err
	}
	if china == nil {
		return fmt.Errorf("series: no series found for China")
	}

	australia, err := dataset.FetchSeries("Australia", "")
	if err != nil {
		return err
	}
	if australia == nil {
		return fmt.Errorf("series: no series found for Australia")
	}

	canada, err := dataset.FetchSeries("Canada", "")
	if err != nil {
		return err
	}
	if canada == nil {
		return fmt.Errorf("series: no series found for Canada")
	}

	global, err := dataset.FetchSeries("", "")
	if err != nil {
		return err
	}
	if global == nil {
		return fmt.Errorf("series: no global series found")
	}

	// Reset all these series as we're recalculating from scratch
	china.ResetDays()
	australia.ResetDays()
	canada.ResetDays()
	global.ResetDays()

	// Add global country entries for countries with data broken down at province level
	// these are missing in the datasets from JHU for some reason, though US is now included
	//
	// NOTE(review): if the aggregate series fetched above are themselves
	// elements of dataset, the Country match below also merges each aggregate
	// into itself - confirm MergeSeries tolerates that.
	for _, s := range dataset {

		// Merge into the matching country aggregate, if any.
		var mergeErr error
		switch s.Country {
		case "China":
			mergeErr = china.MergeSeries(s)
		case "Australia":
			mergeErr = australia.MergeSeries(s)
		case "Canada":
			mergeErr = canada.MergeSeries(s)
		}
		if mergeErr != nil {
			return mergeErr
		}

		// Merge into the global total unless the series is excluded.
		if s.ShouldIncludeInGlobal() {
			if err := global.MergeSeries(s); err != nil {
				return err
			}
		}
	}

	// Sort entire dataset by deaths desc to get the right order
	// (the ordering itself is defined by dataset's sort.Interface methods).
	sort.Stable(dataset)

	return nil
}

0 comments on commit 4deb210

Please sign in to comment.