In [43]:
f = open('seed.csv', 'r')
raw_lines = f.readlines()
f.close()
lines = [line[:-1] for line in raw_lines[:-1]] + [raw_lines[-1]]
records = [line.split(',') for line in lines]
records[:10]

[['Category',
  'Field',
  'Adjustable',
  'Initial Seed',
  'Variance',
  'Max Daily Growth',
  'Min Daily Growth'],
 ['Health', 'Chronic disease claim data', 'FALSE', '40', '4', '0.03', '-0.01'],
 ['Health', 'Socioeconomic census data', 'FALSE', '20', '5', '0', '0'],
 ['Health', 'Other demographic data', 'FALSE', '40', '3', '0', '0'],
 ['Health', 'Density', 'FALSE', '30', '7', '0.01', '-0.03'],
 ['Health', 'Comorbidity', 'FALSE', '40', '3', '0.03', '-0.05'],
 ['Health', 'Chronic Disease', 'FALSE', '40', '4', '0.03', '-0.01'],
 ['Capacity of Care', 'Available Beds', 'TRUE', '25', '8', '0.02', '0'],
 ['Capacity of Care',
  'Convertible Bed Capacity',
  'TRUE',
  '28',
  '10',
  '0.1',
  '0'],
 ['Capacity of Care',
  'Trained Health workers',
  'TRUE',
  '22',
  '4',
  '0.02',
  '0']]

In [44]:
categories = set([r[0] for r in records[:1]])

In [45]:
import random
class Factor:
    def __init__(self, record):
        self.category = record[0]
        self.name = record[1]
        self.editable = (record[2] == 'TRUE')
        self.initial_seed = float(record[3])
        self.variance = float(record[4])
        self.max_growth = float(record[5])
        self.min_growth = float(record[6])
    
    def generate_series(self, nDays):
        current = random.gauss(self.initial_seed, self.variance)
        result = [current]
        for i in range(nDays - 1):
            current = current * (1 + random.uniform(self.min_growth, self.max_growth))
            current = max(5, min(95, current))
            result.append(current)
        result = [round(x, 3) for x in result]
        self.series = result
        return result
    
    def record(self):
        return {"name": self.name, "editable": self.editable, "weight": 1}

In [46]:
factors = [Factor(record) for record in records[1:]]

In [47]:
categories = set([factor.category for factor in factors])
category_list = list(categories)
category_dict = {}
column_dict = {}
headers = ['Date'] + [factor.name for factor in factors] + category_list + ['Overall']
for category in categories:
    category_dict[category] = []
    column_dict[category] = []

headers


['Date',
 'Chronic disease claim data',
 'Socioeconomic census data',
 'Other demographic data',
 'Density',
 'Comorbidity',
 'Chronic Disease',
 'Available Beds',
 'Convertible Bed Capacity',
 'Trained Health workers',
 'PPE Supply',
 'Testing capacity',
 'Ventilators',
 'Temp/humidity',
 'Mobility/Activity',
 'Green Space per Person',
 'PM 2.5 concentration',
 'PPE Usage',
 'Social Listening',
 'Cable News Viewership',
 'Talk Radio Penetration',
 'Tabloid Sales',
 'Political Affiliation',
 'Occupational risk score',
 'Commercial Density',
 'Tourism Propensity',
 'Entertainment ',
 'Church attendance',
 'Unemployment rate',
 'Environmental',
 'Capacity of Care',
 'Health',
 'Behavioral',
 'Industrial',
 'Overall']

In [48]:
class Category:
    def __init__(self, name, columns):
        self.name = name
        self.columns = columns
        
    def get_day(self, factors, day_num):
        values = [factor[day_num] for factor in factors]
        return round(sum(values)/len(values), 3)
    
    def generate_series(self, factors):
        column_set = self.columns
        num_days = len(factors[0].series)
        contributing_factors = [factors[i].series for i in range(len(factors)) if i in column_set]
        self.series = [self.get_day(contributing_factors, day) for day in range(num_days)]


In [49]:
bad = []
for factor in factors:
    try:
        category_dict[factor.category].append(factor.name)
        column_dict[factor.category].append(headers.index(factor.name))
    except Error:
        bad.append(factor)

In [50]:
category_class_list = [Category(category, column_dict[category]) for category in category_list]

In [51]:
def generate_record(date, n):
    category_values = [categoryClass.series[n] for categoryClass in category_class_list]
    return [date] + [factor.series[n] for factor in factors] + category_values + [round(sum(category_values)/len(category_values), 2)]

In [52]:
def generate_date(first_month, first_day, day_num):
    day_of_year = [1, 32, 61, 92, 122, 153, 183, 214, 245, 275, 306, 336]
    day_of_sequence_start = day_of_year[first_month - 1] + first_day - 1
    day_to_gen = day_of_sequence_start + day_num # sequence is 0-based, so first day is day 0
    months_in_past = [day for day in day_of_year if day <= day_to_gen]
    month_num = len(months_in_past)
    day_in_month = day_to_gen + 1 - months_in_past[-1]
    return '%02d-%02d' % (month_num, day_in_month)

In [53]:
county_names = ['Alamance', 'Alexander', 'Alleghany', 'Anson', 'Ashe', 'Avery', 'Beaufort', 'Bertie', 'Bladen', 'Brunswick', 'Buncombe', 'Burke', 'Cabarrus', 'Caldwell', 'Camden', 'Carteret', 'Caswell', 'Catawba', 'Chatham', 'Cherokee', 'Chowan', 'Clay', 'Cleveland', 'Columbus', 'Craven', 'Cumberland', 'Currituck', 'Dare', 'Davidson', 'Davie', 'Duplin', 'Durham', 'Edgecombe', 'Forsyth', 'Franklin', 'Gaston', 'Gates', 'Graham', 'Granville', 'Greene', 'Guilford', 'Halifax', 'Harnett', 'Haywood', 'Henderson', 'Hertford', 'Hoke', 'Hyde', 'Iredell', 'Jackson', 'Johnston', 'Jones', 'Lee', 'Lenoir', 'Lincoln', 'Macon', 'Madison', 'Martin', 'McDowell', 'Mecklenburg', 'Mitchell', 'Montgomery', 'Moore', 'Nash', 'New Hanover', 'Northampton', 'Onslow', 'Orange', 'Pamlico', 'Pasquotank', 'Pender', 'Perquimans', 'Person', 'Pitt', 'Polk', 'Randolph', 'Richmond', 'Robeson', 'Rockingham', 'Rowan', 'Rutherford', 'Sampson', 'Scotland', 'Stanly', 'Stokes', 'Surry', 'Swain', 'Transylvania', 'Tyrrell', 'Union', 'Vance', 'Wake', 'Warren', 'Washington', 'Watauga', 'Wayne', 'Wilkes', 'Wilson', 'Yadkin', 'Yancey']

In [54]:
class County:
    def __init__(self, name):
        self.name = name
    
    def gen_records(self, first_month, first_day, num_days):
        result = [headers[:]]
        for factor in factors: factor.generate_series(num_days)
        for category_class in category_class_list: category_class.generate_series(factors)
        for day in range(num_days):
            date = generate_date(first_month, first_day, day)
            result.append(generate_record(date, day))
        self.records = result
    
    def check_records(self):
        length = len(self.records[0])
        bad = [record for record in self.records if len(record) != length]
        return bad
    
    def county_record(self):
        return {"name": self.name, "records": self.records}

In [55]:
counties = [County(name) for name in county_names]

In [56]:
for county in counties: county.gen_records(7, 1, 62)

In [57]:
bad = [county.check_records() for county in counties]

In [58]:
len([b for b in bad if len(b) > 0])

0

In [59]:
def category_record():
    result = {}
    for category in category_dict:
        result[category] = {"factors": category_dict[category], "weight": 1}
    return result
        

In [60]:
import json
data_record = {"categories": category_record(), "factors": [factor.record() for factor in factors], "records":[county.county_record() for county in counties]}
result = json.dumps(data_record)
f = open('county_data.json', 'w')
f.write(result)
f.write('\n')
f.close()