In [1]:
from cs103 import *
from typing import NamedTuple, List
import csv
import matplotlib.pyplot as plt


Case = NamedTuple("Case", [('number', int), ('prov', str), ('country', str), 
                           ('conf', float), ('death', float), ('recov', float)])
#interp. a Case has a
# serial number, Province/State, Country, Confirmed cases, Deaths, Recovered

@typecheck
def fn_for_case(c: Case)->...:  
    return...(c.number,
              c.prov,
              c.country,
              c.conf, 
              c.death,
              c.recov)

BASECASE = Case(0, "None", "None", 0, 0, 0)
C1 = Case(1, "Anhui", "China", 1.0, 0.0, 0.0)
C2 = Case(2, "Beijing", "China", 14.0, 0.0, 0.0)
C3 = Case(3, "Chongqing", "China", 6.0, 0.0, 0.0)
C4 = Case(4, "Fujian", "China", 1.0, 0.0, 0.0)

@typecheck
def fn_for_loc(loc: List[Case]) -> ...:   
    # description of the accumulator           
    acc = ...      # type: ...
    for c in loc:
        acc = ...(fn_for_case(c), acc)

    return ...(acc)

LOC0 = []
LOC1 = [C1]
LOC2 = [C1, C2, C3, C4]

In [2]:
### functions

@typecheck
def read(filename: str) -> List[Case]:
    """    
    reads information from the specified file and returns a list of cases. Each Case has a
    serial number, Province/State, Country, Confirmed cases, Deaths, Recovered
    """
    #return [] #stub
    # Template from HtDAP
    # loc contains the result so far
    loc = [] # type: List[Case]

    with open(filename) as csvfile:
        
        reader = csv.reader(csvfile)
        next(reader) # skip header line

        for row in reader:
            # you may not need to store all the rows, and you may need
            # to convert some of the strings to other types
            c = Case(parse_int(row[0]), row[2], row[3], parse_float(row[5]), 
                     parse_float(row[6]), parse_float(row[7]))
            loc.append(c)
    
    return loc

#begin testing
start_testing()

expect(read('testfile_empty.csv'), [])
expect(read('testfile_1case.csv'), [C1])
expect(read("testfile_4case.csv"), [C1, C2, C3, C4])

summary()


[92m3 of 3 tests passed[0m


In [19]:
read('2019_nCoV_data.csv')

AttributeError: 'DataFrameGroupBy' object has no attribute 'reader'

In [4]:
@typecheck
def is_bigger(n1: float, n2: float)-> bool:
    """
    takes two numbers and returns True if the first is bigger than the second
    """
    
    if n1 == None:
        return False
    elif n2 == None:
        return True
    else:
        return n1 > n2
    
@typecheck
def more_confirmed(c1: Case, c2: Case)-> bool:
    """
    takes two cases and returns true if the first case has more confirmed cases
    """
    return c1.conf > c2.conf
    
@typecheck
def most_confirmed_prov(loc: List[Case])-> str:
    """
    takes a list of cases and returns the state/province with the most cases
    """
    
    biggest = BASECASE
    
    for c in loc:
        if more_confirmed(c, biggest):
            biggest = c
        
        
    return biggest.prov

@typecheck
def most_confirmed_case(loc: List[Case])-> Case:
    """
    takes a list of cases and returns the case with the most confirmed cases
    """
    
    biggest = BASECASE
    
    for c in loc:
        if more_confirmed(c, biggest):
            biggest = c       
        
    return biggest

@typecheck
def is_most_confirmed(c1: Case, loc: List[Case])-> bool:
    """
    takes a list of cases and a case and returns true if that case is the most in the list
    """
    return most_confirmed_case(loc) == c1

@typecheck
def partition(array, start, end):
    pivot = array[start]
    low = start + 1
    high = end
    
    while True:
        while low <= high and array[high] >= pivot:
            high = high - 1
        while low <= high and array[low] <= pivot:
            low = low + 1
        if low <= high:
            array[low], array[high] = array[high], array[low]
        else:
            break
    array[start], array[high] = array[high], array[start]
    
    return high

@typecheck
def quick_sort(array, start, end):
    if start >= end:
        return
    p = partition(array, start, end)
    quick_sort(array, start, p-1)
    quick_sort(array, p+1, end)
        

@typecheck
def sort_conf_cases(loc: List[Case])-> List[Case]:
    """
    sorts the cases in order from case with most confirmed to least
    """
    acc = []
    biggest = BASECASE
    for c in loc:
        if more_confirmed(c, BASECASE):
            biggest = c 
            acc = acc + [c]
                  
    return acc
        
    
start_testing()

expect (is_bigger(1, 2), False)
expect (is_bigger(2, 1), True)
expect (is_bigger(1, 1), False)

expect (more_confirmed(BASECASE, C1), False)
expect (more_confirmed(C1, BASECASE), True)
expect (more_confirmed(C2, C1), True)
expect (more_confirmed(C2, C4), True)
expect (more_confirmed(C1, C3), False)

expect (most_confirmed_prov(LOC0), "None")
expect (most_confirmed_prov(LOC1), "Anhui")
expect (most_confirmed_prov(LOC2), "Beijing")

expect (most_confirmed_case(LOC0), BASECASE)
expect (most_confirmed_case(LOC1), C1)
expect (most_confirmed_case(LOC2), C2)

expect (is_most_confirmed(BASECASE, LOC0), True)
expect (is_most_confirmed(C1, LOC0), False)
expect (is_most_confirmed(BASECASE, LOC1), False)
expect (is_most_confirmed(C1, LOC1), True)
expect (is_most_confirmed(C1, LOC2), False)
expect (is_most_confirmed(C2, LOC2), True)

expect (sort_conf_cases (LOC0), [])
expect (sort_conf_cases (LOC1), [C1])
expect (sort_conf_cases (LOC2), [C2, C3, C1, C4])

summary()
    

[91mTest failed:[0m expected [Case(number=2, prov='Beijing', country='China', conf=14.0, death=0.0, recov=0.0), Case(number=3, prov='Chongqing', country='China', conf=6.0, death=0.0, recov=0.0), Case(number=1, prov='Anhui', country='China', conf=1.0, death=0.0, recov=0.0), Case(number=4, prov='Fujian', country='China', conf=1.0, death=0.0, recov=0.0)] but got [Case(number=1, prov='Anhui', country='China', conf=1.0, death=0.0, recov=0.0), Case(number=2, prov='Beijing', country='China', conf=14.0, death=0.0, recov=0.0), Case(number=3, prov='Chongqing', country='China', conf=6.0, death=0.0, recov=0.0), Case(number=4, prov='Fujian', country='China', conf=1.0, death=0.0, recov=0.0)]
   [1mLine 129: [0mexpect (sort_conf_cases (LOC2), [C2, C3, C1, C4])
[91m22 of 23 tests passed[0m


In [5]:
most_confirmed_prov(read('2019_nCoV_data.csv'))

'Hubei'

In [6]:
BASECASE = Case(0, "None", "None", 0, 0, 0)
C1 = Case(1, "Anhui", "China", 1.0, 0.0, 0.0)
C2 = Case(2, "Beijing", "China", 14.0, 0.0, 0.0)
C3 = Case(3, "Chongqing", "China", 6.0, 0.0, 0.0)
C4 = Case(4, "Fujian", "China", 1.0, 0.0, 0.0)

LOC0 = []
LOC1 = [C1]
LOC2 = [C1, C2, C3, C4]

In [7]:
print (C4)


Case(number=4, prov='Fujian', country='China', conf=1.0, death=0.0, recov=0.0)


In [97]:
import pandas as pd

In [98]:
df = pd.read_csv("./2019_nCoV_data.csv")

In [100]:
df = df.groupby(["Province/State"]).apply(lambda x:x.sort_values(["Confirmed"], ascending = True)).drop_duplicates('Province/State', keep='last')

In [101]:
print(df)

                      Sno                 Date Province/State         Country  \
Province/State                                                                  
Anhui          1649  1650  02/17/2020 22:00:00          Anhui  Mainland China   
Arizona        471    472  01/31/2020 19:00:00        Arizona              US   
Bavaria        487    488  01/31/2020 19:00:00        Bavaria         Germany   
Beijing        1657  1658  02/17/2020 22:00:00        Beijing  Mainland China   
Boston, MA     1712  1713  02/17/2020 22:00:00     Boston, MA              US   
...                   ...                  ...            ...             ...   
Victoria       1691  1692  02/17/2020 22:00:00       Victoria       Australia   
Washington     468    469  01/31/2020 19:00:00     Washington              US   
Xinjiang       1672  1673  02/17/2020 22:00:00       Xinjiang  Mainland China   
Yunnan         1663  1664  02/17/2020 22:00:00         Yunnan  Mainland China   
Zhejiang       1647  1648  0

In [102]:
df.to_csv('test.csv')