In [27]:
#Learn more or give us feedback
from typing import List
import pandas as pd
import datetime
# Raw CSV with the confirmed-cases time series from the CSSEGISandData/COVID-19
# GitHub repository.  The literal must not carry surrounding whitespace: a
# trailing space becomes part of the request path and is rejected as an
# invalid URL by newer Python HTTP clients.
CONFIRMED_CASES_URL = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
    "time_series_19-covid-Confirmed.csv"
)

# Module-level table read by every function below.
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed in
# pandas 2.0 (replacement: `on_bad_lines="skip"`).  Kept as-is because the
# pandas version this file targets is not visible here - confirm and migrate.
df = pd.read_csv(CONFIRMED_CASES_URL, error_bad_lines=False)

"""
When downloading data, it is better to do it once at module (global) scope rather than inside a function.
This speeds up the tests significantly.
"""


def poland_cases_by_date(day: int, month: int, year: int = 2020) -> int:
    """
    Returns confirmed infection cases for country 'Poland' given a date.
    Ex.
    >>> poland_cases_by_date(7, 3, 2020)
    5
    >>> poland_cases_by_date(11, 3)
    31
    :param day: Day of month to get the cases for as an integer indexed from 1
    :param month: Month to get the cases for as an integer indexed from 1
    :param year: 4 digit integer representation of the year to get the cases for, defaults to 2020
    :return: Number of cases on a given date as an integer
    :raises ValueError: If day/month/year do not form a valid calendar date
    :raises KeyError: If the module-level ``df`` has no column for that date
    :raises IndexError: If the module-level ``df`` has no 'Poland' row
    """
    # Building a date object first validates the day/month/year combination.
    requested = datetime.date(year, month, day)

    # Date columns are looked up as "m/d/yy": month and day without leading
    # zeros, year as exactly two digits.  Formatting each field separately
    # keeps the zero in years such as 2009 ("3/7/09"); stripping "/0" from a
    # strftime string would turn that into "3/7/9".
    column = f"{requested.month}/{requested.day}/{requested.year % 100:02d}"

    poland_rows = df.loc[df["Country/Region"] == "Poland", column]

    # The first matching row is used; convert the NumPy scalar to a built-in
    # int so the result matches the annotated return type.
    return int(poland_rows.values[0])


def top5_countries_by_date(day: int, month: int, year: int = 2020) -> List[str]:
    """
    Returns the top 5 infected countries given a date (confirmed cases).
    Ex.
    >>> top5_countries_by_date(27, 2, 2020)
    ['China', 'Korea, South', 'Cruise Ship', 'Italy', 'Iran']
    >>> top5_countries_by_date(12, 3)
    ['China', 'Italy', 'Iran', 'Korea, South', 'France']
    :param day: Day of month to get the countries for as an integer indexed from 1
    :param month: Month to get the countries for as an integer indexed from 1
    :param year: 4 digit integer representation of the year to get the countries for, defaults to 2020
    :return: A list of strings with the names of the countries, highest count first
    :raises ValueError: If day/month/year do not form a valid calendar date
    :raises KeyError: If the module-level ``df`` has no column for that date
    """
    # Building a date object first validates the day/month/year combination.
    requested = datetime.date(year, month, day)

    # Date columns are looked up as "m/d/yy": month and day without leading
    # zeros, year as exactly two digits (formatting the fields separately
    # keeps the zero in years such as 2009).
    column = f"{requested.month}/{requested.day}/{requested.year % 100:02d}"

    # A country may span several rows, so add its rows up before ranking.
    per_country = df[["Country/Region", column]].groupby("Country/Region").sum()
    ranked = per_country.sort_values(by=column, ascending=False)
    return list(ranked.head(5).index)


def no_new_cases_count(day: int, month: int, year: int = 2020) -> int:
    """
    Returns the number of rows (countries/regions) whose infection count on a
    given day differs from the count on the previous day.

    NOTE(review): the function name, and the earlier wording of this docstring,
    suggest counting rows where the value stayed the same. The implementation
    selects rows where the two days differ, and the examples below match that
    behaviour, so it is kept unchanged - confirm the intended meaning.
    Ex.
    >>> no_new_cases_count(11, 2, 2020)
    35
    >>> no_new_cases_count(3, 3)
    57
    :param day: Day of month to get the count for as an integer indexed from 1
    :param month: Month to get the count for as an integer indexed from 1
    :param year: 4 digit integer representation of the year to get the count for, defaults to 2020
    :return: Number of rows whose count differs from the previous day
    :raises ValueError: If day/month/year do not form a valid calendar date
    :raises KeyError: If the module-level ``df`` lacks a column for either day
    """
    # Date arithmetic handles month and year boundaries for the previous day.
    requested = datetime.date(year, month, day)
    day_before = requested - datetime.timedelta(days=1)

    # Date columns are looked up as "m/d/yy": month and day without leading
    # zeros, year as exactly two digits (formatting the fields separately
    # keeps the zero in years such as 2009).
    current_col, previous_col = (
        f"{d.month}/{d.day}/{d.year % 100:02d}" for d in (requested, day_before)
    )

    changed = df[current_col] != df[previous_col]

    # count() skips missing values in the selected column; convert the NumPy
    # scalar to a built-in int to match the annotated return type.
    return int(df.loc[changed, current_col].count())

In [28]:
# Reload the confirmed-cases table into the module-level `df` that the
# functions read, then print the result of each docstring example.
url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
    "time_series_19-covid-Confirmed.csv"
)
df = pd.read_csv(url, error_bad_lines=False)

# Each entry pairs a function with the (day, month) arguments to try; the
# calls run and print one at a time, in the listed order.
for report, dates in (
    (poland_cases_by_date, ((7, 3), (11, 3))),
    (top5_countries_by_date, ((27, 2), (12, 3))),
    (no_new_cases_count, ((11, 2), (3, 3))),
):
    for day, month in dates:
        print(report(day, month))

5
31
['China', 'Korea, South', 'Cruise Ship', 'Italy', 'Iran']
['China', 'Italy', 'Iran', 'Korea, South', 'France']
35
57
