In [5]:
import pandas as pd


def clean_data(df_raw):
    """Return melted dateframe and set index"""
    df = df_raw.melt(id_vars=['Province/State','Country/Region','Lat','Long'],value_name='Cases',var_name='Date')
    df = df.set_index(['Country/Region','Province/State','Date'])
    return df


def consolidate_country_data(df_cleaned):
    """Return country-wide data rather than by region"""
    df = df_cleaned.groupby(['Country/Region','Date'])['Cases'].sum().reset_index()
    df = df.set_index(['Country/Region','Date'])
    df.index = df.index.set_levels([df.index.levels[0], pd.to_datetime(df.index.levels[1])])
    df = df.sort_values(['Country/Region','Date'],ascending=True)
    return df
  

def select_top_10_country_data(df_country_data):
    """Returns dataframe for top 10 countries sorted by maximum cases"""
    df = df_country_data.max(level=0)['Cases'].reset_index().set_index('Country/Region')
    df.sort_values(by='Cases',ascending=False, inplace=True)
    df_top_10 = df.head(10)
    return df_top_10


# Why is this dict rather than dataframe?
def order_by_day_from_num_cases(top_10_country_list, df_top_10_country, num_cases):
    """Returns dict with country names and the data. 
    Start defined as the first day more than num_cases were reported"""

    top_10_growth = {}

    for country in top_10_country_list:
        country_first_case = df_top_10_country.loc[country]['Cases'].reset_index().set_index('Date')
        country_growth = country_first_case[country_first_case.gt(num_cases)].dropna().reset_index()
        top_10_growth[country] = country_growth

    return top_10_growth
    

df_raw = pd.read_csv("https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
df_cleaned = clean_data(df_raw)
df_country = consolidate_country_data(df_cleaned)
df_top_10_country = select_top_10_country_data(df_country)
top_10_country_list = df_top_10_country.index.tolist()
top_10_growth = order_by_day_from_num_cases(top_10_country_list, df_top_10_country, 99)
top_10_growth




AttributeError: 'numpy.int64' object has no attribute 'reset_index'