# In [4]:
import numpy as np
import pandas as pd
from uk_covid19 import Cov19API

# Rows are kept only when their daily case count is strictly greater than this.
min_confirmed: int = 5
# Maximum number of rows (one per reported day) taken for a single area.
days_ran: int = 250
# Presumably the forecast horizon in days; not referenced in the visible
# part of the file -- confirm against the code that consumes it.
days_predicted: int = 20


class LoadData:
    """Loaders for UK (``uk_covid19.Cov19API``) and Johns Hopkins CSSE data.

    All methods are static; the class is only used as a namespace.
    """

    # Johns Hopkins CSSE global time-series CSVs read by ``getDataJH``.
    _JH_BASE_URL = (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
        "csse_covid_19_data/csse_covid_19_time_series/"
    )
    JH_CONFIRMED_URL = _JH_BASE_URL + "time_series_covid19_confirmed_global.csv"
    JH_DEATHS_URL = _JH_BASE_URL + "time_series_covid19_deaths_global.csv"
    JH_RECOVERED_URL = _JH_BASE_URL + "time_series_covid19_recovered_global.csv"

    @staticmethod
    def getUkdf(min_cases=None) -> pd.DataFrame:
        """Download per-area UK figures and return them as a cleaned frame.

        The request uses the ``areaType=utla`` filter and asks for the
        columns ``date``, ``areaName``, ``dailyCases``, ``dailyDeaths`` and
        ``cumulativeCases``.

        Args:
            min_cases: Rows are kept only when ``dailyCases`` is strictly
                greater than this value. Defaults to the module-level
                ``min_confirmed``.

        Returns:
            pd.DataFrame: The requested columns plus ``day``, the number of
            days between each row's date and the earliest date left after
            filtering (across all areas). The index is reset to 0..n-1.
        """
        if min_cases is None:
            min_cases = min_confirmed

        all_nations = ["areaType=utla"]
        cases_and_deaths = {
            "date": "date",
            "areaName": "areaName",
            "dailyCases": "newCasesByPublishDate",
            "dailyDeaths": "newDeaths28DaysByPublishDate",
            "cumulativeCases": "cumCasesByPublishDate",
        }

        api = Cov19API(filters=all_nations, structure=cases_and_deaths)
        df = api.get_dataframe()

        df["date"] = pd.to_datetime(df["date"]).dt.date

        # Missing counts are treated as zero. An explicit astype replaces the
        # deprecated ``fillna(..., downcast=...)`` argument.
        count_cols = ["dailyDeaths", "cumulativeCases"]
        df[count_cols] = df[count_cols].fillna(0).astype("int64")

        # Copy so that adding "day" does not write into a view of the
        # unfiltered frame (avoids SettingWithCopyWarning).
        df = df[df["dailyCases"] > min_cases].copy()

        # Compute the reference date once instead of once per row.
        first_date = df["date"].min()
        df["day"] = [(current - first_date).days for current in df["date"]]

        return df.reset_index(drop=True)

    @staticmethod
    def getcitiesDF(df: pd.DataFrame, city_name: str, max_days=None) -> tuple:
        """Extract the case/death series of one area from a ``getUkdf`` frame.

        Args:
            df: Frame with at least the columns ``areaName``, ``date``,
                ``day``, ``dailyCases`` and ``dailyDeaths``.
            city_name: Value of ``areaName`` to select.
            max_days: Maximum number of rows to use, taken from the top of
                the selection. Defaults to the module-level ``days_ran``.

        Returns:
            tuple: ``(original_data, data_dates, time_data)`` where

            * ``original_data`` is a float64 array of shape ``(2, n)``: row 0
              holds ``dailyCases``; row 1 holds the row-to-row difference of
              ``dailyDeaths`` with the first difference repeated at index 0
              so both rows have the same length (``0.0`` when ``n == 1``).
            * ``data_dates`` is a list of ``"dd/mm/yy"`` strings.
            * ``time_data`` is a float64 array of ``day`` values shifted so
              the first selected row is 0.

        Raises:
            IndexError: If no rows are available for ``city_name``. The type
                is the one the previous implementation raised implicitly.
        """
        if max_days is None:
            max_days = days_ran

        area = (
            df.loc[df["areaName"] == city_name]
            .drop(columns=["areaName"])
            .reset_index(drop=True)
            .head(max_days)
        )
        if area.empty:
            raise IndexError(f"no rows available for area {city_name!r}")

        time_data = area["day"].to_numpy(dtype=np.float64)
        time_data = time_data - time_data[0]

        daily_cases = area["dailyCases"].to_numpy(dtype=np.float64)

        # NOTE(review): "dailyDeaths" is requested from the API as a *new
        # deaths* metric, so this difference is the day-over-day change of a
        # daily figure rather than a daily count -- confirm this is intended.
        death_deltas = np.diff(area["dailyDeaths"].to_numpy(dtype=np.float64))
        # A single-row selection has no differences; pad with 0.0 instead of
        # failing on death_deltas[0].
        leading = death_deltas[0] if death_deltas.size else 0.0
        death_deltas = np.insert(death_deltas, 0, leading)

        original_data = np.array([daily_cases, death_deltas])

        # Going through to_datetime accepts both datetime.date objects and
        # datetime64 values in the "date" column.
        data_dates = pd.to_datetime(area["date"]).dt.strftime("%d/%m/%y").tolist()

        return original_data, data_dates, time_data

    @staticmethod
    def _jh_daily_totals(source) -> pd.Series:
        """Read one CSSE time-series CSV and sum every date column.

        Date columns are recognised by their ``m/d/yy`` header, so metadata
        columns (province, country, coordinates, ...) are ignored regardless
        of their number or position.
        """
        frame = pd.read_csv(source)
        parsed = pd.to_datetime(frame.columns, format="%m/%d/%y", errors="coerce")
        date_cols = frame.columns[parsed.notna()]
        return frame[date_cols].sum()

    @staticmethod
    def getDataJH(confirmed_src=None, deaths_src=None, recovered_src=None) -> pd.DataFrame:
        """Build world-wide daily totals from the Johns Hopkins CSSE files.

        Args:
            confirmed_src: Path, URL or file-like object for the confirmed
                cases CSV. Defaults to ``LoadData.JH_CONFIRMED_URL``.
            deaths_src: Same, for deaths (``LoadData.JH_DEATHS_URL``).
            recovered_src: Same, for recoveries
                (``LoadData.JH_RECOVERED_URL``).

        Returns:
            pd.DataFrame: One row per date column of the confirmed file with
            the columns ``index`` (row counter), ``date`` (the CSV header
            string, e.g. ``"1/22/20"``), ``State``, ``Country``, ``Lat``,
            ``Long`` (kept from the declared schema but left empty, because
            the figures are global totals), ``Confirmed``, ``Deaths`` and
            ``Recovered``. Dates missing from the deaths or recoveries file
            yield NaN in the corresponding column.
        """
        if confirmed_src is None:
            confirmed_src = LoadData.JH_CONFIRMED_URL
        if deaths_src is None:
            deaths_src = LoadData.JH_DEATHS_URL
        if recovered_src is None:
            recovered_src = LoadData.JH_RECOVERED_URL

        # Summing every row is equivalent to grouping by country first and
        # then summing the groups, so the intermediate groupby is not needed.
        confirmed = LoadData._jh_daily_totals(confirmed_src)
        deaths = LoadData._jh_daily_totals(deaths_src)
        recovered = LoadData._jh_daily_totals(recovered_src)

        dates = confirmed.index
        grouped_df = pd.DataFrame(
            {
                "date": list(dates),
                "State": np.nan,
                "Country": np.nan,
                "Lat": np.nan,
                "Long": np.nan,
                "Confirmed": confirmed.to_numpy(),
                # Align on the date label rather than on position.
                "Deaths": deaths.reindex(dates).to_numpy(),
                "Recovered": recovered.reindex(dates).to_numpy(),
            }
        )
        grouped_df.reset_index(drop=False, inplace=True)

        return grouped_df