In [2]:
import pandas as pd

In [3]:
# Three-row sample frame with one text column, one integer column and one
# float column, built row by row and displayed as the cell output.
df = pd.DataFrame(
    [
        ("Amber Valley", 72179, 0.228571),
        ("Ashfield", 77988, 0.223342),
        ("Bassetlaw", 70832, 0.184493),
    ],
    columns=["LTLA Name", "Population", "PercentageVaccinated"],
)
df

Unnamed: 0,LTLA Name,Population,PercentageVaccinated
0,Amber Valley,72179,0.228571
1,Ashfield,77988,0.223342
2,Bassetlaw,70832,0.184493


In [19]:
# A single format string is applied to every column, so the float spec also
# hits the text column "LTLA Name" -- hence the ValueError recorded below.
df.style.format(formatter="{:.2f}")

ValueError: Unknown format code 'f' for object of type 'str'

<pandas.io.formats.style.Styler at 0x7ff9671386a0>

In [24]:
# Remove the text column first so the float format only sees numeric columns.
df.drop(columns=["LTLA Name"]).style.format("{:.2f}")

Unnamed: 0,Population,PercentageVaccinated
0,72179.0,0.23
1,77988.0,0.22
2,70832.0,0.18


In [19]:
# Per-column formats: thousands separators for the integer column, two
# decimals for the float column, right-aligned 30-character text for the name.
formatted = df.style.format({
    "Population": "{:,d}",
    "PercentageVaccinated": "{:.2f}",
    "LTLA Name": "{:>30}"
})
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis="index") is its replacement. Fall back to the old method
# only on pandas versions that predate Styler.hide.
formatted.hide(axis="index") if hasattr(formatted, "hide") else formatted.hide_index()

LTLA Name,Population,PercentageVaccinated
Amber Valley,72179,0.23
Ashfield,77988,0.22
Bassetlaw,70832,0.18


In [25]:
from dateutil import parser

def create_vaccines_dataframe(latest_date, data_dir="data"):
    """Build the "United Kingdom" daily vaccination table from two CSV exports.

    Reads ``{data_dir}/data_{stamp}-dose1.csv`` and
    ``{data_dir}/data_{stamp}-dose2.csv`` (``stamp`` is ``latest_date``
    formatted as ``%Y-%b-%d``), inner-joins them on date and area columns,
    and derives per-day totals.

    Parameters
    ----------
    latest_date : object with ``strftime`` (e.g. ``datetime.datetime``)
        Date embedded in the CSV file names. ``%b`` is the abbreviated month
        name, so the stamp follows the current locale.
    data_dir : str, default "data"
        Directory that contains the two CSV files.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``areaName`` is "United Kingdom" and whose ``totalByDay``
        is not NaN. The publish-date columns are renamed to ``firstDose``,
        ``secondDose``, ``firstDoseCumulative`` and ``secondDoseCumulative``;
        ``totalByDay``, ``percentageFirstDose`` and ``totalDoses`` are added.
        The index of the merged frame is kept as-is.
    """
    # Both files share the same stem; format the date once.
    file_stem = f"{data_dir}/data_{latest_date.strftime('%Y-%b-%d')}"
    dose1 = pd.read_csv(f"{file_stem}-dose1.csv")
    dose2 = pd.read_csv(f"{file_stem}-dose2.csv")
    df = pd.merge(dose1, dose2, on=["date", "areaName", "areaType", "areaCode"])

    df["totalByDay"] = df.newPeopleVaccinatedSecondDoseByPublishDate + df.newPeopleVaccinatedFirstDoseByPublishDate
    df["percentageFirstDose"] = 100.0 * df.newPeopleVaccinatedFirstDoseByPublishDate / df.totalByDay

    # A NaN total means at least one of the two daily counts is missing.
    uk_rows = df[(df.areaName == "United Kingdom") & df.totalByDay.notna()]

    # rename/assign each return a new frame, so nothing is written into a
    # slice of ``df``.
    return uk_rows.rename(columns={
        "newPeopleVaccinatedFirstDoseByPublishDate": "firstDose",
        "newPeopleVaccinatedSecondDoseByPublishDate": "secondDose",
        "cumPeopleVaccinatedFirstDoseByPublishDate": "firstDoseCumulative",
        "cumPeopleVaccinatedSecondDoseByPublishDate": "secondDoseCumulative"
    }).assign(totalDoses=lambda frame: frame.firstDose + frame.secondDose)

In [116]:
# Load the files stamped 2021-04-14 and work on an independent copy, since a
# later cell adds a column to this frame.
latest_daily_date = parser.parse(timestr="2021-04-14")
all_df = create_vaccines_dataframe(latest_date=latest_daily_date).copy(deep=True)
all_df

Unnamed: 0,areaType,areaName,areaCode,date,firstDose,firstDoseCumulative,secondDose,secondDoseCumulative,totalByDay,percentageFirstDose,totalDoses
0,overview,United Kingdom,K02000001,2021-04-13,76123.0,32326604,312685.0,8170081,388808.0,19.578558,388808.0
1,overview,United Kingdom,K02000001,2021-04-12,59905.0,32250481,201191.0,7857396,261096.0,22.943668,261096.0
2,overview,United Kingdom,K02000001,2021-04-11,69223.0,32190576,189665.0,7656205,258888.0,26.738590,258888.0
3,overview,United Kingdom,K02000001,2021-04-10,111109.0,32121353,475230.0,7466540,586339.0,18.949618,586339.0
4,overview,United Kingdom,K02000001,2021-04-09,106878.0,32010244,450136.0,6991310,557014.0,19.187669,557014.0
...,...,...,...,...,...,...,...,...,...,...,...
88,overview,United Kingdom,K02000001,2021-01-15,341317.0,3576263,4208.0,447442,345525.0,98.782143,345525.0
89,overview,United Kingdom,K02000001,2021-01-14,316694.0,3234946,5257.0,443234,321951.0,98.367143,321951.0
90,overview,United Kingdom,K02000001,2021-01-13,278943.0,2918252,9745.0,437977,288688.0,96.624383,288688.0
91,overview,United Kingdom,K02000001,2021-01-12,207661.0,2639309,16065.0,428232,223726.0,92.819342,223726.0


In [117]:
# Label every row with its weekday name. Series.dt.day_name() is vectorised
# and returns English names by default, whereas strftime("%A") follows the
# process locale -- and the cells that follow compare against the literal "Monday".
all_df["dayOfWeek"] = pd.to_datetime(all_df["date"]).dt.day_name()
by_day_of_week = all_df[["date", "totalByDay", "dayOfWeek"]]
by_day_of_week

Unnamed: 0,date,totalByDay,dayOfWeek
0,2021-04-13,388808.0,Tuesday
1,2021-04-12,261096.0,Monday
2,2021-04-11,258888.0,Sunday
3,2021-04-10,586339.0,Saturday
4,2021-04-09,557014.0,Friday
...,...,...,...
88,2021-01-15,345525.0,Friday
89,2021-01-14,321951.0,Thursday
90,2021-01-13,288688.0,Wednesday
91,2021-01-12,223726.0,Tuesday


In [119]:
# The two most recent Mondays, oldest first.
by_day_of_week[by_day_of_week["dayOfWeek"].eq("Monday")].sort_values("date").tail(2)

Unnamed: 0,date,totalByDay,dayOfWeek
8,2021-04-05,105334.0,Monday
1,2021-04-12,261096.0,Monday


In [131]:
# Week-over-week comparison: for every weekday, take its most recent total
# ("this week") and the one before it ("last week").
ordered_by_date = by_day_of_week.sort_values("date")
day_of_week_grouping = ordered_by_date.groupby("dayOfWeek")

# Position of each row counted from the end of its weekday group:
# 0 = latest occurrence, 1 = the occurrence before that. This avoids
# GroupBy.nth, whose result index changed in pandas 2.0 (it now keeps the
# original row index instead of the group keys).
recency = day_of_week_grouping.cumcount(ascending=False)
this_week = ordered_by_date.loc[recency == 0].set_index("dayOfWeek")[["totalByDay"]].sort_index()
last_week = ordered_by_date.loc[recency == 1].set_index("dayOfWeek")[["totalByDay"]].sort_index()

# Inner join on the weekday index: a weekday with a single observation is
# dropped instead of causing a length mismatch.
latest = (
    pd.merge(last_week, this_week, left_index=True, right_index=True, suffixes=("LastWeek", "ThisWeek"))
    .sort_index()
    .rename_axis("dayOfWeek")
    .reset_index()
)
# astype returns a new frame, so the integer dtype really sticks; assigning
# through .loc[:, col] writes in place on pandas >= 2.0 and keeps float64,
# which the "{:,d}" format below rejects.
latest = latest.astype({"totalByDayLastWeek": int, "totalByDayThisWeek": int})
latest["change"] = 100 * (latest.totalByDayThisWeek - latest.totalByDayLastWeek) / latest.totalByDayLastWeek

latest_styled = latest.style.format({
    "totalByDayLastWeek": "{:,d}",
    "totalByDayThisWeek": "{:,d}",
    "change": "{:.2f}"
})
# Styler.hide_index() was removed in pandas 2.0; prefer Styler.hide and fall
# back only on pandas versions that predate it.
latest_styled.hide(axis="index") if hasattr(latest_styled, "hide") else latest_styled.hide_index()

dayOfWeek,totalByDayLastWeek,totalByDayThisWeek,change
Friday,354051,557014,57.33
Monday,105334,261096,147.87
Saturday,286799,586339,104.44
Sunday,95763,258888,170.34
Thursday,616234,545511,-11.48
Tuesday,272020,388808,42.93
Wednesday,627008,507926,-18.99
