In [25]:
import numpy as np
import pandas as pd

# Load the raw data set from the CSV file that sits next to the notebook.
df = pd.read_csv('owid-covid-data.csv')

# Distinct values of the "iso_code" column, in order of first appearance.
country_code = pd.unique(df["iso_code"])

In [51]:
# Build a wide time-series table: one row per country code, one column per
# date, each cell holding that day's `total_cases` value (NaN where the source
# has no value for that code/date pair).
#
# The table is produced with a single pivot of `df` instead of one merge per
# country code. This also removes the dependency on `new_header`, a name that
# is not defined in this notebook, so the cell runs on a fresh kernel. The date
# columns are taken directly from `df["date"]`.
new_df = (
    df.dropna(subset=["iso_code"])  # rows without a code cannot be pivoted reliably
    .pivot(index="iso_code", columns="date", values="total_cases")  # date columns come out sorted
    .reindex(country_code)  # keep the order in which the codes appear in the source file
    .rename_axis(index=None)  # leave the row index unnamed; only the columns keep the "date" label
)
# Unlike a transpose of a mixed string/number frame, the result stays numeric
# (float64), so later fills and the CSV export format every value consistently.

new_df.head()

date,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,...,2021-03-13,2021-03-14,2021-03-15,2021-03-16,2021-03-17,2021-03-18,2021-03-19,2021-03-20,2021-03-21,2021-03-22
AFG,,,,,,,,,,,...,55959.0,55985.0,55985.0,55995.0,56016.0,56044.0,56069.0,56093.0,56103.0,56153.0
OWID_AFR,,,,,,,,,,,...,4027050.0,4035710.0,4044120.0,4053100.0,4064220.0,4075560.0,4088220.0,4097420.0,4106520.0,4116100.0
ALB,,,,,,,,,,,...,116821.0,117474.0,118017.0,118492.0,118938.0,119528.0,120022.0,120541.0,121200.0,121544.0
DZA,,,,,,,,,,,...,115143.0,115265.0,115410.0,115540.0,115688.0,115842.0,115970.0,116066.0,116157.0,116255.0
AND,,,,,,,,,,,...,11228.0,11266.0,11289.0,11319.0,11360.0,11393.0,11431.0,11481.0,11517.0,11545.0


The time series data is almost ready. We just have to fill with 0 the values that are missing because a country had not yet reported any Covid-19 cases.

We should also remove rows starting with "OWID" like "OWID_AFR" as they are not countries.

Finally, we should replace the country code index with country names.

In [52]:
# `total_cases` is a cumulative count, so a missing value in the middle or at
# the end of a row is a reporting gap, not a return to zero cases. Carry the
# last reported value forward along the dates first; only the dates before a
# country's first report are then filled with 0.
# The cast makes the frame numeric even if it arrives as object dtype.
new_df = new_df.astype("float64").ffill(axis=1).fillna(value=0)

# Drop rows whose code starts with "OWID" (e.g. "OWID_AFR"). A missing code
# (NaN label) is dropped too, because it cannot be mapped to a country name.
not_a_country = new_df.index.str.startswith("OWID", na=True)
new_df = new_df[~not_a_country]

# replacing country code with names
# Build the code -> name lookup once (first "location" seen for each code)
# instead of filtering the full frame for every country.
code_to_location = (
    df.dropna(subset=["iso_code"])
    .drop_duplicates(subset="iso_code")
    .set_index("iso_code")["location"]
)
# Only codes present in the table are mapped. Labels without a match are left
# untouched, so re-running this cell after the rename is a harmless no-op.
country_name = {
    code: code_to_location[code]
    for code in new_df.index
    if code in code_to_location.index
}
new_df = new_df.rename(index=country_name)
new_df

date,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06,2020-01-07,2020-01-08,2020-01-09,2020-01-10,...,2021-03-13,2021-03-14,2021-03-15,2021-03-16,2021-03-17,2021-03-18,2021-03-19,2021-03-20,2021-03-21,2021-03-22
Afghanistan,0,0,0,0,0,0,0,0,0,0,...,55959.0,55985.0,55985.0,55995.0,56016.0,56044.0,56069.0,56093.0,56103.0,56153.0
Albania,0,0,0,0,0,0,0,0,0,0,...,116821.0,117474.0,118017.0,118492.0,118938.0,119528.0,120022.0,120541.0,121200.0,121544.0
Algeria,0,0,0,0,0,0,0,0,0,0,...,115143.0,115265.0,115410.0,115540.0,115688.0,115842.0,115970.0,116066.0,116157.0,116255.0
Andorra,0,0,0,0,0,0,0,0,0,0,...,11228.0,11266.0,11289.0,11319.0,11360.0,11393.0,11431.0,11481.0,11517.0,11545.0
Angola,0,0,0,0,0,0,0,0,0,0,...,21323.0,21380.0,21407.0,21446.0,21489.0,21558.0,21642.0,21696.0,21733.0,21757.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,0,0,0,0,0,0,0,0,0,0,...,145379.0,145379.0,146488.0,147028.0,147577.0,148208.0,149145.0,150306.0,151123.0,151894.0
Vietnam,0,0,0,0,0,0,0,0,0,0,...,2553.0,2554.0,2557.0,2560.0,2567.0,2570.0,2571.0,2572.0,2572.0,2575.0
Yemen,0,0,0,0,0,0,0,0,0,0,...,2771.0,2836.0,2908.0,2969.0,3037.0,3126.0,3217.0,3278.0,3418.0,3516.0
Zambia,0,0,0,0,0,0,0,0,0,0,...,84474.0,84797.0,84950.0,85240.0,85502.0,85889.0,86059.0,86273.0,86449.0,86535.0


In [None]:
We are now ready to export the dataset.

In [53]:
# Write the finished country-by-date table to a CSV file in the working directory.
export_path = 'new_covid_data2.csv'
new_df.to_csv(export_path)