# Process the scraped CSV data on the fly

In [3]:
# pandas.read_csv can read a CSV straight from a file path
import pandas as pd
file_path = "../course_2/countries.csv"
# .head() keeps the displayed output to the first rows of the table
pd.read_csv(file_path).head()

Unnamed: 0,country,iso_code,vaccines,source_name,source_website
0,Albania,ALB,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
1,Algeria,DZA,Sputnik V,Ministry of Health,https://www.echoroukonline.com/%d9%84%d9%82%d8...
2,Andorra,AND,Pfizer/BioNTech,Government of Andorra,https://www.govern.ad/comunicats/item/12479-pr...
3,Anguilla,AIA,Oxford/AstraZeneca,Ministry of Health,https://www.facebook.com/MinistryofHealthAngui...
4,Argentina,ARG,Sputnik V,Ministry of Health,http://datos.salud.gob.ar/dataset/vacunas-cont...


In [6]:
# a little workaround to allow import outside of a package:
# put the parent directory on the module search path so that
# `vaccination_project` can be imported from this notebook
import sys

# guard the append so that re-running this cell does not keep adding
# duplicate ".." entries to sys.path
if ".." not in sys.path:
    sys.path.append("..")
from vaccination_project.vaccination_source import get_data_source_from_lws

# NOTE(review): what the integer argument selects is defined in
# vaccination_source, which is not visible from this notebook
countries = get_data_source_from_lws(0)  # you get the bytes

In [8]:
import pandas as pd

# this won't work: read_csv expects a path to an actual file or a file-like
# object, and raw bytes are neither.
# The error is caught and printed so that this demonstration does not abort
# a "Restart Kernel -> Run All"; any other kind of failure still propagates.
try:
    pd.read_csv(countries)
except OSError as error:
    print(f"{type(error).__name__}: {error}")

OSError: Expected file path name or file-like object, got <class 'bytes'> type

In [9]:
# this is how we can make the data a file-like object: io.BytesIO wraps the
# raw bytes in an in-memory binary buffer that read_csv can read from
import io
file_obj = io.BytesIO(countries)

pd.read_csv(file_obj)  # now it works!

Unnamed: 0,country,iso_code,vaccines,source_name,source_website
0,Albania,ALB,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
1,Algeria,DZA,Sputnik V,Ministry of Health,https://www.echoroukonline.com/%d9%84%d9%82%d8...
2,Andorra,AND,Pfizer/BioNTech,Government of Andorra,https://www.govern.ad/comunicats/item/12479-pr...
3,Anguilla,AIA,Oxford/AstraZeneca,Ministry of Health,https://www.facebook.com/MinistryofHealthAngui...
4,Argentina,ARG,Sputnik V,Ministry of Health,http://datos.salud.gob.ar/dataset/vacunas-cont...
...,...,...,...,...,...
120,United States,USA,"Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/#vacc...
121,Uruguay,URY,Sinovac,Ministry of Health,https://www.gub.uy/uruguaysevacuna/monitor-vac...
122,Venezuela,VEN,Sputnik V,Government of Venezuela,https://www.semana.com/mundo/articulo/y-el-car...
123,Wales,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...


In [10]:
# so let's put together a function for this
def content_to_df(bytes_content, **read_csv_kwargs):
    """Parse the raw bytes of a CSV file into a pandas DataFrame.

    ``pd.read_csv`` needs a file path or a file-like object, so the bytes
    are first wrapped in an in-memory ``io.BytesIO`` buffer.

    Args:
        bytes_content: bytes-like content of a CSV file.
        **read_csv_kwargs: optional keyword arguments forwarded unchanged
            to ``pd.read_csv`` (for example ``index_col=0`` or ``sep=";"``).
            When none are given the call is a plain ``pd.read_csv(buffer)``.

    Returns:
        Whatever ``pd.read_csv`` returns for the buffer; with the default
        arguments this is the parsed DataFrame.

    Raises:
        TypeError: if ``bytes_content`` is not bytes-like (for example a
            ``str``); raised by ``io.BytesIO``.
    """
    # The buffer is deliberately not closed here: with options such as
    # ``chunksize`` read_csv returns a lazy reader that still needs it.
    file_obj = io.BytesIO(bytes_content)
    return pd.read_csv(file_obj, **read_csv_kwargs)

In [11]:
# parse the same `countries` bytes again, this time through the helper function
content_to_df(bytes_content=countries)

Unnamed: 0,country,iso_code,vaccines,source_name,source_website
0,Albania,ALB,Pfizer/BioNTech,Ministry of Health,https://shendetesia.gov.al/covid19-ministria-e...
1,Algeria,DZA,Sputnik V,Ministry of Health,https://www.echoroukonline.com/%d9%84%d9%82%d8...
2,Andorra,AND,Pfizer/BioNTech,Government of Andorra,https://www.govern.ad/comunicats/item/12479-pr...
3,Anguilla,AIA,Oxford/AstraZeneca,Ministry of Health,https://www.facebook.com/MinistryofHealthAngui...
4,Argentina,ARG,Sputnik V,Ministry of Health,http://datos.salud.gob.ar/dataset/vacunas-cont...
...,...,...,...,...,...
120,United States,USA,"Moderna, Pfizer/BioNTech",Centers for Disease Control and Prevention,https://covid.cdc.gov/covid-data-tracker/#vacc...
121,Uruguay,URY,Sinovac,Ministry of Health,https://www.gub.uy/uruguaysevacuna/monitor-vac...
122,Venezuela,VEN,Sputnik V,Government of Venezuela,https://www.semana.com/mundo/articulo/y-el-car...
123,Wales,,"Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/health...


In [12]:
# fetch another source (argument 1 instead of 0) and parse it with the same helper
# NOTE(review): what the integer argument selects is defined in
# vaccination_source, which is not visible here; confirm against that module
data = get_data_source_from_lws(1)
content_to_df(data)

Unnamed: 0,country,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million
0,Albania,2021-01-10,0.0,0.0,,,,0.00,0.00,,
1,Albania,2021-01-11,,,,,64.0,,,,22.0
2,Albania,2021-01-12,128.0,128.0,,,64.0,0.00,0.00,,22.0
3,Albania,2021-01-13,188.0,188.0,,60.0,63.0,0.01,0.01,,22.0
4,Albania,2021-01-14,266.0,266.0,,78.0,66.0,0.01,0.01,,23.0
...,...,...,...,...,...,...,...,...,...,...,...
5316,Zimbabwe,2021-03-02,25077.0,25077.0,,3621.0,3005.0,0.17,0.17,,202.0
5317,Zimbabwe,2021-03-03,27970.0,27970.0,,2893.0,2871.0,0.19,0.19,,193.0
5318,Zimbabwe,2021-03-04,30658.0,30658.0,,2688.0,2807.0,0.21,0.21,,189.0
5319,Zimbabwe,2021-03-05,31325.0,31325.0,,667.0,2678.0,0.21,0.21,,180.0
