In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import dask.dataframe as dd

In [2]:
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

In [3]:
df = dd.read_csv(url, blocksize=None, dtype={'tests_units': 'object'})

In [4]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511


In [5]:
worlddf = df[df["location"] == "World"]
countrydf = df[df["location"] == "Bangladesh"]

In [6]:
worlddf.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
82045,OWID_WRL,,World,2020-01-22,557.0,0.0,,17.0,0.0,,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737
82046,OWID_WRL,,World,2020-01-23,655.0,98.0,,18.0,1.0,,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737
82047,OWID_WRL,,World,2020-01-24,941.0,286.0,,26.0,8.0,,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737
82048,OWID_WRL,,World,2020-01-25,1433.0,492.0,,42.0,16.0,,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737
82049,OWID_WRL,,World,2020-01-26,2118.0,685.0,,56.0,14.0,,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737


In [7]:
db = pd.read_csv(url)

In [8]:
db.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,...,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511


In [9]:
world = db[db["location"] == "World"]
country = db[db["location"] == "Bangladesh"]

In [10]:
world.tail(3)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
82501,OWID_WRL,,World,2021-04-22,144742651.0,899831.0,809435.0,3071857.0,13712.0,12274.857,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737
82502,OWID_WRL,,World,2021-04-23,145640413.0,897762.0,815564.571,3086150.0,14293.0,12484.429,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737
82503,OWID_WRL,,World,2021-04-24,,,,,,,...,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737


In [11]:
world_total = world[["location", "date", "total_cases", "total_deaths"]]
country_total = country[["location", "date", "total_cases", "total_deaths"]]

In [12]:
world_total = pd.DataFrame(world_total)
world_total.insert(0, 'id', range(0, 0 + len(world_total)))
country_total = pd.DataFrame(country_total)
country_total.insert(0, 'id', range(0, 0 + len(country_total)))

In [13]:
world_total
last_date = world_total.tail(1)
last_date
last_cases = int(last_date["total_cases"])
last_cases
last_death = int(last_date["total_deaths"])
last_death

ValueError: cannot convert float NaN to integer

In [None]:
world_data = world_total[["id", "date", "total_cases"]]
world_total

In [None]:
x = np.array(world_total["id"]).reshape(-1,1)
y = np.array(world_total["total_cases"]).reshape(-1,1)

In [None]:
%matplotlib widget
plt.plot(x, y, color='red', marker='o', label='Total Cases')
plt.xlabel('Days')
plt.ylabel('Increment - Milions')
plt.title("Total Cases Worldwide")
plt.grid(True)
plt.legend()

In [None]:
world_country = [world_total,country_total]
world_country = pd.concat(world_country)

In [None]:
fig = px.line(world_country, x="date", y="total_cases", title='Total Cases World and Country', color='location')
fig.show()


In [None]:
xx = world_total["date"]
yy = world_total["total_cases"]

fig = go.Figure(data=go.Scatter(x=xx, y=yy))
fig.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import mpld3

f, ax = plt.subplots()
xxx = world_total["date"]
yyy = world_total["total_cases"]

line = ax.plot(xxx, yyy)
mpld3.plugins.connect(f, mpld3.plugins.LineLabelTooltip(line[0], label='label 1'))

# mpld3.show()
# mpld3.display()
mpld3.enable_notebook()

In [None]:
regressor = LinearRegression()
regressor.fit(x, y)
y_pred = regressor.predict([[456]])
str(int(y_pred))

In [None]:
poly = PolynomialFeatures(degree=3)
x = poly.fit_transform(x)
x

In [None]:
reg = LinearRegression()
reg.fit(x, y)
accuracy = reg.score(x,y)
print(f"Accuracy = {round(accuracy*100, 2)} %")
predict = reg.predict(x)


%matplotlib widget
plt.plot(predict, color='red', marker='o', label='Total Cases')
plt.xlabel('Days')
plt.ylabel('Increment - Milions')
plt.title("Total Cases Worldwide")
plt.grid(True)
plt.legend()

In [None]:
print(int(reg.predict(poly.fit_transform([[457]]))))