In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the COVID-19 testing CSV into the working DataFrame.
csv_path = "../input/covid19-world-testing-progress/covid-testing.csv"
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Check for null values: info() lists each column's dtype and non-null count.
df.info()

In [None]:
# Cleaning and rearranging.

# The 'Notes' column is not needed for this analysis, so remove it.
# Rebinding `df` with errors="ignore" (instead of an in-place drop) keeps this
# cell safe to re-run: on a second run the column is already gone and the
# frame is left unchanged rather than raising KeyError.
df = df.drop(columns="Notes", errors="ignore")

In [None]:
# Rename the 'Entity' column to 'Country'.
df = df.rename(columns={"Entity": "Country"})

In [None]:
# Reduce labels of the form "<country> - tests performed" to the country name
# alone: keep the text before the first " - " and strip trailing whitespace.
# The vectorized .str accessor replaces apply+lambda and passes missing values
# through as NaN instead of raising AttributeError on a non-string entry.
df["Country"] = df["Country"].str.split(" - ").str[0].str.rstrip()

In [None]:
# Show every distinct country label, followed by how many there are.
country_col = df['Country']
print(country_col.unique())
print(country_col.nunique())

In [None]:
# Convert the "Date" column from object dtype to datetime.
df = df.assign(Date=pd.to_datetime(df["Date"]))

In [None]:
# Daily change in cumulative total over time for Pakistan.
df_1 = df.loc[df["Country"] == "Pakistan"]
fig, ax = plt.subplots(figsize=(12, 7))
sns.lineplot(data=df_1, x="Date", y="Daily change in cumulative total", ax=ax)

In [None]:
# Daily change in cumulative total over time for the United States.
df_2 = df.loc[df["Country"] == "United States"]
fig, ax = plt.subplots(figsize=(12, 7))
sns.lineplot(data=df_2, x="Date", y="Daily change in cumulative total", ax=ax)

In [None]:
# Which countries report the largest cumulative total?
# As its name indicates, "Cumulative total" is already a running total on each
# row, so summing it across dates counts earlier values again and again and
# favours countries that simply have more rows. Use each country's largest
# reported value instead, then plot the 15 highest.
(
    df.groupby("Country")["Cumulative total"]
    .max()
    .sort_values(ascending=False)
    .head(15)
    .plot.bar()
)

In [None]:
# Which countries have the highest short-term tests per case?
# "Short-term tests per case" is a per-row ratio, so adding it up across dates
# makes the ranking depend on how many rows a country has. Average it per
# country instead, then plot the 30 highest.
plt.figure(figsize=(12, 8))
(
    df.groupby("Country")["Short-term tests per case"]
    .mean()
    .sort_values(ascending=False)
    .head(30)
    .plot.bar()
)

In [None]:
# List the column labels currently in the frame.
df.columns

In [None]:
# Short-term positive rate over time for Pakistan (df_1 is the Pakistan subset).
fig, ax = plt.subplots(figsize=(12, 7))
sns.lineplot(data=df_1, x="Date", y="Short-term positive rate", ax=ax)

**The short-term positive rate peaks in 2020-03 and again between 2020-05 and 2020-07**

In [None]:
# Short-term positive rate over time for Romania.
df_3 = df.loc[df["Country"] == "Romania"]
fig, ax = plt.subplots(figsize=(12, 7))
sns.lineplot(data=df_3, x="Date", y="Short-term positive rate", ax=ax)

**The short-term positive rate is highest between 2020-10 and 2021-05**

In [None]:
# Correlation heatmap of the numeric columns.
# Restrict to numeric dtypes before calling corr(): the frame also holds text
# and datetime columns (e.g. "Country", "Date"), and pandas 2.0+ raises on
# those in DataFrame.corr() instead of silently dropping them.
plt.figure(figsize=(12, 7))
sns.heatmap(df.select_dtypes(include="number").corr())

In [None]:
# Rows for the ten countries selected as most affected.
top_10_countries = [
    "United States",
    "Italy",
    "Spain",
    "Germany",
    "China",
    "France",
    "Iran",
    "United Kingdom",
    "Switzerland",
    "Turkey",
]
top_10 = df[df["Country"].isin(top_10_countries)]

In [None]:
# Short-term positive rate over time, one line per country in top_10.
fig, ax = plt.subplots(figsize=(20, 15))
sns.lineplot(
    x="Date",
    y="Short-term positive rate",
    hue="Country",
    marker='o',
    data=top_10,
    ax=ax,
)
plt.show()

**Iran currently has the highest short-term positive rate among these countries**

In [None]:
# Compare the 7-day smoothed daily change across five selected countries.
frames_by_country = {
    country: df[df["Country"] == country]
    for country in ("United States", "United Kingdom", "Italy", "Germany", "India")
}
# Keep the per-country frames available under their individual names as well.
USA_df, UK_df, IT_df, GE_df, IND_df = frames_by_country.values()

metric = "7-day smoothed daily change"

plt.figure(figsize=(17, 10))
# One line per country, drawn in the order listed above.
for country, frame in frames_by_country.items():
    plt.plot(frame["Date"], frame[metric], label=country)

plt.xlabel("Date")
plt.ylabel(metric)
plt.legend()
plt.show()

In [None]:
# Compare short-term tests per case across five selected countries.
frames_by_country = {
    country: df[df["Country"] == country]
    for country in ("United States", "United Kingdom", "Italy", "Germany", "India")
}
# Keep the per-country frames available under their individual names as well.
USA_df, UK_df, IT_df, GE_df, IND_df = frames_by_country.values()

metric = "Short-term tests per case"

plt.figure(figsize=(17, 10))
# One line per country, drawn in the order listed above.
for country, frame in frames_by_country.items():
    plt.plot(frame["Date"], frame[metric], label=country)

plt.xlabel("Date")
plt.ylabel(metric)
plt.legend()
plt.show()