In [None]:
import warnings # Supress warnings 
warnings.filterwarnings('ignore')

# Coronavirus (COVID-19) Visualization

This notebook was inspired by: https://www.kaggle.com/therealcyberlord/coronavirus-covid-19-visualization-prediction

This is an initial analysis of Brazil's coronavirus cases.

If you like the dataset and this initial notebook, please upvote.

In [None]:
import pandas as pd

# Location of the case table inside the attached Kaggle dataset.
CASES_CSV = '/kaggle/input/covid19-brazil-full-cases-17062021/caso_full.csv'

# Load the whole CSV into memory; every later cell reads from `covid`.
covid = pd.read_csv(CASES_CSV)

In [None]:
# Preview the first five rows of the freshly loaded table.
covid.head(n=5)

In [None]:
# Columns that none of the visible analysis cells read.
UNUSED_COLUMNS = ['estimated_population_2019', 'order_for_place']

# `covid` is rebound here, so running this cell a second time would raise a
# KeyError for the already-removed columns; errors='ignore' makes the cell
# safe to re-run. `axis=1` is dropped because `columns=` already names the axis.
covid = covid.drop(columns=UNUSED_COLUMNS, errors='ignore')
covid.head()

Let's take a look at which city has the most cases

In [None]:
import numpy as np
import plotly.express as px
import plotly.io as pio

# Use the dark theme for every Plotly figure created from here on.
pio.templates.default = "plotly_dark"


def _sum_by_city(frame, column):
    """Sum ``column`` over all rows of each city.

    Returns a DataFrame with the columns ``[column, 'city']``, one row per
    distinct city, indexed 1..n.
    """
    totals = frame.groupby('city')[column].sum().reset_index()
    # Size the 1-based index from the result itself. A hard-coded length only
    # works for one exact snapshot of the data and raises a length-mismatch
    # error as soon as the number of cities changes.
    totals.index = np.arange(1, len(totals) + 1)
    return totals[[column, 'city']]


# NOTE(review): the `last_available_*` names suggest running totals; if so,
# summing them across dates overstates the counts and the latest value per
# city should be used instead. Confirm against the dataset documentation.
confirmed_cases = _sum_by_city(covid, 'last_available_confirmed')
death_cases = _sum_by_city(covid, 'last_available_deaths')
new_confirmed = _sum_by_city(covid, 'new_confirmed')

# Same tables with the city name as the leading column.
global_confirmed_cases = confirmed_cases[['city', 'last_available_confirmed']]
global_death_cases = death_cases[['city', 'last_available_deaths']]
global_new_confirmed = new_confirmed[['city', 'new_confirmed']]

# Take the 20 largest totals, then reverse the order so the largest ends up
# as the last row handed to the horizontal bar chart.
top_new_confirmed = (
    global_new_confirmed
    .sort_values('new_confirmed', ascending=False)
    .head(20)
    .iloc[::-1]
)

fig = px.bar(
    top_new_confirmed,
    x='new_confirmed',
    y='city',
    title='New Confirmed Cases in Brazil',
    height=900,
    orientation='h',
)
fig.show()

Comparing New Cases and New Deaths

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# New cases and new deaths summed over every row sharing a date.
# The column subset must be a list: selecting several columns from a GroupBy
# with a bare tuple was deprecated in pandas 1.0 and raises in pandas 2.0.
# NOTE(review): if the file mixes city-level and state-level rows, this sum
# counts each case more than once; confirm and filter first if so.
date_c = covid.groupby('date')[['new_confirmed', 'new_deaths']].sum().reset_index()

# One row, two panels: confirmed cases on the left, deaths on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=('New Confirmed','New Deaths'))

trace1 = go.Scatter(
    x=date_c['date'],
    y=date_c['new_confirmed'],
    name="Confirmed",
    line_color='orange',
    mode='lines+markers',
    opacity=0.8,
)

trace2 = go.Scatter(
    x=date_c['date'],
    y=date_c['new_deaths'],
    name="Deaths",
    line_color='red',
    mode='lines+markers',
    opacity=0.8,
)

# add_trace(row=, col=) is the supported replacement for the deprecated
# append_trace.
fig.add_trace(trace1, row=1, col=1)
fig.add_trace(trace2, row=1, col=2)
fig.update_layout(
    template="plotly_dark",
    title_text='<b>Brazilian Spread of the Coronavirus Over Time </b>',
    font=dict(family="Arial, Balto, Courier New, Droid Sans", color='white'),
)
fig.show()

In [None]:
def _daily_totals(city_rows):
    """Sum ``new_confirmed`` and ``new_deaths`` per date for the given rows.

    The grouping key ``'date'`` is deliberately not part of the column
    selection: it comes back through ``reset_index()``, and selecting it as
    well would make that call collide with an existing ``'date'`` column.
    The selection is a list because tuple selection on a GroupBy raises in
    pandas 2.0.
    """
    return city_rows.groupby('date')[['new_confirmed', 'new_deaths']].sum().reset_index()


# Rows for each city of interest. reset_index() without drop=True keeps the
# original row labels in an extra 'index' column.
sao_paulo = covid[covid['city'] == 'São Paulo'].reset_index()
sao_paulo_date = _daily_totals(sao_paulo)

brasilia = covid[covid['city'] == 'Brasília'].reset_index()
brasilia_date = _daily_totals(brasilia)

rio_de_janeiro = covid[covid['city'] == 'Rio de Janeiro'].reset_index()
rio_de_janeiro_date = _daily_totals(rio_de_janeiro)

Now let's take a closer look at the top 3 worst-affected cities in Brazil.

## São Paulo

In [None]:
# One row, two panels: new confirmed cases on the left, new deaths on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=('New Confirmed Cases', 'New Deaths'))

# (column to plot, legend label, line colour) for each panel, left to right.
panels = [
    ('new_confirmed', 'Confirmed', 'orange'),
    ('new_deaths', 'Deaths', 'red'),
]

for col_number, (column, label, colour) in enumerate(panels, start=1):
    series = go.Scatter(
        x=sao_paulo['date'],
        y=sao_paulo[column],
        name=label,
        line_color=colour,
        mode='lines+markers',
        opacity=0.8,
    )
    # add_trace(row=, col=) is the supported replacement for the deprecated
    # append_trace.
    fig.add_trace(series, row=1, col=col_number)

fig.update_layout(
    template='plotly_dark',
    title_text='<b>Spread of the Coronavirus Over Time in São Paulo (TOP 1)</b>',
    font=dict(family='Arial, Balto, Courier New, Droid Sans', color='white'),
)
fig.show()

## Brasília

In [None]:
# One row, two panels: new confirmed cases on the left, new deaths on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=('New Confirmed Cases', 'New Deaths'))

# (column to plot, legend label, line colour) for each panel, left to right.
panels = [
    ('new_confirmed', 'Confirmed', 'orange'),
    ('new_deaths', 'Deaths', 'red'),
]

for col_number, (column, label, colour) in enumerate(panels, start=1):
    series = go.Scatter(
        x=brasilia['date'],
        y=brasilia[column],
        name=label,
        line_color=colour,
        mode='lines+markers',
        opacity=0.8,
    )
    # add_trace(row=, col=) is the supported replacement for the deprecated
    # append_trace.
    fig.add_trace(series, row=1, col=col_number)

fig.update_layout(
    template='plotly_dark',
    title_text='<b>Spread of the Coronavirus Over Time in Brasília (TOP 2)</b>',
    font=dict(family='Arial, Balto, Courier New, Droid Sans', color='white'),
)
fig.show()

## Rio de Janeiro

In [None]:
# One row, two panels: new confirmed cases on the left, new deaths on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=('New Confirmed Cases', 'New Deaths'))

# (column to plot, legend label, line colour) for each panel, left to right.
panels = [
    ('new_confirmed', 'Confirmed', 'orange'),
    ('new_deaths', 'Deaths', 'red'),
]

for col_number, (column, label, colour) in enumerate(panels, start=1):
    series = go.Scatter(
        x=rio_de_janeiro['date'],
        y=rio_de_janeiro[column],
        name=label,
        line_color=colour,
        mode='lines+markers',
        opacity=0.8,
    )
    # add_trace(row=, col=) is the supported replacement for the deprecated
    # append_trace.
    fig.add_trace(series, row=1, col=col_number)

fig.update_layout(
    template='plotly_dark',
    title_text='<b>Spread of the Coronavirus Over Time in Rio de Janeiro (TOP 3)</b>',
    font=dict(family='Arial, Balto, Courier New, Droid Sans', color='white'),
)
fig.show()

## Comparison

In [None]:
# Totals per date, reshaped to long form (one row per date and measure) so
# that px.area can colour by the 'case' column.
# The column subset is a list: tuple selection on a GroupBy was deprecated in
# pandas 1.0 and raises in pandas 2.0.
temp = (
    covid.groupby('date')[['new_confirmed', 'new_deaths']]
    .sum()
    .reset_index()
    .melt(
        id_vars='date',
        value_vars=['new_confirmed', 'new_deaths'],
        var_name='case',
        value_name='count',
    )
)

# Only two measures are plotted, so the third colour in the sequence is unused.
fig = px.area(
    temp,
    x='date',
    y='count',
    color='case',
    title='Cases over time: Area Plot',
    color_discrete_sequence=['cyan', 'red', 'orange'],
)

fig.show()

Now let's see which city has the most deaths per 100 confirmed cases.

In [None]:
# Rows for the most recent date in the table. Series.max() is vectorised and
# skips missing values, whereas the built-in max() iterates element by
# element and would raise if the column mixed strings with NaN.
cleaned_latest = covid[covid['date'] == covid['date'].max()]

# Per-city totals for that date. The column subset is a list because tuple
# selection on a GroupBy raises in pandas 2.0.
flg = cleaned_latest.groupby('city')[['new_confirmed', 'new_deaths']].sum().reset_index()

# Deaths per 100 new confirmed cases, rounded to two decimals. Cities with
# zero new cases produce inf/NaN here; the > 100 filter below removes them.
flg['mortalityRate'] = ((flg['new_deaths'] / flg['new_confirmed']) * 100).round(2)

# Keep cities with more than 100 new cases and sort once, highest rate first.
temp = flg[flg['new_confirmed'] > 100].sort_values('mortalityRate', ascending=False)

# Top 10 rates, reversed so the highest is the last row handed to the
# horizontal bar chart.
fig = px.bar(
    temp.head(10).iloc[::-1],
    x='mortalityRate',
    y='city',
    title='Deaths per 100 Confirmed Cases',
    text='mortalityRate',
    height=800,
    orientation='h',
    color_discrete_sequence=['darkred'],
)
fig.show()

If you have any questions, don't hesitate to comment below.

Have a great analysis! I hope to see your analyses and predictions here.