# Assignment 3

## Jupyter Notebook 2: Plotly Graphs


- Student Name: Karina Jonina
- Student ID: c00278440

- Module Name: Programming for Data Scientists
- Module Code: PROGC5201
- Module Leader: Paul Barry

- Course Name: 		Master of Science in Data Science
- Course Code:		 CW_KCDAR_M Y5

- Due Date: 10th January 2022 9:00am

In [None]:
import pandas as pd
import datetime
from datetime import datetime
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [None]:
# Load the tidy CSV, asking pandas to parse the 'Date' column as dates
df = pd.read_csv(
    'tidy_data_df.csv',
    parse_dates=['Date'],
)

### Checking tidy_data_df.csv

In [None]:
# Preview the first rows of the loaded data
df.head()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Column dtypes and non-null counts straight after loading
df.info()

In [None]:
# Distinct labels in 'Column'; the cells below filter on these labels
df['Column'].unique()

In [None]:
# Cast both label columns to the pandas 'category' dtype in a single call
df = df.astype({'Column': 'category', 'Age Range': 'category'})
df['Age Range'].unique()

In [None]:
# List the age bands from youngest to oldest and mark the categorical as ordered
df['Age Range'] = df['Age Range'].cat.reorder_categories(
    new_categories=[
        '1-4', '5-14', '15-24', '25-34', '35-44',
        '45-54', '55-64', '65-74', '75-84', '85+',
    ],
    ordered=True,
)

In [None]:
# Work with 'Date' in its string form first.
df['Date'] = df['Date'].astype(str)

# Best-effort re-parse using the '%Y-%B-%d' layout (year, full month name, day).
# errors='ignore' is deprecated in pandas and explicit exception handling is
# the documented replacement, so the same "keep the input unchanged when it
# does not parse" behaviour is spelled out with try/except.
# NOTE(review): if the strings are ISO 'YYYY-MM-DD', '%B' cannot match and
# 'Date' stays as strings. Confirm whether '%m' was intended before changing
# it -- later cells (e.g. animation_frame="Date") may rely on the string form.
try:
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%B-%d')
except ValueError:
    # Parsing failed: deliberately keep the string dates, as before.
    pass


In [None]:
# Re-check the dtypes after the category and date conversions above
df.info()

In [None]:
# Missing daily counts become 0, then the column is stored as whole numbers
# (0.0 -> 0)
df['Daily Cases'] = df['Daily Cases'].fillna(0).astype(int)

df.head()

In [None]:
# Re-check the dtypes after cleaning 'Daily Cases'
df.info()

### Creating New Dataset

###### Dataset for Confirmed Covid Cases with Age Range

In [None]:
# Rows whose 'Column' label starts with 'Aged', re-indexed from 0 (the old
# index is kept as a column). 'Cases' holds running totals here, so it is
# renamed to 'Accumulated Cases' and its NaN values are replaced with 0.
aged_df = (
    df.loc[df['Column'].str.startswith("Aged")]
    .reset_index()
    .rename(columns={'Cases': 'Accumulated Cases'})
    .fillna({'Accumulated Cases': 0})
)

In [None]:
# Spot-check a single age band
aged_df.loc[aged_df['Age Range'] == '65-74']

###### Dataset for Hospitalised Covid Cases with Age Range

In [None]:
# Rows whose 'Column' label contains 'Hospitalised Aged'. 'Cases' holds
# running totals here, so it is renamed to 'Accumulated Cases' and its NaN
# values are replaced with 0.
hospitalised_df = (
    df.loc[df['Column'].str.contains("Hospitalised Aged")]
    .rename(columns={'Cases': 'Accumulated Cases'})
    .fillna({'Accumulated Cases': 0})
)

hospitalised_df.tail(10)

###### Dataset for Confirmed Covid Cases and Hospitalised Covid Cases with Age Range

In [None]:
# A substring match on 'Aged' picks up both the 'Aged ...' and the
# 'Hospitalised Aged ...' labels. 'Cases' holds running totals here, so it is
# renamed to 'Accumulated Cases' and its NaN values are replaced with 0.
aged_and_hospitalised_df = (
    df.loc[df['Column'].str.contains("Aged")]
    .rename(columns={'Cases': 'Accumulated Cases'})
    .fillna({'Accumulated Cases': 0})
)

aged_and_hospitalised_df.tail()

###### Dataset for Confirmed Covid Cases (NO AGE_RANGE)

In [None]:
# Rows whose 'Column' label starts with 'Confirmed Covid Cases'.
# The original 'Age Range' and 'Daily Cases' columns are dropped for this
# subset; 'Cases' then takes over the 'Daily Cases' name and its NaN values
# are replaced with 0.
confirmed_cc_df = (
    df.loc[df['Column'].str.startswith("Confirmed Covid Cases")]
    .drop(columns=['Age Range', 'Daily Cases'])
    .rename(columns={'Cases': 'Daily Cases'})
    .fillna({'Daily Cases': 0})
)

confirmed_cc_df.tail(10)

###### Dataset for Hospitalised Covid Cases (NO AGE_RANGE)

In [None]:
# selecting all rows where 'Column' contains 'Hospitalised Covid Cases'
hospital_cc_df = df[df['Column'].str.contains("Hospitalised Covid Cases")]

# in this dataset, the age range is empty
hospital_cc_df = hospital_cc_df.drop(columns=['Age Range'])

# column 'Cases' actually represents 'Accumulated Cases'
hospital_cc_df = hospital_cc_df.rename(columns={'Cases': 'Accumulated Cases'})

# assigning 0 to all NaN in this dataset.
# The previous version only filled 'Daily Cases', leaving NaN in the renamed
# 'Accumulated Cases' column; both count columns are filled now, matching the
# age-range datasets.
hospital_cc_df = hospital_cc_df.fillna({'Accumulated Cases': 0, 'Daily Cases': 0})

hospital_cc_df.tail(10)

###### Dataset for Covid Cases Requiring ICU

In [None]:
# selecting all rows where 'Column' contains 'Requiring' so that
# 'Requiring ICU Covid Cases' is selected
icu_cc_df = df[df['Column'].str.contains("Requiring")]

# in this dataset, the age range is empty
icu_cc_df = icu_cc_df.drop(columns=['Age Range'])

# column 'Cases' actually represents 'Accumulated Cases'
icu_cc_df = icu_cc_df.rename(columns={'Cases': 'Accumulated Cases'})

# assigning 0 to all NaN in this dataset.
# The previous version only filled 'Daily Cases', leaving NaN in the renamed
# 'Accumulated Cases' column; both count columns are filled now, matching the
# age-range datasets.
icu_cc_df = icu_cc_df.fillna({'Accumulated Cases': 0, 'Daily Cases': 0})

icu_cc_df.tail(10)

###### Dataset for Median Age

In [None]:
# selecting all rows where 'Column' contains 'Median Age'
median_age_df = df[df['Column'].str.contains("Median Age")]

# in this dataset, the 'age range' and 'daily cases' are empty
median_age_df = median_age_df.drop(columns=['Age Range', 'Daily Cases'])

# column 'Cases' actually represents 'Median Age'
median_age_df = median_age_df.rename(columns={'Cases': 'Median Age'})

# A copy-pasted line here used to call fillna on icu_cc_df instead of this
# dataset; it has been removed. Missing 'Median Age' values are deliberately
# left as NaN: replacing them with 0 would show up as a median age of zero.
# NOTE(review): confirm that leaving NaN here is the desired behaviour.

median_age_df.tail()

### Plotly Graph 1

In [None]:
# Graph 1: daily confirmed cases, hospitalisations and ICU numbers as three
# line traces on a shared date axis with range-selector buttons and a slider.


def _daily_hover(value_label):
    """Return the hover template for a daily trace, labelling y with *value_label*.

    '%{y:,}' formats the value with a thousands separator; the earlier
    '%{y:,.}' has a '.' with no precision digits, which does not fit the
    d3-format specifier grammar.
    """
    return ("<b>%{customdata}</b><br><br>"
            "Date: %{x|%d %b %Y} <br>"
            + value_label + ": %{y:,}<br>"
            "<extra></extra>")


# NOTE: despite its name, 'age' is the confirmed daily cases trace.
age = go.Scatter(
    x=confirmed_cc_df['Date'],
    y=confirmed_cc_df['Daily Cases'],
    name='Daily Cases',
    mode='lines',
    customdata=confirmed_cc_df['Column'],
    hovertemplate=_daily_hover("Covid-19 Cases"),
    line=dict(color="#636EFA"),
)

hospitalisation = go.Scatter(
    x=hospital_cc_df['Date'],
    y=hospital_cc_df['Daily Cases'],
    name='Daily Hospitalisation with Covid',
    mode='lines',
    customdata=hospital_cc_df['Column'],
    hovertemplate=_daily_hover("Daily Hospitalisation with Covid"),
    line=dict(color="#EF553B"),
)

icu = go.Scatter(
    x=icu_cc_df['Date'],
    y=icu_cc_df['Daily Cases'],
    name='Daily ICU Numbers',
    mode='lines',
    customdata=icu_cc_df['Column'],
    # previously copy-pasted as "Daily Hospitalisation with Covid"
    hovertemplate=_daily_hover("Daily ICU Numbers"),
    line=dict(color="#316395"),
)

data = [age, hospitalisation, icu]

layout = dict(
    title='Daily COVID-19 Cases in Ireland', title_font_size=30,
    yaxis_title='Daily COVID-19 Cases', yaxis_title_font_size=20,
    autosize=False, width=800, height=600,
    xaxis=dict(
        rangeselector=dict(
            buttons=[
                dict(count=7, step="day", stepmode="backward", label="1W"),
                dict(count=1, step="month", stepmode="backward", label="1M"),
                dict(count=3, step="month", stepmode="backward", label="3M"),
                dict(count=6, step="month", stepmode="backward", label="6M"),
                dict(count=1, step="year", stepmode="backward", label="1Y"),
                dict(count=1, step="year", stepmode="todate", label="YTD"),
                dict(count=1, step="all", stepmode="backward", label="MAX"),
            ]
        ),
        rangeslider=dict(visible=True),
        type='date',
    ),
)

fig = go.FigureWidget(data=data, layout=layout)

# horizontal legend, right-aligned and pushed below the plot area (y=-0.5)
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=-0.5,
    xanchor="right",
    x=1,
))

fig.show()

### Plotly Graph 2

In [None]:
# Remove the 'Column' label and the accumulated totals; the graphs below
# work from 'Daily Cases'
hospitalised_df = hospitalised_df.drop(['Column', 'Accumulated Cases'], axis='columns')
hospitalised_df.head()

In [None]:
# Remove the 'Column' label and the accumulated totals; the graphs below
# work from 'Daily Cases'
aged_df = aged_df.drop(['Column', 'Accumulated Cases'], axis='columns')
aged_df.head()

In [None]:
# Wide table: one row per 'Date', one column per 'Age Range', cells holding
# 'Daily Cases' (pivot_table's default aggregation applies to duplicates)
hospitalised_table = hospitalised_df.pivot_table(
    index=['Date'],
    values='Daily Cases',
    columns=['Age Range'],
)

hospitalised_table.head()

In [None]:
# Wide table: one row per 'Date', one column per 'Age Range', cells holding
# 'Daily Cases' (pivot_table's default aggregation applies to duplicates)
aged_table = aged_df.pivot_table(
    index=['Date'],
    values='Daily Cases',
    columns=['Age Range'],
)

aged_table.head()

In [None]:
# Age-range column labels of the pivoted table, as a plain list
columns = aged_table.columns.tolist()
columns

In [None]:

# Graph 2a: a single line trace of daily confirmed cases, with a dropdown
# that swaps the trace's data to the selected age range.
fig = go.Figure()

# One trace only; it starts on the first age-range column, which matches the
# first dropdown entry.
fig.add_trace(go.Scatter(
    x=aged_table.index,
    y=aged_table[aged_table.columns[0]],
    visible=True,
    line=dict(color="#636EFA"),
    mode='lines',
    # '%{y:,}' = thousands separator; the earlier '%{y:,.}' has a '.' with no
    # precision digits, which does not fit the d3-format specifier grammar.
    hovertemplate="<b>Confirmed COVID-19 Cases</b><br><br>" +
                  "Date: %{x|%d %b %Y} <br>" +
                  "Daily Covid-19 Cases: %{y:,}<br>" +
                  "<extra></extra>",
))

# One 'restyle' button per age-range column; each replaces x/y of trace 0.
buttons = [
    dict(
        method='restyle',
        label=col,
        visible=True,
        args=[{'y': [aged_table[col]],
               'x': [aged_table.index],
               'type': 'scatter'}, [0]],
    )
    for col in aged_table.columns
]

# A single dropdown menu holding all the buttons.
updatemenu = [dict(buttons=buttons, direction='down', showactive=True)]

# add the dropdown menu and titles to the figure
fig.update_layout(
    showlegend=False,
    updatemenus=updatemenu,
    title='Daily COVID-19 Cases in Ireland', title_font_size=24,
    yaxis_title='Daily COVID-19 Cases', yaxis_title_font_size=16,
)
fig.show()

In [None]:
# Graph 2b: a single line trace of daily hospitalised cases, with a dropdown
# that swaps the trace's data to the selected age range.
fig = go.Figure()

# One trace only; it starts on the first age-range column, which matches the
# first dropdown entry.
fig.add_trace(go.Scatter(
    x=hospitalised_table.index,
    y=hospitalised_table[hospitalised_table.columns[0]],
    visible=True,
    line=dict(color="#EF553B"),
    mode='lines',
    # '%{y:,}' = thousands separator; the earlier '%{y:,.}' has a '.' with no
    # precision digits, which does not fit the d3-format specifier grammar.
    hovertemplate="<b>Hospitalised COVID-19 Cases</b><br><br>" +
                  "Date: %{x|%d %b %Y} <br>" +
                  "Daily Covid-19 Cases: %{y:,}<br>" +
                  "<extra></extra>",
))

# One 'restyle' button per age-range column; each replaces x/y of trace 0.
buttons = [
    dict(
        method='restyle',
        label=col,
        visible=True,
        args=[{'y': [hospitalised_table[col]],
               'x': [hospitalised_table.index],
               'type': 'scatter'}, [0]],
    )
    for col in hospitalised_table.columns
]

# A single dropdown menu holding all the buttons.
updatemenu = [dict(buttons=buttons, direction='down', showactive=True)]

# add the dropdown menu and titles to the figure
fig.update_layout(
    showlegend=False,
    updatemenus=updatemenu,
    title='Hospitalised COVID-19 Cases in Ireland', title_font_size=24,
    yaxis_title='Hospitalised COVID-19 Cases', yaxis_title_font_size=16,
)
fig.show()

### Plotly Graph 3

In [None]:
# Largest 'Daily Cases' value plus a margin of 10; used as the top of the
# y-axis range in the animated chart below
max_hospitalised_df = 10 + hospitalised_df['Daily Cases'].max()
max_hospitalised_df

In [None]:
# Animated bar chart of hospitalised cases: one frame per 'Date', one bar per
# age range.
fig = px.bar(
    hospitalised_df,
    x="Age Range",
    y="Daily Cases",
    color_discrete_sequence=["#EF553B"],
    animation_frame="Date",
    animation_group="Age Range",
    # same y-axis range for every frame
    range_y=[0, max_hospitalised_df],
    # plotly express expects `labels` keys to be column names. The earlier
    # {"x": ..., "y": ...} keys matched no column, so the intended
    # "Hospitalised Cases" axis label was never applied (and "x": "Date"
    # would have mislabelled the Age Range axis).
    labels={"Daily Cases": "Hospitalised Cases"},
    title="Hospitalised COVID-19 Cases by Age Range",
    category_orders={"Age Range": ['1-4', '5-14', '15-24', '25-34', '35-44',
                                   '45-54', '55-64', '65-74', '75-84', '85+']},
)
fig.show()

In [None]:
# Largest 'Daily Cases' value plus a margin of 100; used as the top of the
# y-axis range in the animated chart below
max_aged_df = 100 + aged_df['Daily Cases'].max()
max_aged_df

In [None]:
# Animated bar chart of confirmed cases: one frame per 'Date', one bar per
# age range.
fig = px.bar(
    aged_df,
    x="Age Range",
    y="Daily Cases",
    color_discrete_sequence=["#636EFA"],
    animation_frame="Date",
    animation_group="Age Range",
    # same y-axis range for every frame
    range_y=[0, max_aged_df],
    # The earlier labels={"x": "Date", "y": "Daily Cases"} was dropped:
    # plotly express expects `labels` keys to be column names, so those keys
    # had no effect, and "x": "Date" described the wrong axis. The column
    # names 'Age Range' / 'Daily Cases' are used as axis titles by default.
    title="COVID-19 Cases by Age Range",
    category_orders={"Age Range": ['1-4', '5-14', '15-24', '25-34', '35-44',
                                   '45-54', '55-64', '65-74', '75-84', '85+']},
)
fig.show()

### Plotly Graph 4

In [None]:
# Graph 4: daily confirmed cases (bars, left axis) against median age
# (line, right axis) on a shared date axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# '%{y:,}' = thousands separator; the earlier '%{y:,.}' has a '.' with no
# precision digits, which does not fit the d3-format specifier grammar.
fig.add_trace(
    go.Bar(
        x=confirmed_cc_df['Date'],
        y=confirmed_cc_df['Daily Cases'],
        hovertemplate="<b>Confirmed Covid Cases</b><br><br>" +
                      "Date: %{x|%d %b %Y} <br>" +
                      "Daily Covid Cases: %{y:,}<br>" +
                      "<extra></extra>",
    ),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(
        x=median_age_df['Date'],
        y=median_age_df['Median Age'],
        hovertemplate="<b>Median Age</b><br><br>" +
                      "Date: %{x|%d %b %Y} <br>" +
                      "Median Age: %{y:,}<br>" +
                      "<extra></extra>",
        line=dict(color="#DDCC96"),
    ),
    secondary_y=True,
)

# Range-selector buttons only. The range slider used to be switched on here
# and then immediately switched off by a following update_layout call; it is
# now set to its final (hidden) state once.
fig.update_xaxes(
    rangeslider_visible=False,
    rangeselector=dict(
        buttons=[
            dict(count=7, step="day", stepmode="backward", label="1W"),
            dict(count=1, step="month", stepmode="backward", label="1M"),
            dict(count=3, step="month", stepmode="backward", label="3M"),
            dict(count=6, step="month", stepmode="backward", label="6M"),
            dict(count=1, step="year", stepmode="backward", label="1Y"),
            dict(count=1, step="year", stepmode="todate", label="YTD"),
            dict(count=1, step="all", stepmode="backward", label="MAX"),
        ]
    ),
)

# Set y-axes titles
fig.update_yaxes(title_text="Covid-19 Cases", secondary_y=False)
fig.update_yaxes(title_text="Median Age", secondary_y=True)

# Title, no legend
fig.update_layout(
    showlegend=False,
    title='Confirmed Covid Cases and Median Age',
    title_font_size=20,
)

# hide the grid lines of the secondary (median age) axis
fig['layout']['yaxis2']['showgrid'] = False

fig.show()


### Plotly Graph 5

In [None]:
# Sum of 'Daily Cases' per age range, returned as a two-column frame
total_hospitalised_table = (
    hospitalised_df
    .groupby(by=['Age Range'])['Daily Cases']
    .sum()
    .reset_index()
)
total_hospitalised_table

In [None]:
# Sum of 'Daily Cases' per age range, returned as a two-column frame
total_aged_table = (
    aged_df
    .groupby(by=['Age Range'])['Daily Cases']
    .sum()
    .reset_index()
)
total_aged_table

In [None]:
# Graph 5: two donut charts side by side -- total confirmed cases (left) and
# total hospitalised cases (right), split by age range.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'domain'}, {'type': 'domain'}]],
)

# Left donut: confirmed cases. `hole` turns the pie into a donut.
fig.add_trace(
    go.Pie(
        labels=total_aged_table['Age Range'],
        values=total_aged_table['Daily Cases'],
        name="Total COVID-19 Cases",
        hole=0.4,
        hovertemplate=(
            "<b>Age Range: %{label}</b><br><br>"
            "Total % of COVID-19: %{percent} <br>"
            "Total # of COVID-19: %{value} <br>"
        ),
    ),
    row=1,
    col=1,
)

# Right donut: hospitalised cases.
fig.add_trace(
    go.Pie(
        labels=total_hospitalised_table['Age Range'],
        values=total_hospitalised_table['Daily Cases'],
        name="Total Hospitalised COVID-19 Cases",
        hole=0.4,
        hovertemplate=(
            "<b>Age Range: %{label}</b><br><br>"
            "Total % of Hospitalised COVID-19 Cases: %{percent} <br>"
            "Total # of Hospitalised COVID-19 Cases: %{value} <br>"
        ),
    ),
    row=1,
    col=2,
)

# Text placed in the centre of each donut
donut_captions = [
    dict(text='Total <br> Confirmed <br> COVID-19 <br> Cases',
         x=0.175, y=0.525, font_size=15, showarrow=False),
    dict(text='Total <br> Hospitalised <br> COVID-19 <br> Cases',
         x=0.835, y=0.525, font_size=15, showarrow=False),
]

fig.update_layout(
    title_text="Total COVID-19 Cases by Age Range",
    title_font_size=15,
    annotations=donut_captions,
    legend_title_text='Age Range',
)

fig.show()