# Covid-19 Data with plots from Greece

## Libraries

In [1]:
import calendar
import datetime
import pandas as pd

import urllib 
import requests
import re

import pdfminer

import io

from bs4 import BeautifulSoup

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import cufflinks as cf

# Put cufflinks into offline mode before any `.iplot()` call further down
# (presumably so the figures render inside the notebook without a Plotly
# account — confirm against the cufflinks documentation)
cf.go_offline()

## Dates & Date Manipulation

### Set date format as : day/month (DD/MM). e.g. 20/05

In [2]:
# Timestamp of this run; every value below is derived from this single reading
current_date = datetime.datetime.now()
current_month = str(current_date.month)
current_time = str(current_date.time())[:5]

# Before 21:00 today's case report may not be available yet, so fall back to
# yesterday's report (dd = 1); from 21:00 on use today's (dd = 0)
dd = 1 if current_date.hour < 21 else 0

# Let timedelta do the calendar arithmetic: subtracting a day on the 1st of a
# month (or on the 1st of January) then rolls back correctly to the last day
# of the previous month (or year) instead of producing day 0 / month 0
report_date = current_date - datetime.timedelta(days=dd)

# Year of the report day. The later cells build their download URLs from this
# value, so it has to follow the roll-back across New Year
current_year = str(report_date.year)

# Day of month of the report as an integer (the weather cell searches for it)
temp_day = report_date.day

# Zero-padded month and day of the report (e.g. June => '06', the 7th => '07')
month = report_date.strftime('%m')
day = report_date.strftime('%d')

# Report date in DD/MM form, e.g. 20/05
date = day + '/' + month

## Read local csv with pandas

In [3]:
# Local CSV file holding the rows of all previously stored days
filename = 'covid-19_greece_data.csv'

# Load every case recorded before today's report into a pandas DataFrame
df_init = pd.read_csv(filepath_or_buffer=filename)

## Weather Data - Max temperature per day
### Source : Acharnes Weather Station (http://www.meteoacharnes.gr/statistika/datasummary.htm)
### If new data are not available yet then fill dataframe with the mean of max temperatures of the last two days

In [4]:
# `urllib.request` is a submodule; a bare `import urllib` does not guarantee
# that it is loaded, so import it explicitly before using it
import urllib.request

# URL of the monthly temperature report of the report day (NOAAMO<MM><YY>.txt)
meteoach_url = 'http://www.meteoacharnes.gr/Reports/NOAAMO' + month + current_year[-2:] + '.txt'

# Save values of Max Temperatures column as floating point numbers
df_init['Max Temperatures'] = df_init['Max Temperatures'].astype(float)

# Fallback for a day the station has not published yet: mean of the max
# temperatures of the last two stored days. Computed once, up front, so it is
# defined even when the downloaded file contains no lines at all
tt = df_init['Max Temperatures'][-2:].mean()

# Rows are matched on their first two characters, so a single-digit day such
# as 5 is searched as ' 5'. `num` is the position of the max-temperature field
# once the row is split on single spaces (one position further for ' 5'
# because of the leading blank)
if int(temp_day) < 10:
    tday = ' ' + str(temp_day)
    num = 5
else:
    tday = str(temp_day)
    num = 4

# Scan the report line by line; the context manager closes the HTTP response
# even if decoding or parsing fails. If several lines match, the last one is kept
yt = None
with urllib.request.urlopen(meteoach_url) as max_temp_file:
    for init_line in max_temp_file:
        line = init_line.decode("utf-8")
        if line[:2] == tday:
            # The report writes decimals with a comma; float() needs a dot
            yt = line.split(' ')[num].replace(',', '.')

# True when a row for the report day was found in the station's file
key1 = yt is not None

max_temp = float(yt) if key1 else tt

## Current data from Hellenic National Public Health Organization (EODY)
### Source : EODY reports (https://eody.gov.gr/epidimiologika-statistika-dedomena/ektheseis-covid-19/) 

In [5]:
# Set curret pdf url
eody_url = 'https://eody.gov.gr/wp-content/uploads/' + current_year + '/' + month + '/covid-gr-daily-report-' + current_year + month + day + '.pdf'
pdf_file = requests.get(eody_url)

# Save daily report pdf
open('daily_report.pdf', 'wb').write(pdf_file.content)

# Save daily_report.pdf content to text file using pdminer and pdf2txt.py
# pdf2txt.py [-o output_file] initial_pdf_file
!pdf2txt.py -o pdf_to_txt daily_report.pdf

# Set name of text file
newpath = 'pdf_to_txt' # Or pdf_to_txt.txt

data = []

with open(newpath, 'r') as report:
    report_content = report.read()
    temp_content = report_content.split('\n\n')
    needed_content = temp_content[3]
    
    # Use of regular expressions to find needed data
    init_data_a = re.findall(r'Τα νέα εργαστηριακά επιβεβαιωμένα κρούσματα της νόσου είναι (\d+)',  needed_content)
    init_data_b = re.findall(r'Ο συνολικός αριθμός των κρουσμάτων ανέρχεται σε (\d+)' ,  needed_content)
    init_data_c = re.findall(r'(\d+) θάνατοι', needed_content)
    data.append(int(init_data_a[0]))
    data.append(int(init_data_b[0]))
    data.append(int(init_data_c[0]))
    
    report.close()

## Check for number of total recovered until today

In [6]:
# Total number of recovered patients until today. The value is hard-coded, so
# presumably it is looked up and edited by hand before each run — TODO confirm
# the source of this figure
recovered = 1374

## Construct a dictionary of the daily instance

In [7]:
# Give today's parsed figures readable names before assembling the record
new_cases = data[0]
total_cases = data[1]
deaths = data[-1]

# Dictionary with all of the above daily data; its keys are the column names
# of the stored table
diction = {
    'Dates': date,
    'Total Cases': total_cases,
    'Daily Cases': new_cases,
    'Total Recovered': recovered,
    # Active = total - recovered - deaths
    'Active Cases': total_cases - recovered - deaths,
    'Max Temperatures': max_temp,
}

## Add new data on the dataframe
### Local file : covid-19_greece_data.csv

In [8]:
# Today's figures as a one-row frame whose index label continues the row
# numbering of df_init
new_row = pd.DataFrame([pd.Series(diction)], index=[len(df_init)])

if diction['Dates'] in df_init['Dates'].values:
    # The report day is already stored: adding it again would show the same
    # day twice in every table and plot when the notebook is re-run.
    # Work on a copy so that later in-place changes to df leave df_init intact
    df = df_init.copy()
else:
    # New Dataframe with concatenation
    df = pd.concat([df_init, new_row])

    # Render the new row as text and join its whitespace-separated fields
    # with ',' to get one CSV line
    last_row = df.iloc[[-1]].to_string(header=False, index=False, index_names=False).split('\n')
    start_row = ','.join(last_row[0].split())

    # The row must start on a line of its own: add a line break first when the
    # file does not already end with one (an empty file needs none)
    with open(filename, 'r') as fr:
        stored_text = fr.read()
    prefix = '' if not stored_text or stored_text.endswith('\n') else '\n'

    # Terminate the row as well, so that the next append starts on a new line
    with open(filename, 'a') as fd:
        fd.write(prefix + start_row + '\n')

In [9]:
# Display the dataframe built in the previous cell as this cell's output
df

Unnamed: 0,Dates,Total Cases,Daily Cases,Total Recovered,Active Cases,Max Temperatures
0,26/02,1,1,0,1,18.8
1,27/02,3,2,0,3,18.8
2,28/02,4,1,0,4,14.9
3,29/02,7,3,0,4,12.5
4,01/03,7,0,0,7,15.5
...,...,...,...,...,...,...
134,09/07,3672,50,1374,2105,29.7
135,10/07,3732,60,1374,2165,31.6
136,11/07,3772,41,1374,2205,32.9
137,12/07,3803,31,1374,2236,33.0


In [10]:
# Last 5 days of data (tail() is called without arguments, so its default
# number of rows is shown)
df.tail()

Unnamed: 0,Dates,Total Cases,Daily Cases,Total Recovered,Active Cases,Max Temperatures
134,09/07,3672,50,1374,2105,29.7
135,10/07,3732,60,1374,2165,31.6
136,11/07,3772,41,1374,2205,32.9
137,12/07,3803,31,1374,2236,33.0
138,13/07,3826,24,1374,2259,30.9


In [11]:
# Use the 'Dates' column as the row index from here on, then preview the
# last rows
df = df.set_index('Dates')
df.tail()

Unnamed: 0_level_0,Total Cases,Daily Cases,Total Recovered,Active Cases,Max Temperatures
Dates,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
09/07,3672,50,1374,2105,29.7
10/07,3732,60,1374,2165,31.6
11/07,3772,41,1374,2205,32.9
12/07,3803,31,1374,2236,33.0
13/07,3826,24,1374,2259,30.9


## Data in Plots
### Click and drag mouse to select specific area of plot to zoom in.  Move mouse over bars to see data

## Total Cases per Day

In [12]:
# Line-and-marker chart of the 'Total Cases' column, one point per stored day
n_days = len(df['Total Cases'])
total_cases_title = '<b>Total Cases of Covid_19 in Greece (' + str(n_days) + ' days)</b>'

df['Total Cases'].iplot(
    kind='scatter',
    mode='lines+markers',
    color='blue',
    size=7,
    xTitle='Dates',
    yTitle='Number of Cases',
    title=total_cases_title,
)

## Daily Cases

In [13]:
# Bar chart of the 'Daily Cases' column; the title carries the number of
# stored days
n_days = len(df['Total Cases'])
daily_cases_title = '<b>Daily Cases of Covid_19 in Greece (' + str(n_days) + ' days)</b>'

df['Daily Cases'].iplot(
    kind='bar',
    color='blue',
    xTitle='Dates',
    yTitle='Number of Cases',
    title=daily_cases_title,
)

## Total Cases and Total Recovered

In [14]:
# Single-axis figure (no secondary y-axis): both series are drawn against the
# same y-axis
fig = make_subplots(specs=[[{"secondary_y": False}]])

# Traces - two bar series
fig.add_trace(
    go.Bar(x=df.index, y=df['Total Recovered'], name='Total Recovered'),
)

fig.add_trace(
    go.Bar(x=df.index, y=df['Total Cases'], name='Total Cases', opacity=0.7),
)

# Figure Title
fig.update_layout(
    title_text='<b>Total Cases and Total Recovered of Covid_19 in Greece ('+str(len(df['Total Cases']))+' days)</b>'
)

# x-axis and y-axis titles
fig.update_xaxes(title_text='Dates')
# One neutral title for the shared y-axis: two successive
# update_yaxes(title_text=...) calls address the same axis, so only the last
# title would be displayed and it would name just one of the two series
fig.update_yaxes(title_text='Number of Cases')

fig.update_layout(hovermode='x unified')

fig.show()

## Total and Active number of Cases

In [15]:
# Total and active cases as two line-and-marker series on one chart.
# Click and drag the mouse to select a specific area of the plot to zoom in;
# move the mouse over the markers to see the data
compared_columns = ['Total Cases', 'Active Cases']
n_days = len(df['Total Cases'])
total_active_title = '<b>Total and Active Cases of Covid_19 in Greece (' + str(n_days) + ' days)</b>'

df[compared_columns].iplot(
    kind='scatter',
    mode='lines+markers',
    color=['orange', 'blue'],
    size=7,
    xTitle='Dates',
    yTitle='Number of Cases',
    title=total_active_title,
)

## Total and Daily number of cases

In [16]:
# Figure with a secondary y-axis: total cases on the primary axis, daily
# cases on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

n_days = len(df['Total Cases'])

# Both series are drawn as lines over the same dates
total_cases_line = go.Scatter(
    x=df.index, y=df['Total Cases'], name='Total Cases',
    line_width=4, line_color='orange',
)
daily_cases_line = go.Scatter(
    x=df.index, y=df['Daily Cases'], name='Daily Cases',
    line_color='blue', opacity=0.7,
)
fig.add_trace(total_cases_line, secondary_y=False)
fig.add_trace(daily_cases_line, secondary_y=True)

# Axis titles: shared x-axis, then one title per y-axis
fig.update_xaxes(title_text='Dates')
fig.update_yaxes(title_text='Number of Total Cases', secondary_y=False)
fig.update_yaxes(title_text='Number of Daily Cases', secondary_y=True)

# Figure title and hover behaviour set in a single layout update
fig.update_layout(
    title_text='<b>Total and Daily Cases of Covid_19 in Greece (' + str(n_days) + ' days)</b>',
    hovermode='x unified',
)

fig.show()

In [17]:
# Figure with a secondary y-axis: total cases as a line on the primary axis,
# daily cases as bars on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

n_days = len(df['Total Cases'])

# Traces - one line and one bar series over the same dates
total_cases_line = go.Scatter(
    x=df.index, y=df['Total Cases'], name='Total Cases',
    line_width=4, line_color='orange',
)
daily_cases_bars = go.Bar(
    x=df.index, y=df['Daily Cases'], name='Daily Cases',
    marker_color='blue', opacity=0.7,
)
fig.add_trace(total_cases_line, secondary_y=False)
fig.add_trace(daily_cases_bars, secondary_y=True)

# Axis titles: shared x-axis, then one title per y-axis
fig.update_xaxes(title_text='Dates')
fig.update_yaxes(title_text='Number of Total Cases', secondary_y=False)
fig.update_yaxes(title_text='Number of Daily Cases', secondary_y=True)

# Figure title and hover behaviour set in a single layout update
fig.update_layout(
    title_text='<b>Total and Daily Cases of Covid_19 in Greece (' + str(n_days) + ' days)</b>',
    hovermode='x unified',
)

fig.show()

## Max Temperatures per Day

In [18]:
# Max temperature of every stored day as a green line with markers
fig = make_subplots()

n_days = len(df['Total Cases'])

max_temp_line = go.Scatter(
    x=df.index,
    y=df['Max Temperatures'],
    mode='lines+markers',
    line_color='green',
    # Hover label shows the value with two decimals
    hovertemplate='Max Temp : %{y:.2f} ℃',
    name='Max Temps',
)
fig.add_trace(max_temp_line, secondary_y=False)

# Title of figure, then x-axis and y-axis titles
fig.update_layout(
    title_text='<b>Max Temperatures per Day (' + str(n_days) + ' days)</b>'
)
fig.update_xaxes(title_text='Dates')
fig.update_yaxes(title_text='Max Temperatures', secondary_y=False)

fig.show()

## Daily Cases and Max Temperature per day

In [19]:
# Figure with a secondary y-axis: daily cases on the primary axis, max
# temperatures on the secondary one
fig = make_subplots(specs=[[{"secondary_y": True}]])

n_days = len(df['Total Cases'])

# Traces - two line-and-marker series over the same dates
daily_cases_line = go.Scatter(
    x=df.index, y=df['Daily Cases'],
    mode='lines+markers', name='Daily Cases', line_width=2,
)
max_temp_line = go.Scatter(
    x=df.index, y=df['Max Temperatures'], opacity=0.5,
    mode='lines+markers', name='Max Temp', line_width=2,
)
fig.add_trace(daily_cases_line, secondary_y=False)
fig.add_trace(max_temp_line, secondary_y=True)

# Axis titles: shared x-axis, then one title per y-axis
fig.update_xaxes(title_text='Dates')
fig.update_yaxes(title_text='Number of Daily Cases', secondary_y=False)
fig.update_yaxes(title_text='Max Temperatures', secondary_y=True)

# Figure title and hover behaviour set in a single layout update
fig.update_layout(
    title_text='<b>Daily Cases of Covid_19 and Max Temperatures in Greece (' + str(n_days) + ' days)</b>',
    hovermode='x unified',
)
fig.show()

# Sources - Values taken from:
### Covid19.gov.gr : https://covid19.gov.gr/covid19-live-analytics/
### Acharnes weather station statistics : http://www.meteoacharnes.gr/statistika/datasummary.htm
### Worldometers : https://www.worldometers.info/coronavirus/country/greece/ 