# Number of cases per region in Greece

## Libraries

In [1]:
# calendar, datetime, requests, re,
# pdfminer, BeautifulSoup, pandas, json

import calendar
import datetime

import requests
import re

import pdfminer
from bs4 import BeautifulSoup

import pandas as pd

import json

## Dates and Date Manipulation

In [2]:
def resolve_report_date(now, cutoff_hour=19):
    """Return the zero-padded ``(day, month)`` strings of the report date.

    Before ``cutoff_hour`` today's results may not be ready yet, so the
    previous calendar day is used instead. The step back is done with
    ``timedelta`` so that month and year boundaries (e.g. 1st of November,
    1st of January) roll over correctly.

    Parameters:
        now: a ``datetime.datetime`` giving the current moment.
        cutoff_hour: hour of the day (0-23) from which today's date is used.

    Returns:
        A ``(day, month)`` tuple of two-character strings, e.g. ``('07', '06')``.
    """
    report_day = now
    if now.hour < cutoff_hour:
        report_day = now - datetime.timedelta(days=1)
    return report_day.strftime('%d'), report_day.strftime('%m')


# Current date, month, year and time in string format
current_date = datetime.datetime.now()
current_month = str(current_date.month)
current_year = str(current_date.year)
current_time = str(current_date.time())[:5]

# Add (dd=)-1 for yesterday's results in case today's results are not ready yet
# (i.e. whenever the current time is earlier than 19:00)
dd = -1 if current_date.hour < 19 else 0

# Zero-padded day and month of the report date (e.g. 7 of June => '07', '06').
# Going back one day is delegated to timedelta, so the first day of a month
# maps to the last day of the previous month, including across a new year.
day, month = resolve_report_date(current_date)
temp_day = int(day)

# Current date
date = day + '/' + month

## Daily report about the covid-19 progression in Greece
## Source : National Organization of Public Health (EODY -- ΕΟΔΥ)
##          https://eody.gov.gr/neos-koronaios-covid-19/

In [3]:
# Set name of text file
newpath = 'pdf_to_txt' # Or pdf_to_txt.txt

data = []

# NOTE(review): the report contains Greek text and is assumed to be stored as
# UTF-8 -- confirm against the tool that produces the text file.
# The `with` statement closes the file; no explicit close() is needed.
with open(newpath, 'r', encoding='utf-8') as report:
    report_content = report.read().replace('\n', ' ')

# Use of regular expressions to find needed data
init_data_a = re.findall(r'Τα νέα εργαστηριακά επιβεβαιωμένα κρούσματα της νόσου είναι (\d+)',  report_content)
init_data_b = re.findall(r'Ο συνολικός αριθμός των κρουσμάτων ανέρχεται σε (\d+)' ,  report_content)
init_data_c = re.findall(r'(\d+) θάνατοι', report_content)
# The decimal/integer alternatives are grouped so that the Greek prefix applies
# to both of them; ungrouped, `|` would let a bare number match on its own.
init_data_d = re.findall(r'ημερήσια μεταβολή [-+]?(?:\d*\.\d+|\d+)', report_content)

# Fail with a readable message when the report lacks one of the expected phrases
for label, matches in (('daily cases', init_data_a),
                       ('total cases', init_data_b),
                       ('deaths', init_data_c),
                       ('daily change', init_data_d)):
    if not matches:
        raise ValueError('Could not find ' + label + ' in ' + newpath)

# data = [daily cases, total cases, deaths, daily change as 'N%']
data.append(int(init_data_a[0]))
data.append(int(init_data_b[0]))
data.append(int(init_data_c[0]))
data.append(init_data_d[0].split(' ')[-1]+'%')

## Create json file containing the latest data

In [4]:
# Summary of the latest report, saved as latest_data.json
import json

# data holds the daily cases first, the total cases second
# and the daily increase last
daily_cases, total_cases = data[0], data[1]

diction = dict(
    Dates=date,
    Total_Cases=total_cases,
    Daily_Cases=daily_cases,
    Daily_Increase=data[-1],
)

with open('latest_data.json', 'w') as fp:
    fp.write(json.dumps(diction, indent=4))

## Covid-19 cases by region in Greece
## Source : iMEdD-Lab on Github
##          https://github.com/iMEdD-Lab

In [5]:
# Raw CSV with the per-region figures, loaded straight into a DataFrame
regions_url = 'https://raw.githubusercontent.com/iMEdD-Lab/open-data/master/COVID-19/regions_greece.csv'
init_df = pd.read_csv(filepath_or_buffer=regions_url)

## Redefine date format : (M/D/YY)
### e.g. 10th of July 2020 will be : 7/10/20

In [6]:
init_df

Unnamed: 0,district,district_EN,pop_11,cases,dead,critical,recovered
0,Περιφέρεια Ανατολικής Μακεδονίας Θράκης,East Macedonia-Thrace,608182.0,469.0,23.0,,
1,Περιφέρεια Κεντρικής Μακεδονίας,Central Macedonia,1882108.0,348.0,21.0,,
2,Περιφέρεια Ηπείρου,Epirus,336856.0,39.0,0.0,,
3,Περιφέρεια Θεσσαλίας,Thessaly,732762.0,204.0,2.0,,
4,Περιφέρεια Ιονίων Νήσων,Ionian Islands,207855.0,43.0,2.0,,
5,Περιφέρεια Δυτικής Ελλάδας,Western Greece,679796.0,90.0,6.0,,
6,Περιφέρεια Στερεάς Ελλάδας και Εύβοιας,Central Greece,547390.0,98.0,0.0,,
7,Περιφέρεια Αττικής,Attica,3753783.0,1811.0,84.0,,
8,Περιφέρεια Πελοποννήσου,Peloponnese,577903.0,205.0,0.0,,
9,Περιφέρεια Βορείου Αιγαίου,North Aegean,199231.0,24.0,1.0,,


## Create dictionary containing the names of the regions in greek and english
## and also the number of cases per region

In [7]:
# Strip the leading word 'Περιφέρεια' from the greek region names
regions = [name.replace('Περιφέρεια ', '') for name in init_df['district']]
init_df['district'] = regions

# Swap the over-long labels for shorter equivalents (matched pairwise)
long_labels = ['Ανατολικής Μακεδονίας Θράκης', 'Στερεάς Ελλάδας και Εύβοιας',
               'Χωρίς Μόνιμη Κατοικία στην Ελλάδα', 'Χωρίς Γεωγραφικό Προσδιορισμό',
               'Non-Residents', 'No Location Provided']
short_labels = ['Αν. Μακεδονίας Θράκης', 'Στερεάς Ελλάδας',
                'Χωρίς Μόνιμη Κατοικία', 'Υπό διερεύνηση',
                'Of no Fixed Abode', 'Under Investigation']
init_df = init_df.replace(to_replace=long_labels, value=short_labels)

# Column names expected by the svg map variables
svg_column_names = {
    'district': 'region_name_GR',
    'district_EN': 'region_name_EN',
    'cases': 'number_of_cases',
}
init_df = init_df.rename(columns=svg_column_names)

# Discard the columns this project does not use
init_df = init_df.drop(columns=['pop_11', 'dead', 'critical', 'recovered'])

## Pandas DataFrame

In [8]:
# Plain alias, not a copy: df and init_df name the same DataFrame object
df = init_df

In [9]:
df

Unnamed: 0,region_name_GR,region_name_EN,number_of_cases
0,Αν. Μακεδονίας Θράκης,East Macedonia-Thrace,469.0
1,Κεντρικής Μακεδονίας,Central Macedonia,348.0
2,Ηπείρου,Epirus,39.0
3,Θεσσαλίας,Thessaly,204.0
4,Ιονίων Νήσων,Ionian Islands,43.0
5,Δυτικής Ελλάδας,Western Greece,90.0
6,Στερεάς Ελλάδας,Central Greece,98.0
7,Αττικής,Attica,1811.0
8,Πελοποννήσου,Peloponnese,205.0
9,Βορείου Αιγαίου,North Aegean,24.0


## Create json file with the latest data per region

In [10]:
# One record per region: greek name, english name and number of cases.
# The three columns are walked in lockstep by position, so the result does not
# depend on the DataFrame index being the default 0..n-1 range (a label lookup
# such as df[col][i] would raise KeyError on any other index).
regions_list = [
    {'region_name_gr': name_gr,
     'region_name_en': name_en,
     'number_of_cases': int(cases)}
    for name_gr, name_en, cases in zip(df['region_name_GR'],
                                       df['region_name_EN'],
                                       df['number_of_cases'])
]


# Write the records to cases_per_region.json
with open('cases_per_region.json', 'w') as regions_file:
    json.dump(regions_list, regions_file, indent=4)