#### Imports

In [1]:
# Standard library
import re
import warnings
from random import randint
from time import sleep
from time import time

# Third-party
import pandas as pd
import requests
from bs4 import BeautifulSoup
# clear_output is part of the public IPython.display API; importing it from
# IPython.core.display is deprecated.
from IPython.display import clear_output
from requests import get

# Wall-clock time at which this run of the notebook started.
timestart_time = time()
# Emits a UserWarning with a fixed message on every run
# (presumably a check that warnings are displayed -- TODO confirm it is still needed).
warnings.warn('Warning Simulation')

  # This is added back by InteractiveShellApp.init_path()


In [2]:
# Notebook-wide pandas display settings.
# None means "never truncate cell contents"; the old sentinel -1 is deprecated
# since pandas 1.0 and rejected by later releases.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

#### Read our datasets and store variables for later use

In [3]:
# Main dataset, read from the published Google Sheet (CSV export).
# The cells below use its 'county_en' and 'dead' columns.
greece = pd.read_csv('https://docs.google.com/spreadsheets/d/e/2PACX-1vRpR8AOJaRsB5by7H3R_GijtaY06J8srELipebO5B0jYEg9pKugT3C6Rk2RSQ5eyerQl7LolshamK27/pub?gid=1017539712&single=true&output=csv')
# ('https://raw.githubusercontent.com/iMEdD-Lab/open-data/master/COVID-19/greece.csv')
# ('https://docs.google.com/spreadsheets/d/e/2PACX-1vRpR8AOJaRsB5by7H3R_GijtaY06J8srELipebO5B0jYEg9pKugT3C6Rk2RSQ5eyerQl7LolshamK27/pub?gid=1017539712&single=true&output=csv')
# Timeline dataset; only the row(s) whose 'Status' column equals 'deaths' are used here.
greeceTimeline = pd.read_csv('https://raw.githubusercontent.com/iMEdD-Lab/open-data/master/COVID-19/greeceTimeline.csv')

#total number of deaths in our current dataset
greecedeadsum = greece.dead.sum()

#number of deaths for which location remains unknown in our current dataset
# NOTE: this is a Series (one entry per 'Unknown' row), not a scalar; the update
# cell further down relies on its index to write the new value back into `greece`.
greecedeadunknown = greece[greece['county_en'] == 'Unknown']['dead']

#max number of daily deaths recorded till now, according to our dataset
# Computed as the largest numeric value in the first 'deaths' row of the timeline.
# numeric_only=True skips text columns such as 'Status'; without it, recent pandas
# raises TypeError when it tries to compare strings with numbers, while older
# pandas silently fell back to the numeric columns (same result as here).
# Raises IndexError if the timeline has no 'deaths' row.
upperthresh = greeceTimeline[greeceTimeline.Status == 'deaths'].max(axis=1, numeric_only=True).tolist()[0]

#### Scrape Google results, clean them and store them in a df

In [4]:
#Searching for "dead Greece" ("νεκροί Ελλάδα") in pages written in Greek, published within the last hour
# A timeout keeps the notebook from hanging forever if Google does not answer.
raw_html = requests.get('https://www.google.com/search?q=%CE%BD%CE%B5%CE%BA%CF%81%CE%BF%CE%B9+%CE%B5%CE%BB%CE%BB%CE%AC%CE%B4%CE%B1&rlz=1C1GCEU_enGR865GR865&tbas=0&tbs=qdr:h,lr:lang_1el&sxsrf=ALeKk03bddItnMzR_Zql20kk2bhn6Q8Dzg:1590053633527&source=lnt&lr=lang_el&sa=X&ved=2ahUKEwjb4f-v08TpAhVNwMQBHXHTDXoQpwV6BAgLEBo&biw=1536&bih=722', timeout=30)
# Stop here with a clear HTTP error (e.g. 429 when Google throttles us) instead of
# failing later with an unrelated parsing error on an error page.
raw_html.raise_for_status()

# raw_html

In [5]:
soup = BeautifulSoup(raw_html.content, "html.parser")
# print(soup.prettify())

# Walk down to the element that holds the result cards: the second child of the
# document, then its second child, then that element's second child. This depends on
# the exact layout of the returned page and raises IndexError if the layout changes.
html = list(soup.children)[1]
container = list(html.children)[1]
box = list(container.children)[1]
results = box.find_all('div', {'class': 'ZINbbc xpd O9g5cc uUPGi'})

# One row per result card, holding the card's visible text. The explicit object dtype
# keeps the 'result' column (and its .str accessor) usable when no card was found;
# building the frame from an empty list would produce a frame without that column.
df = pd.DataFrame({'result': pd.Series([result.text for result in results], dtype=object)})

# Death count: a whitespace-delimited three-digit number that is followed, anywhere later
# in the text, by "οι νεκροί", "τα θύματα" or "οι θάνατοι".
# NOTE: (\d\d\d) only captures three-digit numbers, so totals below 100 or above 999
# are never recognised.
# expand=False returns a Series (one capture group) instead of a one-column DataFrame.
df['deaths'] = df.result.str.extract(r'\s+(\d\d\d)\s+(?:.*?οι\s+νεκροί|.*?τα\s+θύματα|.*?οι\s+θάνατοι)', expand=False)\
                .astype(float)
# Source: the word that follows the last "www." or "https://" in the text, up to the next dot.
df['source'] = df.result.str.extract(r'(?:.*www[.]|.*https:\/\/)(\w+)[.]', expand=False)

# Keep only plausible counts: at least our current total and at most our total plus the
# largest daily increase seen so far (both bounds inclusive). Rows without an extracted
# number (NaN) are dropped by the same test.
df = df[df.deaths.between(greecedeadsum, greecedeadsum + upperthresh)]
# df

In [6]:
# Show the Google results that passed the filter applied in the previous cell.
df

Unnamed: 0,result,deaths,source
1,"Κοροναϊός: Κατέληξε 91χρονη - 170 οι νεκροί στην Ελλάδα | in.grhttps://www.in.gr › Ελλάδαπριν από 5 λεπτά · ¶λλο ένα θύμα προστέθηκε στη μαύρη λίστα των νεκρών από κοροναϊό στην Ελλλάδα, καθώς μία 91χρονη γυναίκα που νοσηλευόταν στο Γενικό Νοσοκομείο ...",170.0,in
5,ΕΙΔΗΣΕΙΣ - Onmed.grhttps://www.onmed.gr › ygeia-eidhseisπριν από 57 λεπτά · Κορoνοϊός: Στους 169 οι νεκροί στη χώρα μας - 21 νέα κρούσματα - 2.873 συνολικά. 22/05/2020 19:43. Στα 2.873 ανέρχονται τα επιβεβαιωμένα κρούσματα του ...,169.0,onmed
7,"Κορωνοϊός: Κατέληξε 91χρονη - Στους 170 οι νεκροί - proto themahttps://www.protothema.gr › koronoios-katelixe-91hroni-stous-170-oi-nekroiπριν από 24 λεπτά · Κορωνοϊός: Κατέληξε 91χρονη - Στους 170 οι νεκροί ... Δείτε όλες τις τελευταίες Ειδήσεις από την Ελλάδα και τον Κόσμο, τη στιγμή που συμβαίνουν, στο ...",170.0,protothema


#### Check the current number of deaths on Google, notify us accordingly and update our dataset if needed

In [7]:
# Highest death count found in the filtered Google results (NaN when there are none).
googlemax = df.deaths.max()
rows_at_max = df[df.deaths == googlemax]
googlemax_source = rows_at_max.source.tolist()
# Number of distinct websites reporting that count. Counting rows instead would let
# two results from the same site confirm each other.
mentions = rows_at_max.source.nunique()

# True when Google reports more deaths than our dataset, but not more than our total plus
# the max number of daily deaths recorded. The upper bound is inclusive so that it agrees
# with the filter applied to `df` when it was built; with a strict "<" a count exactly at
# the bound would fall through to the "Cool" branch and wrongly claim we are up to date.
plausible_increase = greecedeadsum < googlemax <= greecedeadsum + upperthresh

# if no Google result carried a usable number, say so instead of reporting "nan deaths"
if pd.isna(googlemax):
    text = 'No check possible: no current Google result mentions a plausible number of deaths.'

# if Google reports a plausible increase and
# that number is published by at least 2 websites in our Google results
elif plausible_increase and mentions >= 2:

    #then add the difference to the current value of deaths with no known location, save an updated csv and notify us
    # NOTE: greecedeadunknown is reassigned here while greecedeadsum is not, so re-running
    # this cell without re-running the loading cell adds the difference a second time.
    greecedeadunknown = greecedeadunknown+(googlemax-greecedeadsum)
    greece.loc[greece['county_en'] == 'Unknown', 'dead'] = greecedeadunknown

    greece.to_csv('greece_auto_test.csv',index=False)

    text = 'Breaking: It seems we currently count '+str(googlemax)+' deaths. Source: '+str(googlemax_source)+' // greece.dead was updated and greece_auto_test.csv has been saved.'

# if Google reports a plausible increase but
# that number is published by fewer than 2 websites in our Google results
elif plausible_increase:

    #then just notify us to check it out
    text = 'To be confirmed: It seems we currently count '+str(googlemax)+' deaths. Source: '+str(googlemax_source)

# otherwise Google does not report more deaths than we already have
else:
    text = 'Cool: greece.dead is updated! We still count '+str(googlemax)+' deaths. Source: '+str(googlemax_source)

In [8]:
# Show the notification message built by the check above.
text

"Breaking: It seems we currently count 170.0 deaths. Source: ['in', 'protothema'] // greece.dead was updated and greece_auto_test.csv has been saved."

In [9]:
# t = ' thanassis'
# f'troboukis{t}'

In [10]:
# df.to_csv(f'CSVs/K_report{time.strftime('%Y_%m_%d-%H_%M')}.csv, index=False, encoding = 'utf-8')

In [11]:
# Print the notification message as plain text (without the quotes of its repr).
print(text)

Breaking: It seems we currently count 170.0 deaths. Source: ['in', 'protothema'] // greece.dead was updated and greece_auto_test.csv has been saved.


In [12]:
# test1 = pd.read_csv('greece_auto_test.csv')
# test1

In [14]:
# Read back the CSV written by the update step and total its 'dead' column,
# to compare against the number reported in the message above.
test = pd.read_csv('greece_auto_test.csv')
test['dead'].sum()


170.0