# Monkeypox

Mit dem Skript lassen sich Daten für den Artikel zu Affenpocken manuell aufbereiten. Das automatisierte Skript ist auf Github unter st-methods/monkeypox abgelegt. <br>
LD-Artikel: https://edit.nzz.ch/p/nzz/articles/1685302/edit/canvas

Quelle Github: https://github.com/globaldothealth/monkeypox


In [1]:
import pandas as pd
import webbrowser
import pycountry
import gettext
from datetime import datetime
import os
import numpy as np

In [2]:
german = gettext.translation('iso3166', pycountry.LOCALES_DIR, languages=['de'])
german.install()

In [3]:
# Pycountry shoud be on latest version, i.e. '22.3.5'. Else, flags might not work.
pycountry.__version__

'22.3.5'

## DF einlesen

In [4]:
# read google sheet
sheet_id = '1CEBhao3rMe-qtCbAgJTn5ZKQMRFWeAeaiXFpBY3gbHE'
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/gviz/tq?tqx=out:csv"
df_roh = pd.read_csv(url)

In [5]:
# Pivot wider
df = df_roh.groupby(['Country','Status'])['ID'].count().unstack().reset_index()
df= df.rename(columns={'confirmed':'Bestätigt', 'suspected':'Verdacht'})

df.drop('discarded', inplace=True, axis=True)

# add totals column
df['Total']=df.iloc[:,1:].sum(axis=True)

df = df[df['Total']!=0].reset_index(drop = True)

In [6]:

# format integers
df = df.fillna(0)
df.iloc[:,1:] = df.iloc[:,1:].astype(int)

In [7]:
# sum up UK and France
def sum_up_countries(df, subcountries, new_name):
    country_idx = df[df['Country'].isin(subcountries)].index
    country_row = df.iloc[country_idx].sum()
    country_row.loc['Country'] = new_name
    df =df.append(country_row, ignore_index=True)
    df.drop(country_idx, inplace=True)
    return df.reset_index(drop=True)
    
df = sum_up_countries(df, ['England', 'Scotland', 'Wales', 'Northern Ireland','United Kingdom'], 'United Kingdom' )
df = sum_up_countries(df, ['France', 'French Guiana'], 'France' )

In [8]:
df=df.sort_values('Total', ascending=False)

# replace country names with our worldmap ids
df['Country'] = df['Country'].str.replace('Czech Republic', 'Czechia')
df['Country'] = df['Country'].str.replace('Iran', 'Iran, Islamic Republic of')


In [9]:
# temporarily remove kosovo

df = df[df['Country']!='Kosovo']

In [10]:
# Catch country name if not in pycountry
for name in df['Country'].to_list():
    if pycountry.countries.get(name=name)== None and pycountry.countries.get(common_name =name) == None:
        #print(pycountry.countries.get(common_name =name).common_name)
        raise ValueError('Country is not recognised by pycountry:', name)

In [11]:
#df

### Deutsche Ländernamen, Flaggen

In [12]:
# get country object, using common name if there is one
def get_common_name(name):
    if pycountry.countries.get(common_name =name) == None: 
        country = pycountry.countries.get(name=name)
    else:
        country = pycountry.countries.get(common_name =name)
    return country

# return german name
def get_german_name(name):
    country = get_common_name(name)
    if pycountry.countries.get(common_name =name) == None:
        country = country.name
    else:
        country = country.common_name        
    return _(country)

# return flag
def get_flag(name):
    country = get_common_name(name)
    return country.flag


In [13]:
df['Land'] = df['Country'].apply(get_german_name)
df['Flagge'] =  df['Country'].apply(get_flag)

# Iran zurückwandeln
df['Country'] = df['Country'].str.replace('Iran, Islamic Republic of', 'Iran')

df = df[['Country', 'Land', 'Flagge','Bestätigt', 'Verdacht', 'Total']]

In [14]:
#df

## Vergleichen, Version abspeichern

In [15]:
past_csvs= os.listdir('data')

past_csvs.sort()
print(past_csvs)

['.gitkeep', '20220524-16h03.csv']


In [16]:
df_last_version = pd.read_csv('data/'+past_csvs[-1])

In [17]:
print('old == new:', str(df['Total'].sum() == df_last_version['Total'].sum()))
print('new total:', str(df['Total'].sum()))
print('old total:', str(df_last_version['Total'].sum()))

old == new: False
new total: 989
old total: 270


In [18]:
# save most recent data
time_now = datetime.now().strftime("%Y%m%d-%Hh%M")

#df.to_csv('data/'+ time_now+'.csv', index=False)

## Export für Weltkarte
https://q.st.nzz.ch/editor/choropleth/4acf1a0fd4dd89aef4abaeefd0b6f4dc

In [19]:
ids = pd.read_csv('country_ids.csv')

df_worldmap =df.copy()
#df_worldmap = df_worldmap[df_worldmap['Country']!='Malta']
df_worldmap['Country'] = df_worldmap['Country'].str.replace('United States','United States of America')
df_worldmap = df_worldmap.rename(columns ={'Country':'ID', 'Total':'Wert'})

# merge df with ids
df_worldmap = ids.merge(df_worldmap[['ID', 'Wert']], how='left').sort_values('Wert', ascending=False)

In [20]:
# Check if all countries were recognised - is True if ok

ignore_lst = ['Cayman Islands']
total_df = df[~df['Country'].isin(ignore_lst)]['Total'].sum()

print('Sum Worldmap:', str(df_worldmap['Wert'].sum()))
#ok if it's not Malta
print('Sum DF:', str(total_df))
print('Corresponds with sum df:', str(df_worldmap['Wert'].sum() == total_df))

Sum Worldmap: 988.0
Sum DF: 988
Corresponds with sum df: True


In [21]:
# copy to q-element
df_worldmap=df_worldmap.sort_values('ID', key=lambda col: col.str.lower())

df_worldmap.to_clipboard(index=False)
#webbrowser.open('https://q.st.nzz.ch/editor/choropleth/4acf1a0fd4dd89aef4abaeefd0b6f4dc', new=2)

In [22]:
df[df['Total']==1].sort_values('Country')


Status,Country,Land,Flagge,Bestätigt,Verdacht,Total
2,Austria,Österreich,🇦🇹,1,0,1
7,Cayman Islands,Cayman-Inseln,🇰🇾,0,1,1
12,Haiti,Haiti,🇭🇹,0,1,1
13,Hungary,Ungarn,🇭🇺,1,0,1
19,Latvia,Lettland,🇱🇻,1,0,1
20,Malta,Malta,🇲🇹,1,0,1
21,Mexico,Mexiko,🇲🇽,1,0,1
22,Morocco,Marokko,🇲🇦,1,0,1
25,Pakistan,Pakistan,🇵🇰,0,1,1
26,Paraguay,Paraguay,🇵🇾,0,1,1


In [23]:
df_worldmap[df_worldmap['Wert']==1].sort_values('ID')

Unnamed: 0,ID,Wert
8,Austria,1.0
64,Haiti,1.0
67,Hungary,1.0
86,Latvia,1.0
98,Malta,1.0
101,Mexico,1.0
105,Morocco,1.0
120,Pakistan,1.0
123,Paraguay,1.0
148,Sudan,1.0


## Export für Tabelle

https://q.st.nzz.ch/editor/table/4acf1a0fd4dd89aef4abaeefd0da5ac6

In [24]:
df_q_table = df[['Land', 'Flagge', 'Bestätigt', 'Verdacht','Total']].rename(columns = {'Land':'', 'Flagge':''})
#df_q_table

In [25]:
df_q_table.to_clipboard(index=False)

#webbrowser.open('https://q.st.nzz.ch/editor/table/4acf1a0fd4dd89aef4abaeefd0da5ac6')

## Zahlen für LD-Artikel

In [27]:
# Fälle total (inklusive inoffizielle Länder)
df['Total'].sum()

989

## Export Geschlechtergrafik [inaktiv]
https://q.st.nzz.ch/editor/chart/4913f749b598fb2ecc9721cb17e708e9

In [29]:
gender_dict = {'Male':'Männer', 'male':'Männer',
              'female':'Frauen',np.nan:'Keine Daten'}

In [30]:
df_gender = df_roh.copy()
df_gender['Gender'] = df_gender['Gender'].replace(gender_dict)

# only group confirmed cases
df_q_gender = df_gender[df_gender['Status']=='confirmed'].groupby(['Gender'], dropna=False, as_index=False).size()
df_q_gender.rename(columns={'Gender':'Geschlecht', 'size':'Anzahl'})

df_q_gender = df_q_gender.T
df_q_gender.columns = df_q_gender.iloc[0]
df_q_gender.drop('Gender', inplace=True)
df_q_gender = df_q_gender[['Männer', 'Keine Daten', 'Frauen']]

In [31]:
df_q_gender.to_clipboard()
#webbrowser.open('https://q.st.nzz.ch/editor/chart/4913f749b598fb2ecc9721cb17e708e9')

In [32]:
df_q_gender

Gender,Männer,Keine Daten,Frauen
size,286,630,3


In [None]:
df_q_gender.sum(axis=True)

## Test: Zeitreihe

In [33]:

df_roh['Date_onset'] = pd.to_datetime(df_roh['Date_onset'])
df_roh['Date_confirmation'] = pd.to_datetime(df_roh['Date_confirmation'])
df_roh['Date_entry'] = pd.to_datetime(df_roh['Date_entry'])
df_roh['Date_last_modified'] = pd.to_datetime(df_roh['Date_last_modified'])

In [34]:
import altair as alt

In [35]:
alt.Chart(df_roh[df_roh['Status'].isin(['confirmed', 'suspected'])]).mark_bar(size=10).encode(
    x=alt.X('Date_entry:T'  ),
    y=alt.Y('count()'), 
    color = alt.Color('Status'),
    order=alt.Order(
      # Sort the segments of the bars by this field
      'Status',
      sort='ascending'
    )
).properties(
    height = 200, 
    title = 'Anzahl Fälle nach Meldedatum'
)

## Debugging

* Replace ISO3 with country code that is not recognised
* Then copy name and use it to replace current country name at line 35 in py-skript.

### Länder finden für Pycountry

In [None]:

ISO3 = 'XXK' 

pycountry.countries.get(alpha_3=ISO3)

In [None]:
pycountry.countries.get(alpha_3='BOL').common_name

In [None]:
amendments = pd.read_csv('pycountry_amendments.csv')

In [None]:
amendments

### Länder finden für Worldmap

In [36]:
ids[ids['ID'].str.contains('Kosovo')]

Unnamed: 0,ID
82,Kosovo


In [37]:
df_worldmap[df_worldmap['ID'].str.contains('Malta')]

Unnamed: 0,ID,Wert
98,Malta,1.0


In [38]:
one_value = ['Mexico', 'Ecuador', 'Pakistan', 'Bolivia','Malta', 'Sudan', 'Austria', 'Finland']
df_worldmap[df_worldmap['ID'].isin(one_value)]

Unnamed: 0,ID,Wert
8,Austria,1.0
16,Bolivia,3.0
42,Ecuador,
51,Finland,2.0
98,Malta,1.0
101,Mexico,1.0
120,Pakistan,1.0
148,Sudan,1.0
