In [None]:
import os, pandas as pd, networkx as nx, numpy as np, statsmodels.api as sm
from sqlalchemy import create_engine
from datetime import date, datetime, timedelta
from urllib.parse import quote
import matplotlib.pyplot as plt, seaborn as sns

## parameters

In [None]:
# first (i) and last (f) year / week number of the analysed period, kept as strings
# because they are inserted into a date string that is parsed in the next cell
yeari = '2024'
yearf = '2024'
weeki = '18'
weekf = '31'

In [None]:
# "%W" counts weeks starting on Monday (days before the first Monday belong to week 0),
# "%w" = 1 selects the Monday of that week
week_format = "%Y-%W-%w"
# di: Monday of the first week, df: Sunday (Monday + 6 days) of the last week
di = datetime.strptime('{}-{}-1'.format(yeari, weeki), week_format).date()
df = datetime.strptime('{}-{}-1'.format(yearf, weekf), week_format).date() + timedelta(days=6)
# ds: every calendar day from di to df, both ends included
ds = [di + timedelta(days=offset) for offset in range((df - di).days + 1)]
print(di, 'until', df)

## database connection

In [None]:
# database credentials: user name and password come from the environment
# (os.getenv yields None for a variable that is not set)
db_usr = os.getenv('DB_USR')
db_pwd = os.getenv('DB_PWD')
# database login: server address, port and database name
host = 'nc-health-data-prod.cluster-ccsgl7rk4urn.eu-central-1.rds.amazonaws.com'
port = 5432
db = 'master'

In [None]:
# for queries with output
# fail early with a readable message instead of a TypeError from string concatenation
if db_usr is None or db_pwd is None:
    raise RuntimeError('set the environment variables DB_USR and DB_PWD before connecting')
# percent-encode user name and password so that characters such as '@', ':' or '/'
# cannot be mistaken for URL delimiters when the connection string is parsed
engine = create_engine(
    f"postgresql://{quote(db_usr, safe='')}:{quote(db_pwd, safe='')}@{host}:{port}/{db}"
)
conn = engine.connect()

In [None]:
# Every query cell further down reads through `conn`. This cell sits directly after the
# cell that opens the connection, so running the notebook from top to bottom would close
# the connection before the first query is sent. Closing is therefore opt-in: set the flag
# to True and run this cell by hand once all database work is finished.
close_connection_now = False
if close_connection_now:
    conn.close()

## load contacts ...

In [None]:
def contactnw_unique(contacts):
    '''
    Build an undirected graph that holds one edge per unique pair of devices in contact.

    Parameters
    ----------
    contacts : list of list of str
        each inner list holds the device IDs taking part in one contact event

    Returns
    -------
    G : nx.Graph
        graph whose nodes are device IDs; any two IDs that share an inner list are
        joined by an edge (a pair occurring several times still yields a single edge)

    '''
    G = nx.Graph()
    for dids in contacts:
        # connect every member of the event with every member listed before it
        G.add_edges_from((dids[later], dids[earlier])
                         for later in range(len(dids))
                         for earlier in range(later))
    return G

# function to restrict contact list to contacts with given minimum number of GPS pings
def contactnw_gpsunique(cs, sources, ngps):
    '''
    Build the graph of unique contacts, keeping only device pairs with enough GPS pings.

    Parameters
    ----------
    cs : list of list of str
        device IDs per contact event
    sources : list of list of str
        ping source per device, aligned element by element with `cs`
        (not looked at when `ngps` is 0)
    ngps : int
        minimum number of contact partners of a pair whose ping source is 'GPS':
        0 keeps every pair, 1 keeps pairs where at least one partner has a GPS ping,
        2 keeps pairs where both partners have a GPS ping

    Returns
    -------
    G : nx.Graph
        graph returned by `contactnw_unique` for the retained device pairs

    Raises
    ------
    ValueError
        if `ngps` is not 0, 1 or 2
    '''
    if ngps == 0:
        # duplicates within one event are dropped so that no self-loops are created
        contacts = [list(set(cc)) for cc in cs]
    elif ngps == 1:
        contacts = [[did1, did2] for cc, ss in zip(cs, sources)
                    for did1, s1 in zip(cc, ss) for did2, s2 in zip(cc, ss)
                    if 'GPS' in (s1, s2) and did1 != did2]
    elif ngps == 2:
        # dict.fromkeys removes repeated device IDs (keeping their order); like in the
        # other two branches a device can then never be paired with itself
        contacts = [list(dict.fromkeys(did for did, s in zip(cc, ss) if s == 'GPS'))
                    for cc, ss in zip(cs, sources)]
    else:
        # previously this fell through and failed with an UnboundLocalError on `G`
        raise ValueError(f'ngps must be 0, 1 or 2, got {ngps!r}')
    return contactnw_unique(contacts)

### ... in Germany

In [None]:
def ld_cn_germany(d):
    '''
    Return the SQL text that loads the contact events of day `d` from
    `covid_network_sdkv6_tl8_60m`.

    Each result row carries tl8, stime, dids and sources plus `in_stadium` (true if any
    unnested area lies closer than the radius stored in `cluster_search_areas_v2`) and the
    smallest unnested `area_id`. `d` is inserted via str(d) into the "day" filter.
    '''
    day_literal = str(d)
    return """
    with cn_tmp as (
    	select
                  tl8
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
            from covid_network_sdkv6_tl8_60m, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" = '""" + day_literal + """'
            group by 1,2,3,4
    ),
    cn as (
        select
                  tl8
                , stime
                , dids
                , sources
    			, in_stadium
                , area_id
        from cn_tmp
    )
    select *
    from cn
"""

In [None]:
ngps_stad = 1
data_rows = []
for d in ds:
    print('processing', d)
    clist = pd.read_sql_query(ld_cn_germany(d), conn)
    # split the contact events by region:
    # region 0 = outside stadiums (in_stadium is False or missing), region 1 = inside stadiums
    in_stad = clist.in_stadium
    clist_splitted = [
        clist[in_stad.isna() | (in_stad == False)],
        clist[in_stad == True],
    ]
    # one network of unique contacts per region; a pair needs at least 0 GPS-sourced
    # pings in region 0 and at least ngps_stad in region 1
    Gs = [
        contactnw_gpsunique(
            cs.dids.tolist(),  # list of list of dids
            [s[1:-1].split(',') for s in cs.sources.tolist()],  # list of list of ping sources
            ngps,  # minimum number of GPS-sourced pings required per did pair
        )
        for cs, ngps in zip(clist_splitted, [0, ngps_stad])
    ]
    n_outside, n_inside = (len(G.edges()) for G in Gs)
    # every edge is counted once for each of its two end points
    data_rows.append([d, 2*n_outside, 2*n_inside])
data_germany = pd.DataFrame(data_rows, columns=['day','ncontacts_1','ncontacts_2'])

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
data_germany[['day','ncontacts_1','ncontacts_2']].to_csv('output/ncontacts_germany.csv', index=False)

In [None]:
# reload the cached daily contact counts; the CSV stores 'day' as text, so it is parsed
# and reduced to plain date objects again
data_germany = pd.read_csv('output/ncontacts_germany.csv').assign(
    day=lambda frame: pd.to_datetime(frame.day).dt.date
)

In [None]:
def ld_cr_germany(di, df):
    '''
    Return the SQL text selecting "day", ndid_1, ndid_2, pdid_1 and pdid_2 from
    `covid_results_sdkv6` for location 'Deutschland' between `di` and `df`
    (both inserted via str()), ordered by day.
    '''
    return f"""
    select "day", ndid_1, ndid_2, pdid_1, pdid_2--, ktot_1
    from covid_results_sdkv6
    where "location"='Deutschland' and "day" between '{di!s}' and '{df!s}'
    order by 1
"""

panel_data_germany = pd.read_sql_query(ld_cr_germany(di, df), conn)
panel_data_germany

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
panel_data_germany.to_csv('output/panel_data_germany.csv', index=False)

In [None]:
#panel_data_germany = pd.read_csv('output/panel_data_germany.csv')
#panel_data_germany['day'] = [d.date() for d in pd.to_datetime(panel_data_germany.day)]

In [None]:
# attach the panel figures to the contact counts; only days present in both tables remain
data_germany = pd.merge(data_germany, panel_data_germany, on='day')

### ... in stadiums

In [None]:
# function to load list of contact events & their locations and ping sources
def ld_cn(d):
    '''
    Return the SQL text that loads the contact events of day `d` from
    `covid_network_sdkv6_tl5_10m`, one row per unnested area whose distance is below the
    radius stored in `cluster_search_areas_v2`. `d` is inserted via str(d).

    Note: the name `ld_cn` is bound again further down in this notebook (city query).
    '''
    day_literal = str(d)
    return """
    with cn as (
        select
              tl5
            , stime
            , dids
            , sources
            , u.area_id
        from covid_network_sdkv6_tl5_10m, unnest(area_ids, dist_stads) u(area_id, dist_stad)
        left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
        where
                "day" = '""" + day_literal + """'
            and u.dist_stad < csa.radius_in_meter
    )
    select *
    from cn
"""

In [None]:
ngps = 1
data_rows = []
for d in ds:
    print('processing', d)
    clist = pd.read_sql_query(ld_cn(d), conn)
    # one contact network per stadium (area_id) that occurs on this day
    for stadium in set(clist.area_id):
        cs = clist[clist.area_id == stadium]
        did_lists = cs.dids.tolist()  # list of list of dids
        # strip the first and last character of the source text, then split at commas
        source_lists = [s[1:-1].split(',') for s in cs.sources.tolist()]  # list of list of ping sources
        # ngps: minimum number of GPS-sourced pings required per did pair
        G = contactnw_gpsunique(did_lists, source_lists, ngps)
        data_rows.append([d, stadium, len(G.edges())])
data = pd.DataFrame(data_rows, columns=['day','area_id','ncontacts'])

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
data.to_csv('output/ncontacts_stadiums.csv', index=False)

In [None]:
#data = pd.read_csv('output/ncontacts_stadiums.csv')
#data['day'] = [d.date() for d in pd.to_datetime(data.day)]

area ID / stadium name (table `cluster_search_areas_v2`)

- 1: Allianz Arena, München
- 2: Olympiastadion, Berlin
- 3: Red Bull Arena, Leipzig
- 4: Deutsche Bank Park, Frankfurt
- 10: Signal Iduna Park, Dortmund
- 11: Mercedes-Benz Arena, Stuttgart
- 15: RheinEnergieStadion, Köln
- 24: Merkur Spiel-Arena, Düsseldorf
- 25: Volksparkarena, Hamburg
- 28: Veltins-Arena, Gelsenkirchen

In [None]:
# area IDs (table cluster_search_areas_v2) of the ten stadiums listed above
euro24_stadiums = [
    1, 2, 3, 4,
    10, 11, 15,
    24, 25, 28,
]

#### absolute contact numbers

In [None]:
sns.set_theme(style="white")

# table of contact counts with one row per day and one column per area_id
data_plot = data.set_index(['day','area_id']).unstack('area_id').ncontacts

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# sequential colormap (light to dark blue)
cmap = sns.color_palette('Blues', as_cmap=True)

# Draw the heatmap of log contact counts (stadiums as rows, days as columns) on the
# axes created above; vmin=0 anchors the colour scale at log(1)
sns.heatmap(np.log(data_plot[euro24_stadiums]).T,
            cmap=cmap, vmin=0.,
            square=True, linewidths=.5, cbar_kws={"shrink": .5},
            ax=ax)

# create the figure directory first; savefig fails if 'plots/' does not exist yet
os.makedirs('plots', exist_ok=True)
plt.savefig('plots/contacts_stadiums.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contacts_stadiums.pdf', bbox_inches='tight')
plt.show()

### ... in cities

osm ID / city (table `planet_osm_polygon`)

- München: -62428
- Berlin: -62422
- Leipzig: -62649
- Frankfurt: -62400
- Dortmund: -1829065
- Stuttgart: -2793104
- Köln: -62578
- Düsseldorf: -62539
- Hamburg: -62782
- Gelsenkirchen: -62522

In [None]:
# function to load list of contact events & their locations and ping sources
def ld_cn(d):
    '''
    Return the SQL text that loads the contact events of day `d` from
    `covid_network_sdkv6_tl8_60m` that lie inside one of the ten city polygons.

    cities1-3 pick, per osm_id, the polygon row with the largest way_area from
    `planet_osm_polygon`; cn_tmp flags events closer to a stadium than its radius
    (`in_stadium`) and keeps the smallest unnested area_id; cn attaches the name of the
    city whose polygon contains the event's geopoint. `d` is inserted via str(d).

    Note: this rebinds the name `ld_cn` that an earlier cell uses for the stadium query.
    '''
    day_literal = str(d)
    # cities3 joins on osm_id as well as way_area: matching on the area value alone
    # could pair a polygon of one city with the maximum area of another city
    return """
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.osm_id = c2.osm_id and c1.way_area = c2.way_area
    ),
    cn_tmp as (
    	select
                  tl8
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
     			, geopoint
            from covid_network_sdkv6_tl8_60m, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" = '""" + day_literal + """'
            group by 1,2,3,4,7
    ),
    cn as (
        select
                  tl8
                , stime
                , dids
                , sources
    			, in_stadium
                , area_id
                , "name" as city
        from cn_tmp
        join cities3 as c3 on st_contains(c3.way, cn_tmp.geopoint)
    )
    select *
    from cn
"""

In [None]:
ngps_stad = 1
data_rows = []
for d in ds:
    print('processing', d)
    clist = pd.read_sql_query(ld_cn(d), conn)
    for city in set(clist.city):
        # assign() builds a new frame, so the filtered slice of clist is never written to
        # (the former .loc assignment on the slice triggered a SettingWithCopyWarning)
        csss = clist[clist.city == city].assign(
            area_id=lambda frame: frame.area_id.fillna(-1))  # -1 means outside any stadium of interest
        for aid in set(csss.area_id):
            css = csss[csss.area_id==aid]
            # split list of contact events by region
            # (0 = outside stadiums: in_stadium False or missing, 1 = inside stadiums: in_stadium True)
            clist_splitted = [css[(css.in_stadium == False) | css.in_stadium.isna()],  # contacts outside stadiums
                              css[css.in_stadium == True]]  # contacts inside stadiums
            # construct 2 networks of unique contacts (1 for outside stadiums/region 0, 1 for inside stadiums/region 1),
            # imposing a minimum number of GPS pings per did pair in contact (0 in region 0, ngps_stad in region 1)
            Gs = [contactnw_gpsunique(cs.dids.tolist(),  # list of list of dids
                                      [s[1:-1].split(',') for s in cs.sources.tolist()],  # list of list of ping sources
                                      ngps)  # minimum number of GPS-sourced pings required per did pair
                  for cs, ngps in zip(clist_splitted, [0, ngps_stad])]
            data_rows.append([d, city, aid, 2*len(Gs[0].edges()), 2*len(Gs[1].edges())])  # factor 2 because of handshake lemma
data = pd.DataFrame(data_rows, columns=['day','city','area_id','ncontacts_1','ncontacts_2'])

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
data.to_csv('output/ncontacts_cities.csv', index=False)

In [None]:
# reload the cached per-city contact counts; the CSV stores 'day' as text, so it is
# parsed and reduced to plain date objects again
data = pd.read_csv('output/ncontacts_cities.csv').assign(
    day=lambda frame: pd.to_datetime(frame.day).dt.date
)

EURO 24 match data: https://www.fr.de/sport/fussball/em-2024-spielplan-ergebnisse-termine-gruppen-uhrzeit-deutschland-fussball-news-92086708.html

In [None]:
# latest day contained in the per-city contact data
data['day'].max()

In [None]:
# Event list, one row per event:
#   [day, city, area_id, match (short event label), is_football, capacity]
# area_id 'auto' and capacity 'auto' are placeholders that a later cell replaces with the
# city's standard stadium and that stadium's capacity; area_id -1 marks events that are
# not tied to one of the stadiums.
# Review fix: 'B2Run' (Dortmund) and 'HipHop' (Hamburg) were flagged is_football=True,
# in conflict with the 'B2Run' entry for München (False); both are now False.
match_data = [
    [date(2024,6,14), 'München', 'auto', 'GER-SCO', True, 'auto'],
    [date(2024,6,15), 'Köln', 'auto', 'HUN-SUI', True, 'auto'],
    [date(2024,6,19), 'Köln', 'auto', 'SCO-SUI', True, 'auto'],
    [date(2024,6,19), 'Stuttgart', 'auto', 'GER-HUN', True, 'auto'],
    [date(2024,6,23), 'Frankfurt am Main', 'auto', 'SUI-GER', True, 'auto'],
    [date(2024,6,23), 'Stuttgart', 'auto', 'SCO-HUN', True, 'auto'],

    [date(2024,6,15), 'Berlin', 'auto', 'ESP-CRO', True, 'auto'],
    [date(2024,6,15), 'Dortmund', 'auto', 'ITA-ALB', True, 'auto'],
    [date(2024,6,19), 'Hamburg', 'auto', 'ALB-CRO', True, 'auto'],
    [date(2024,6,20), 'Gelsenkirchen', 'auto', 'ESP-ITA', True, 'auto'],
    [date(2024,6,24), 'Düsseldorf', 'auto', 'ALB-ESP', True, 'auto'],
    [date(2024,6,24), 'Leipzig', 'auto', 'CRO-ITA', True, 'auto'],

    [date(2024,6,16), 'Stuttgart', 'auto', 'SLO-DEN', True, 'auto'],
    [date(2024,6,16), 'Gelsenkirchen', 'auto', 'SRB-ENG', True, 'auto'],
    [date(2024,6,20), 'Frankfurt am Main', 'auto', 'DEN-ENG', True, 'auto'],
    [date(2024,6,20), 'München', 'auto', 'SLO-SRB', True, 'auto'],
    [date(2024,6,25), 'Köln', 'auto', 'ENG-SLO', True, 'auto'],
    [date(2024,6,25), 'München', 'auto', 'DEN-SRB', True, 'auto'],

    [date(2024,6,16), 'Hamburg', 'auto', 'POL-NED', True, 'auto'],
    [date(2024,6,17), 'Düsseldorf', 'auto', 'AUT-FRA', True, 'auto'],
    [date(2024,6,21), 'Berlin', 'auto', 'POL-AUT', True, 'auto'],
    [date(2024,6,21), 'Leipzig', 'auto', 'NED-FRA', True, 'auto'],
    [date(2024,6,25), 'Dortmund', 'auto', 'FRA-POL', True, 'auto'],
    [date(2024,6,25), 'Berlin', 'auto', 'NED-AUT', True, 'auto'],

    [date(2024,6,17), 'Frankfurt am Main', 'auto', 'BEL-SVK', True, 'auto'],
    [date(2024,6,17), 'München', 'auto', 'ROM-UKR', True, 'auto'],
    [date(2024,6,21), 'Düsseldorf', 'auto', 'SVK-UKR', True, 'auto'],
    [date(2024,6,22), 'Köln', 'auto', 'BEL-ROM', True, 'auto'],
    [date(2024,6,26), 'Stuttgart', 'auto', 'UKR-BEL', True, 'auto'],
    [date(2024,6,26), 'Frankfurt am Main', 'auto', 'SVK-ROM', True, 'auto'],

    [date(2024,6,18), 'Dortmund', 'auto', 'TUR-GEO', True, 'auto'],
    [date(2024,6,18), 'Leipzig', 'auto', 'POR-CZE', True, 'auto'],
    [date(2024,6,22), 'Dortmund', 'auto', 'TUR-POR', True, 'auto'],
    [date(2024,6,22), 'Hamburg', 'auto', 'GEO-CZE', True, 'auto'],
    [date(2024,6,26), 'Hamburg', 'auto', 'CZE-TUR', True, 'auto'],
    [date(2024,6,26), 'Gelsenkirchen', 'auto', 'GEO-POR', True, 'auto'],

    [date(2024,6,29), 'Berlin', 'auto', 'SUI-ITA', True, 'auto'],
    [date(2024,6,29), 'Dortmund', 'auto', 'GER-DEN', True, 'auto'],
    [date(2024,6,30), 'Gelsenkirchen', 'auto', 'ENG-SVK', True, 'auto'],
    [date(2024,6,30), 'Köln', 'auto', 'ESP-GEO', True, 'auto'],
    [date(2024,7,1), 'Düsseldorf', 'auto', 'FRA-BEL', True, 'auto'],
    [date(2024,7,1), 'Frankfurt am Main', 'auto', 'POR-SLO', True, 'auto'],
    [date(2024,7,2), 'München', 'auto', 'ROM-NED', True, 'auto'],
    [date(2024,7,2), 'Leipzig', 'auto', 'AUT-TUR', True, 'auto'],

    [date(2024,7,5), 'Stuttgart', 'auto', 'ESP-GER', True, 'auto'],
    [date(2024,7,5), 'Hamburg', 'auto', 'POR-FRA', True, 'auto'],
    [date(2024,7,6), 'Düsseldorf', 'auto', 'ENG-SUI', True, 'auto'],
    [date(2024,7,6), 'Berlin', 'auto', 'NED-TUR', True, 'auto'],

    [date(2024,7,9), 'München', 'auto', 'ESP-FRA', True, 'auto'],
    [date(2024,7,10), 'Dortmund', 'auto', 'ENG-NED', True, 'auto'],

    [date(2024,7,14), 'Berlin', 'auto', 'ESP-ENG', True, 'auto'],

    [date(2024,6,9), 'München', 41, 'AC/DC', False, 66000],# 66000 https://www.abendzeitung-muenchen.de/kultur/musik/so-war-das-erste-konzert-von-acdc-im-olympiastadion-in-muenchen-art-985477#:~:text=AC%2FDC%20in%20M%C3%BCnchen%3A%20Ein,auch%20nicht%20vom%20Dauerregen%20verderben.
    [date(2024,6,12), 'München', 41, 'AC/DC', False, 66000],# 66000
    [date(2024,6,22), 'München', 41, 'A Gabalier', False, 60000],# 60000 https://www.merkur.de/kultur/andreas-gabalier-muenchen-olmypiastadion-konzert-david-hasselhoff-mario-barth-zr-93146068.html#:~:text=%E2%80%9EVolks%2DRock%27n%27,so%20oder%20so%20%C3%A4hnlich%20zutr%C3%A4gt.
    [date(2024,7,11), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.hessenschau.de/kultur/rammstein-in-frankfurt-kein-platz-mehr-fuer-ironie-v1,rammstein-frankfurt-konzert-100.html
    [date(2024,7,12), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.faz.net/aktuell/rhein-main/kultur/rammstein-in-frankfurt-begeisterte-fans-im-waldstadion-und-kritik-19852500.html
    [date(2024,7,13), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.faz.net/aktuell/rhein-main/kultur/rammstein-in-frankfurt-begeisterte-fans-im-waldstadion-und-kritik-19852500.html
    [date(2024,7,12), 'Köln', 'auto', 'P Maffay', False, 37000],# 37000 https://www.express.de/koeln/peter-maffay-in-koeln-verkehrs-chaos-konzert-startet-spaeter-825568
    [date(2024,7,13), 'Köln', 'auto', 'R Kaiser', False, 42000],# 42000 https://www.derwesten.de/panorama/promi-tv/roland-kaiser-koeln-konzert-kinder-id301052575.html#:~:text=Es%20war%20eines%20der%20gr%C3%B6%C3%9Ften,Rhein%2DEnergie%2DStadion%20spielte.
    [date(2024,6,1), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,7,8), 'München', 'auto', 'Tollwood', False, 'auto'],
    [date(2024,5,12), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,24), 'München', 41, 'Metallica', False, 75000],# 75000 https://www.burnyourears.de/live/54216-metallica-konzertbericht-zum-auftakt-der-m72-world-tour-in-m%C3%BCnchen.html#:~:text=Mai%202024%20%E2%80%93%20Erster%20Abend&text=Das%20optische%20und%20akustische%20Donnergrollen,Anwesenden%20im%20M%C3%BCnchner%20Olympiastadion%20nachhallen.
    [date(2024,5,26), 'München', 41, 'Metallica', False, 75000],# 75000
    [date(2024,5,19), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,17), 'Gelsenkirchen', 'auto', 'AC/DC', False, 55000],# 55000 https://www.waz.de/staedte/gelsenkirchen/article242363698/AC-DC-rocken-die-Veltins-Arena-in-Gelsenkirchen.html
    [date(2024,5,21), 'Gelsenkirchen', 'auto', 'AC/DC', False, 54000],# 54000 https://www.radioemscherlippe.de/artikel/gelsenkirchen-zweites-acdc-konzert-in-der-arena-1990321.html
    [date(2024,5,18), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    [date(2024,6,1), 'Düsseldorf', -1, 'Japan Day', False, 'auto'],
    [date(2024,6,28), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,6,29), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,6,30), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,5,18), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,18), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,9), 'Köln', 'auto', 'DFB Pokal', True, 'auto'],
    [date(2024,5,19), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,25), 'Berlin', 'auto', 'DFB Pokal', True, 'auto'],
    [date(2024,5,11), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,26), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,5), 'Berlin', 'auto', 'S25 Berlin', False, 'auto'],
    [date(2024,5,4), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,1), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,5,27), 'Düsseldorf', 'auto', 'BLiga rel', True, 'auto'],
    [date(2024,5,3), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,5), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,3), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,4), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,30), 'München', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,5,4), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,7), 'Dortmund', 'auto', 'B2Run', False, 'auto'],# running event, not football (was True)
    [date(2024,4,28), 'Berlin', 16, 'RLiga (f)', True, 'auto'],
    [date(2024,5,5), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,5,18), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,6,9), 'Berlin', 16, 'RLiga (f)', True, 'auto'],
    [date(2024,5,25), 'Frankfurt am Main', 64, 'HLiga', True, 'auto'],
    [date(2024,6,30), 'Frankfurt am Main', 64, 'Am. Football', False, 'auto'],
    [date(2024,7,13), 'Frankfurt am Main', 64, 'Am. Football', False, 'auto'],
    [date(2024,4,26), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,5,12), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,5,25), 'Hamburg', 29, 'RLiga (f)', True, 'auto'],
    [date(2024,6,1), 'Hamburg', 29, 'HipHop', False, 'auto'],# music event, not football (was True)
    [date(2024,6,29), 'München', 41, 'fan zone', False, 'auto'],
    [date(2024,7,5), 'München', 41, 'fan zone', False, 'auto'],
    [date(2024,7,13), 'München', 41, 'A Bayern', False, 'auto'],
    [date(2024,7,17), 'München', 41, 'B2Run', False, 'auto'],

    [date(2024,7,18), 'München', -1, 'Tollwood', False, 'auto'],
    [date(2024,7,14), 'Hamburg', 'auto', 'Am. Football', False, 'auto'],
    [date(2024,7,17), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000 https://www.welt.de/wirtschaft/article252642610/Taylor-Swift-Mehr-Amerikaner-als-Berliner-diese-Menschen-waren-beim-Konzert-in-Gelsenkirchen.html
    [date(2024,7,18), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000
    [date(2024,7,19), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000
    [date(2024,7,15), 'Stuttgart', -1, 'P Maffay', False, 16000],# 16000 https://www.stuttgarter-nachrichten.de/inhalt.konzert-auf-dem-wasen-so-war-s-bei-peter-maffay-in-stuttgart.f0e482a1-b87d-4f05-9cd3-2daf990c236a.html
    [date(2024,7,17), 'Stuttgart', -1, 'AC/DC', False, 90000],# 90000 https://www.swr.de/swraktuell/baden-wuerttemberg/stuttgart/acdc-konzert-stuttgart-100.html
    [date(2024,7,19), 'Stuttgart', 'auto', 'P!NK', False, 45000],# 45000 https://www.stuttgarter-nachrichten.de/inhalt.pink-konzert-in-stuttgart-was-fuer-ein-spektakel.1acf464d-6301-483d-8b07-8efd24276f18.html
    [date(2024,6,27), 'Stuttgart', -1, 'fan zone', False, 'auto'],
    [date(2024,7,20), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000 https://www.rundschau-online.de/kultur/coldplay-konzert-im-duesseldorfer-stadion-begeistert-47-000-831694
    [date(2024,7,21), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000
    [date(2024,7,23), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000
    [date(2024,7,18), 'Frankfurt am Main', 'auto', 'P Maffay', False, 39000],# 39000 https://cityguide-rhein-neckar.de/2024/07/19/ein-abend-voller-emotionen-peter-maffay-abschiedskonzert-im-deutsche-bank-park/
    [date(2024,7,20), 'Frankfurt am Main', 'auto', 'R Kaiser', False, 25000],# 25000 https://cityguide-rhein-neckar.de/2024/07/21/roland-kaiser-begeistert-25-000-fans-im-deutsche-bank-park/#:~:text=20.07.2024%20%2D%20Roland%20Kaiser%20bei,Kaiser%2050%20Jahre%2050%20Hits.
    [date(2024,7,17), 'Leipzig', 'auto', 'P!NK', False, 43000],# 43000 https://www.radiosaw.de/artikel/fotos-pink-konzert-leipzig-am-17-juli
    [date(2024,7,19), 'Leipzig', 'auto', 'R Kaiser', False, 44000],# 44000 https://www.lvz.de/kultur/regional/der-kaiser-gibt-sich-die-ehre-ILGMCNV6JJBDPLEBGZ3MTYWNHA.html#:~:text=Um%20ihr%20Idol%20zu%20sehen,Zuschauer%20kein%20Weg%20zu%20weit.&text=%E2%80%9E50%20Jahre%2C%2050%20Hits%E2%80%9C,19.%20Juli%20begeisterte%20die%20Fangemeinde.
    [date(2024,7,20), 'Leipzig', 'auto', 'P Maffay', False, 38000],# 38000 https://www.rnd.de/kultur/peter-maffay-in-leipzig-buehnenabschied-im-stadion-ruehrt-fans-zu-traenen-FVW6BH4VWZH5LMLDHXW4DA4R6U.html#:~:text=Das%20Taschentuch%20in%20der%20Hand,letzte%20Konzert%20von%20Peter%20Maffay.
    [date(2024,7,20), 'Köln', -1, 'CSD', False, 'auto'],
    [date(2024,7,21), 'Köln', -1, 'CSD', False, 'auto'],
    [date(2024,6,1), 'Berlin', -1, 'Schlagernacht', False, 'auto'],

    [date(2024,8,3), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,12), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,27), 'Berlin', 16, 'friendly match', True, 'auto'],
    [date(2024,8,3), 'Berlin', 16, 'friendly match', True, 'auto'],
    [date(2024,4,20), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,8,3), 'Dortmund', 'auto', 'BLiga3', True, 'auto'],
    [date(2024,4,21), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,16), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,4,6), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,7), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,5), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,19), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,26), 'Frankfurt am Main', 'auto', 'T Scott', False, 'auto'],
    [date(2024,7,27), 'Frankfurt am Main', 'auto', 'T Scott', False, 'auto'],
    [date(2024,7,20), 'Köln', 'auto', 'T Scott', False, 'auto'],
    [date(2024,4,13), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,26), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],# https://www.festivalsunited.com/magazine/konzerte/das-war-rammstein-in-der-veltins-arena-gelsenkirchen
    [date(2024,7,27), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,29), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,30), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,31), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,8,3), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,23), 'Hamburg', 'auto', 'T Swift', False, 50000],# https://www.ardmediathek.de/video/hamburg-journal/taylor-swift-verzaubert-50-000-fans-in-hamburg/ndr/Y3JpZDovL25kci5kZS84YzkzMDYyZi1jMDY1LTRmZjktOWI3Yi0wNTc4OWZlMThiYjE#:~:text=Hamburg%20Journal%3A%20Taylor%20Swift%20verzaubert%2050.000%20Fans%20in%20Hamburg%20%7C%20Video,(24.7.2024)%20mit%20Untertitel
    [date(2024,7,24), 'Hamburg', 'auto', 'T Swift', False, 50000],
    [date(2024,4,14), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,8,2), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,13), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,17), 'München', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,4,13), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,27), 'München', 41, 'T Swift', False, 70000],# https://www.allgaeuer-zeitung.de/bayern/taylor-swift-in-muenchen-eras-tour-2024-im-olympiastadion-27-7-24_arid-775404
    [date(2024,7,28), 'München', 41, 'T Swift', False, 70000],
    [date(2024,4,13), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    # template for new rows:
    #[date(), '', 'auto', '', False, 'auto'],

]
match_data = pd.DataFrame(match_data, columns=['day','city','area_id','match','is_football','capacity'])
match_data

stadium capacity data: https://de.wikipedia.org/wiki/Fu%C3%9Fball-Europameisterschaft_2024

In [None]:
# one row per stadium: city, area ID, stadium capacity and population of the city
# (the population value is repeated for cities that have two stadiums)
stadium_data = pd.DataFrame(
    columns=['city','area_id','capacity','population'],
    data=[
        ('Berlin', 2, 71000, 3645000),
        ('Berlin', 16, 22000, 3645000),
        ('Dortmund', 10, 62000, 587000),
        ('Düsseldorf', 24, 47000, 619000),
        ('Frankfurt am Main', 4, 47000, 753000),
        ('Frankfurt am Main', 64, 12000, 753000),
        ('Gelsenkirchen', 28, 50000, 261000),
        ('Hamburg', 25, 49000, 1841000),
        ('Hamburg', 29, 30000, 1841000),
        ('Köln', 15, 43000, 1086000),
        ('Leipzig', 3, 40000, 588000),
        ('München', 1, 66000, 1472000),
        ('München', 41, 69000, 1472000),
        ('Stuttgart', 11, 54000, 635000),
    ],
)
stadium_data

In [None]:
# Number of distinct devices per city whose 'home' place (table home_work_sdkv6_202405)
# lies inside the city polygon. cities1-3 pick, per osm_id, the polygon row with the
# largest way_area. cities3 joins on osm_id as well as way_area: matching on the area
# value alone could pair a polygon of one city with the maximum area of another city.
ld_hw = """
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.osm_id = c2.osm_id and c1.way_area = c2.way_area
    )
    select ci."name" as city, count(distinct hw.did) as ndids
    from home_work_sdkv6_202405 as hw, cities3 as ci
    where place = 'home'
    and st_contains(ci.way, st_transform(hw.weighted_centroid, 3857))
    group by 1
"""
panel_data = pd.read_sql_query(ld_hw, conn)
panel_data

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
panel_data.to_csv('output/panel_data.csv', index=False)

In [None]:
# reload the cached number of home-located devices per city
panel_data = pd.read_csv(filepath_or_buffer='output/panel_data.csv')

In [None]:
# stadium_data holds one row per stadium, so cities with two stadiums appear twice;
# reduce it to one population row per city first, otherwise the merge would duplicate
# the panel rows of those cities
city_population = stadium_data[['city','population']].drop_duplicates()
# drop columns written by an earlier run of this cell so that re-running it is harmless
panel_data = (
    panel_data
    .drop(columns=['population','pdid'], errors='ignore')
    .merge(city_population, on='city', validate='many_to_one')
)
# share of the city population that is part of the panel
panel_data['pdid'] = panel_data.ndids / panel_data.population
panel_data

In [None]:
# area IDs of the additional stadiums analysed on top of euro24_stadiums
add_stadiums = [
    16,
    29,
    41,
    64,
]

In [None]:
def ld_dp(di, df, euro24_stadiums):
    '''
    Return the SQL text selecting "day", area_id and ndids from
    `covid_dids_pings_per_day_sdkv6` between `di` and `df` for the area IDs in
    `euro24_stadiums` plus the module-level `add_stadiums`, restricted to source 'ANY',
    homeloc 'Deutschland' and rows whose stime has the time of day 23:59:59.
    '''
    area_ids = ','.join(str(aid) for aid in euro24_stadiums + add_stadiums)
    return f"""
    select 
          "day"
        , area_id
        , ndids
    from covid_dids_pings_per_day_sdkv6
    where
            "day" between '{di}' and '{df}' -- = '2024-07-05'
        and area_id in ({area_ids}) -- = 11
        and "source" = 'ANY'
        and homeloc = 'Deutschland'
        and to_char(stime, 'HH24:MI:SS') = '23:59:59'
"""

panelstad_data = pd.read_sql_query(ld_dp(di, df, euro24_stadiums), conn)
panelstad_data

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
panelstad_data.to_csv('output/panelstad_data.csv', index=False)

In [None]:
# reload the cached per-stadium device counts; the CSV stores 'day' as text, so it is
# parsed and reduced to plain date objects again
panelstad_data = pd.read_csv('output/panelstad_data.csv').assign(
    day=lambda frame: pd.to_datetime(frame.day).dt.date
)

In [None]:
# every calendar day from di to df, both ends included
daylist = [di + timedelta(days=offset) for offset in range((df - di).days + 1)]
# full day x stadium grid; combinations without a row in panelstad_data get 0 devices
panelstad_data2 = (
    pd.DataFrame({'day': daylist})
    .merge(pd.DataFrame({'area_id': euro24_stadiums + add_stadiums}), how='cross')
    .merge(panelstad_data, how='left', on=['day','area_id'])
    .fillna(0.)
    .assign(ndids=lambda frame: frame.ndids.astype(int))
)
panelstad_data2

In [None]:
# largest number of devices seen in any stadium on a single day
panelstad_data2['ndids'].max()

In [None]:
# area ID -> city, ordered by city name (the sort is stable, so within a city the
# stadium listed first below stays first)
aid2city = dict(sorted({
    # the ten stadiums of euro24_stadiums
    1: 'München',
    2: 'Berlin',
    3: 'Leipzig',
    4: 'Frankfurt am Main',
    10: 'Dortmund',
    11: 'Stuttgart',
    15: 'Köln',
    24: 'Düsseldorf',
    25: 'Hamburg',
    28: 'Gelsenkirchen',
    # the additional stadiums of add_stadiums
    16: 'Berlin',
    29: 'Hamburg',
    41: 'München',
    64: 'Frankfurt am Main',
}.items(), key=lambda pair: pair[1]))
# city -> standard area ID: walking through aid2city backwards lets the first stadium of
# each city overwrite the later ones
city2aid = {city: aid for aid, city in reversed(list(aid2city.items()))}

In [None]:
def _resolve_area_id(aid, city):
    # 'auto' stands for the standard stadium of the city; explicit IDs are kept
    if aid == 'auto':
        return city2aid[city]
    return aid

def _resolve_capacity(aid, npop):
    # explicit capacities are kept, and events outside any stadium (area ID -1) keep
    # their placeholder; everything else takes the capacity listed in stadium_data
    if npop != 'auto' or aid == -1:
        return npop
    return stadium_data.loc[stadium_data.area_id == aid, 'capacity'].iloc[0]

# map standard stadium IDs per city
match_data['area_id'] = [_resolve_area_id(aid, city)
                         for aid, city in zip(match_data.area_id, match_data.city)]
# map stadium capacity for events without an explicit capacity
match_data['capacity'] = [_resolve_capacity(aid, npop)
                          for aid, npop in zip(match_data.area_id, match_data.capacity)]
match_data

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
match_data.to_csv('output/event_data.csv', index=False)

In [None]:
# query for the ID and name of every search area
ld_csa = """
    select 
          area_id
        , area_name
    from cluster_search_areas_v2
"""
stadname_data = pd.read_sql_query(ld_csa, conn)
# keep only the part of the name in front of the first comma
stadname_data['area_name'] = stadname_data.area_name.map(lambda name: name.split(',')[0])
# area ID -> shortened area name
aid2name = dict(zip(stadname_data.area_id, stadname_data.area_name))
stadname_data

In [None]:
# create the cache directory first; to_csv fails if 'output/' does not exist yet
os.makedirs('output', exist_ok=True)
stadname_data.to_csv('output/stadname_data.csv', index=False)

In [None]:
#stadname_data = pd.read_csv('output/stadname_data.csv')

#### user numbers

In [None]:
sns.set_theme(style="ticks")

# one line plot per stadium (facet row), rows ordered like aid2city
g = sns.relplot(
    data=panelstad_data2,
    x="day", y="ndids",
    row="area_id", row_order=list(aid2city.keys()),
    kind="line",
    height=2, aspect=4., facet_kws=dict(sharex=False),
)

axes = g.axes
holis = [date(2024,5,9), date(2024,5,20)]
# one tick per day, but only Sundays (weekday 6) get a label, formatted MM/DD
sunday_labels = [d.strftime('%m/%d') if d.weekday() == 6 else '' for d in daylist]
for ax_row, (aid, city) in zip(axes, aid2city.items()):
    for ax in ax_row:
        ax.set_ylabel('users in stadium')
        # first word of the city name plus the stadium name
        ax.set_title(f"{city.split(' ')[0]} - {aid2name[aid]}")
        ax.set_xticks(daylist)
        ax.set_xticklabels(sunday_labels)
        # write the label of every event at this stadium at its day, as vertical text
        # whose top edge sits at y = 90
        matches_here = match_data[match_data.area_id==aid]
        for day, match in zip(matches_here.day, matches_here.match):
            if day in daylist:
                ax.text(day, 90., match, rotation=90, ha='center', va='top')

# create the figure directory first; savefig fails if 'plots/' does not exist yet
os.makedirs('plots', exist_ok=True)
plt.savefig('plots/panelstad.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/panelstad.pdf', bbox_inches='tight')
plt.show()

In [None]:
sns.set_theme(style="ticks")

# (no palette needed: a single series per panel)
#palette = sns.husl_palette(2)#sns.color_palette("rocket_r")

# Same figure as 'panelstad', restricted to the first seven stadiums in aid2city order.
g = sns.relplot(
    data=panelstad_data2[panelstad_data2.area_id.isin(list(aid2city.keys())[:7])],
    x="day", y="ndids",
    #hue="with_stadiums", hue_order=[True,False],
    row="area_id", row_order=list(aid2city.keys())[:7],# size="choice",# col="align",
    kind="line",# palette=palette,# size_order=[False, True], 
    height=2, aspect=4., facet_kws=dict(sharex=False),
)

axes = g.axes
# presumably public holidays inside the window; not used further in this cell
holis = [date(2024,5,9), date(2024,5,20)]
# the rows of g.axes follow row_order, i.e. the first seven aid2city entries
for ax_row, (aid, city) in zip(axes, [(aid, city) for aid, city in aid2city.items()][:7]):
    for ax in ax_row:
        ax.set_ylabel('users in stadium')
        # title: first word of the city name + short stadium name
        ax.set_title(f"{city.split(' ')[0]} - {aid2name[aid]}")
        #daylist = [di+timedelta(dt) for dt in range(1+(df-di).days)]
        # one tick per day, but only Sundays (weekday()==6) get an MM/DD label
        ax.set_xticks(daylist)
        ax.set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if d.weekday()==6 else '' for t,d in enumerate(daylist)])#, rotation=90)
        #ax.set_xlim([di-timedelta(7), df+timedelta(7)])
        #ax.set_ylim([0., 3.])
        # annotate every event held in this stadium at its date (fixed height y=90)
        matches_here = match_data[match_data.area_id==aid]
        for day, match in zip(matches_here.day, matches_here.match):
            if day in daylist:
                ax.text(day, 90., match, rotation=90, ha='center', va='top')

plt.savefig(f'plots/panelstad_1.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/panelstad_1.pdf', bbox_inches='tight')
plt.show()

In [None]:
sns.set_theme(style="ticks")

# (no palette needed: a single series per panel)
#palette = sns.husl_palette(2)#sns.color_palette("rocket_r")

# Same figure as 'panelstad', restricted to the stadiums after the first seven in aid2city order.
g = sns.relplot(
    data=panelstad_data2[panelstad_data2.area_id.isin(list(aid2city.keys())[7:])],
    x="day", y="ndids",
    #hue="with_stadiums", hue_order=[True,False],
    row="area_id", row_order=list(aid2city.keys())[7:],# size="choice",# col="align",
    kind="line",# palette=palette,# size_order=[False, True], 
    height=2, aspect=4., facet_kws=dict(sharex=False),
)

axes = g.axes
# presumably public holidays inside the window; not used further in this cell
holis = [date(2024,5,9), date(2024,5,20)]
# the rows of g.axes follow row_order, i.e. the aid2city entries from the eighth on
for ax_row, (aid, city) in zip(axes, [(aid, city) for aid, city in aid2city.items()][7:]):
    for ax in ax_row:
        ax.set_ylabel('users in stadium')
        # title: first word of the city name + short stadium name
        ax.set_title(f"{city.split(' ')[0]} - {aid2name[aid]}")
        #daylist = [di+timedelta(dt) for dt in range(1+(df-di).days)]
        # one tick per day, but only Sundays (weekday()==6) get an MM/DD label
        ax.set_xticks(daylist)
        ax.set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if d.weekday()==6 else '' for t,d in enumerate(daylist)])#, rotation=90)
        #ax.set_xlim([di-timedelta(7), df+timedelta(7)])
        #ax.set_ylim([0., 3.])
        # annotate every event held in this stadium at its date (fixed height y=90)
        matches_here = match_data[match_data.area_id==aid]
        for day, match in zip(matches_here.day, matches_here.match):
            if day in daylist:
                ax.text(day, 90., match, rotation=90, ha='center', va='top')

plt.savefig(f'plots/panelstad_2.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/panelstad_2.pdf', bbox_inches='tight')
plt.show()

- 06/29, 07/05: Olympiastadion München: additional fan zone for DFB elimination phase matches: https://www.olympiapark.de/de/der-olympiapark/presse/pressemitteilungen/2024/fan-zone-olympiapark-stockt-public-viewing-auf--deutschland-spiele-nun-auch-im-olympiastadion https://www.sueddeutsche.de/muenchen/em-muenchen-public-viewing-wetter-fan-zone-andrang-voll-olympiastadion-lux.QEvnEYk5HrkJ1JVMCWHj7n

#### absolute contact numbers

In [None]:
# total ncontacts_2 for rows with an area_id (> -0.5) vs. rows without one (< -0.5), in that order
print(data[data.area_id>-.5].ncontacts_2.sum(), data[data.area_id<-.5].ncontacts_2.sum())

In [None]:
# total ncontacts_1 for rows without an area_id (< -0.5) vs. rows with one (> -0.5);
# note that the order is reversed relative to the ncontacts_2 print above
print(data[data.area_id<-.5].ncontacts_1.sum(), data[data.area_id>-.5].ncontacts_1.sum())

Some out-of-stadium contacts are assigned an area_id (but are too far from stadium center).

In [None]:
# day x city tables of contact counts.
# data_plot_1: ncontacts_1 summed over all rows of a (day, city) pair
data_plot_1 = (
    data.drop(columns=['area_id'])
        .groupby(['day', 'city'])
        .sum()
        .unstack('city')
        .ncontacts_1
)
# data_plot_2: ncontacts_2 summed over the rows that carry an area_id (area_id > -0.5)
data_plot_2 = (
    data[data.area_id > -.5]
        .drop(columns=['area_id'])
        .groupby(['day', 'city'])
        .sum()
        .unstack('city')
        .ncontacts_2
)
data_plot_1

In [None]:
sns.set_theme(style="white")

# Two stacked heatmaps (cities x days) of log10 contact counts:
# top = data_plot_1 (city), bottom = data_plot_2 (stadiums).
f, ax = plt.subplots(figsize=(11, 7), nrows=2)

# sequential colormap, shared by both panels
cmap = sns.color_palette('Blues', as_cmap=True)#sns.diverging_palette(230, 20, as_cmap=True)

# Both panels get their tick labels from data_plot_1 below, so the stadium table is
# aligned to the same days/cities first; without this a missing day or city in
# data_plot_2 would shift the cells relative to their labels.
log_city = np.log10(data_plot_1.T)
log_stad = np.log10(data_plot_2.reindex(index=data_plot_1.index, columns=data_plot_1.columns).T)
# common colour scale: the upper limit is taken from the city panel for both heatmaps
vmax = log_city.max().max()

sns.heatmap(log_city,
            cmap=cmap, vmin=0., vmax=vmax,
            square=True, linewidths=.5, cbar_kws={"shrink": .5, "orientation": 'horizontal', 'pad': .35, 'label': 'log10(contacts city)'},
            ax=ax[0])
sns.heatmap(log_stad,
            cmap=cmap, vmin=0., vmax=vmax,
            square=True, linewidths=.5, cbar_kws={"shrink": .5, "orientation": 'horizontal', 'pad': .35, 'label': 'log10(contacts stadiums)'},
            ax=ax[1])

for i in range(2):
    # one tick per cell centre; every second day is labelled MM/DD
    ax[i].set_xticks([t+.5 for t in range(len(data_plot_1.index))])
    ax[i].set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if t%2==0 else '' for t, d in enumerate(data_plot_1.index)])
    ax[i].set_yticks([c+.5 for c in range(len(data_plot_1.columns))])
    ax[i].set_yticklabels([city for city in data_plot_1.columns])

plt.savefig('plots/contacts_city_stadium.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contacts_city_stadium.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Long table with one count `n` per (day, city, area_id) and a `which` label,
# so that contacts and devices can be drawn as two hues of the same plot.

# contact numbers
data2 = (
    data.copy(deep=True)[['day', 'city', 'area_id', 'ncontacts_2']]
    .rename(columns={'ncontacts_2': 'n'})
    .assign(which='contacts')
)

# device numbers
panelstad_data3 = (
    panelstad_data2.copy(deep=True)
    .assign(city=lambda grid: [aid2city[aid] for aid in grid.area_id])
    .rename(columns={'ndids': 'n'})
    .assign(which='devices')
)

# join both
joint_data = pd.concat([data2, panelstad_data3])
# log10 of the counts; anything that is not > 0 (zeros, NaN) is mapped to 0
joint_data['n'] = [np.log10(n) if n>0 else 0 for n in joint_data.n]
# keep only the stadiums under study
joint_data = joint_data[joint_data.area_id.isin(euro24_stadiums + add_stadiums)]
joint_data

In [None]:
sns.set_theme(style="ticks")

# two-colour palette (currently not passed to relplot, see the commented argument below)
palette = sns.husl_palette(2)#sns.color_palette("rocket_r")

# One row per stadium (aid2city order); two lines per panel: log10 devices and log10 contacts.
g = sns.relplot(
    data=joint_data,
    x="day", y="n",
    hue="which", hue_order=['devices','contacts'],
    row="area_id", row_order=list(aid2city.keys()),# size="which",# col="align",
    kind="line",# palette=palette,# size_order=[False, True], 
    height=2, aspect=4., facet_kws=dict(sharex=False),
    alpha=.5
)

axes = g.axes
# presumably public holidays inside the window; not used further in this cell
holis = [date(2024,5,9), date(2024,5,20)]
# the rows of g.axes follow row_order, i.e. the iteration order of aid2city
for ax_row, (aid, city) in zip(axes, aid2city.items()):
    for ax in ax_row:
        ax.set_ylabel('log10(count)')
        # title: first word of the city name + short stadium name
        ax.set_title(f"{city.split(' ')[0]} - {aid2name[aid]}")
        #daylist = [di+timedelta(dt) for dt in range(1+(df-di).days)]
        # one tick per day, but only Sundays (weekday()==6) get an MM/DD label
        ax.set_xticks(daylist)
        ax.set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if d.weekday()==6 else '' for t,d in enumerate(daylist)])#, rotation=90)
        #ax.set_xlim([di-timedelta(7), df+timedelta(7)])
        # fixed y range, i.e. counts between 1 and 1000 on the log10 scale
        ax.set_ylim([0., 3.])
        # annotate every event held in this stadium, anchored at the top of the panel (y=3)
        matches_here = match_data[match_data.area_id==aid]
        for day, match in zip(matches_here.day, matches_here.match):
            if day in daylist:
                ax.text(day, 3., match, rotation=90, ha='center', va='top')

plt.savefig(f'plots/panelcontactsstad.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/panelcontactsstad.pdf', bbox_inches='tight')
plt.show()

In [None]:
sns.set_theme(style="ticks")

# two-colour palette (currently not passed to relplot, see the commented argument below)
palette = sns.husl_palette(2)#sns.color_palette("rocket_r")

# Same figure as 'panelcontactsstad', restricted to the first seven stadiums in aid2city order.
g = sns.relplot(
    data=joint_data[joint_data.area_id.isin(list(aid2city.keys())[:7])],
    x="day", y="n",
    hue="which", hue_order=['devices','contacts'],
    row="area_id", row_order=list(aid2city.keys())[:7],# size="which",# col="align",
    kind="line",# palette=palette,# size_order=[False, True], 
    height=2, aspect=4., facet_kws=dict(sharex=False),
    alpha=.5
)

axes = g.axes
# presumably public holidays inside the window; not used further in this cell
holis = [date(2024,5,9), date(2024,5,20)]
# the rows of g.axes follow row_order, i.e. the first seven aid2city entries
for ax_row, (aid, city) in zip(axes, [(aid, city) for (aid, city) in aid2city.items()][:7]):
    for ax in ax_row:
        ax.set_ylabel('log10(count)')
        # title: first word of the city name + short stadium name
        ax.set_title(f"{city.split(' ')[0]} - {aid2name[aid]}")
        #daylist = [di+timedelta(dt) for dt in range(1+(df-di).days)]
        # one tick per day, but only Sundays (weekday()==6) get an MM/DD label
        ax.set_xticks(daylist)
        ax.set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if d.weekday()==6 else '' for t,d in enumerate(daylist)])#, rotation=90)
        #ax.set_xlim([di-timedelta(7), df+timedelta(7)])
        # fixed y range, i.e. counts between 1 and 1000 on the log10 scale
        ax.set_ylim([0., 3.])
        # annotate every event held in this stadium, anchored at the top of the panel (y=3)
        matches_here = match_data[match_data.area_id==aid]
        for day, match in zip(matches_here.day, matches_here.match):
            if day in daylist:
                ax.text(day, 3., match, rotation=90, ha='center', va='top')

plt.savefig(f'plots/panelcontactsstad_1.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/panelcontactsstad_1.pdf', bbox_inches='tight')
plt.show()

In [None]:
sns.set_theme(style="ticks")

# two-colour palette (currently not passed to relplot, see the commented argument below)
palette = sns.husl_palette(2)#sns.color_palette("rocket_r")

# Same figure as 'panelcontactsstad', restricted to the stadiums after the first seven in aid2city order.
g = sns.relplot(
    data=joint_data[joint_data.area_id.isin(list(aid2city.keys())[7:])],
    x="day", y="n",
    hue="which", hue_order=['devices','contacts'],
    row="area_id", row_order=list(aid2city.keys())[7:],# size="which",# col="align",
    kind="line",# palette=palette,# size_order=[False, True], 
    height=2, aspect=4., facet_kws=dict(sharex=False),
    alpha=.5
)

axes = g.axes
# presumably public holidays inside the window; not used further in this cell
holis = [date(2024,5,9), date(2024,5,20)]
# the rows of g.axes follow row_order, i.e. the aid2city entries from the eighth on
for ax_row, (aid, city) in zip(axes, [(aid, city) for (aid, city) in aid2city.items()][7:]):
    for ax in ax_row:
        ax.set_ylabel('log10(count)')
        # title: first word of the city name + short stadium name
        ax.set_title(f"{city.split(' ')[0]} - {aid2name[aid]}")
        #daylist = [di+timedelta(dt) for dt in range(1+(df-di).days)]
        # one tick per day, but only Sundays (weekday()==6) get an MM/DD label
        ax.set_xticks(daylist)
        ax.set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if d.weekday()==6 else '' for t,d in enumerate(daylist)])#, rotation=90)
        #ax.set_xlim([di-timedelta(7), df+timedelta(7)])
        # fixed y range, i.e. counts between 1 and 1000 on the log10 scale
        ax.set_ylim([0., 3.])
        # annotate every event held in this stadium, anchored at the top of the panel (y=3)
        matches_here = match_data[match_data.area_id==aid]
        for day, match in zip(matches_here.day, matches_here.match):
            if day in daylist:
                ax.text(day, 3., match, rotation=90, ha='center', va='top')

plt.savefig(f'plots/panelcontactsstad_2.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/panelcontactsstad_2.pdf', bbox_inches='tight')
plt.show()

#### share of stadium contacts among all contacts in the city

In [None]:
sns.set_theme(style="white")

# Heatmap (cities x days) of the share of stadium contacts among all city contacts.
# Days with a football event in the city are shown with a negative sign, so that the
# diverging colormap separates them from all other days.
f, ax = plt.subplots(figsize=(11, 9), ncols=1)

# diverging colormap centred on 0 (see `center=0.` below)
cmap = sns.diverging_palette(230, 20, as_cmap=True)

data_plot_ratio = data_plot_2.fillna(0.)/data_plot_1
# Flip the sign once per (day, city) with a football event. Tracking the pairs already
# handled keeps two matches in the same city on the same day from flipping the cell
# back to positive; the single .loc[row, col] write replaces the chained
# .loc[row][col] assignment, which may only modify a temporary copy.
flipped = set()
for _, row in match_data.iterrows():
    key = (row.day, row.city)
    if not row.is_football or key in flipped:
        continue
    if row.day in data_plot_ratio.index and row.city in data_plot_ratio.columns:
        data_plot_ratio.loc[row.day, row.city] *= -1.
        flipped.add(key)

sns.heatmap((data_plot_ratio).T,
            cmap=cmap, vmin=data_plot_ratio.min().min(), vmax=data_plot_ratio.max().max(), center=0.,
            square=True, linewidths=.5, cbar_kws={"shrink": .5, "orientation": 'horizontal'},
            ax=ax)

# one tick per cell centre; every second day is labelled MM/DD
ax.set_xticks([t+.5 for t in range(len(data_plot_1.index))])
ax.set_xticklabels([str(d.month).zfill(2)+'/'+str(d.day).zfill(2) if t%2==0 else '' for t, d in enumerate(data_plot_1.index)])
ax.set_yticks([c+.5 for c in range(len(data_plot_1.columns))])
ax.set_yticklabels([city for city in data_plot_1.columns])

plt.savefig('plots/contactshare_city_stadium.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contactshare_city_stadium.pdf', bbox_inches='tight')
plt.show()

#### contact exposure comparison between events

requires normalizing for
- city population/stadium capacity
- local panel size

##### Horvitz-Thompson correction

In [None]:
def match2event(match):
    '''
    Turn the short label of an event into the name shown in the plots.

    Parameters
    ----------
    match : str
        short event label from the event table

    Returns
    -------
    str
        `match` itself for 7-character labels containing '-' (e.g. 'SRB-ENG'),
        the spelled-out name for the known abbreviations, and `match`
        unchanged for everything else.
    '''
    # labels of the form 'AAA-BBB' are kept verbatim
    if '-' in match and len(match) == 7:
        return match
    full_names = {
        'R Kaiser': 'Roland Kaiser',
        'P Maffay': 'Peter Maffay',
        'A Gabalier': 'Andreas Gabalier',
        'T Swift': 'Taylor Swift',
        'T Scott': 'Travis Scott',
        'CSD': 'Christopher Street Day',
        'BLiga': 'Bundesliga',
        'BLiga rel': 'Bundesliga relegation',
        'UEFA CL': 'UEFA Champions League',
        'Am. Football': 'American football'
    }
    # unknown labels fall through unchanged
    return full_names.get(match, match)

In [None]:
# One row per event, enriched with everything the normalisation needs:
# city panel figures, stadium population, contacts in the stadium (ncontacts_2),
# contacts summed over the whole city on that day (ncontacts_1) and the
# devices seen in the stadium (ndids_stad).
match_rank = (
    match_data
    .merge(panel_data[['city','pdid','ndids']], how='left', on='city')
    .merge(stadium_data[['area_id','population']], how='left', on='area_id')
    .merge(data[['day','city','area_id','ncontacts_2']], how='left', on=['day','city','area_id'])
    .merge(data[['day','city','ncontacts_1']].groupby(['day','city']).sum().reset_index(),
           how='left', on=['day','city'])
    .merge(panelstad_data2, how='left', on=['day','area_id'], suffixes=('','_stad'))
    .drop_duplicates()
)
# devices seen in the stadium relative to its capacity
match_rank['pdid_stad'] = match_rank.ndids_stad / match_rank.capacity
match_rank

In [None]:
# spot check: the assembled row(s) of a single match
match_rank[match_rank.match=='POL-AUT']

In [None]:
# spot check: raw contact rows for Berlin on 2024-06-21
data[(data.day==date(2024,6,21)) & (data.city=='Berlin')]

In [None]:
# list the events ordered by the number of devices seen in the stadium, largest first
for _, row in match_rank.sort_values('ndids_stad', ascending=False).iterrows():
    print(row.ndids_stad, row.match)

In [None]:
# distributions of pdid (city) and pdid_stad (stadium, only events with more than
# 10 stadium contacts); both histograms are issued from the same cell
match_rank.pdid.hist()
match_rank.pdid_stad[match_rank.ncontacts_2>10].hist()

In [None]:
# distributions of the two pdid columns of the Germany-wide table
data_germany.pdid_1.hist()
data_germany.pdid_2.hist()

In [None]:
# Normalise the contact counts of every event and move them to a log10 scale.
# NOTE: `citycorr` is built in the "sampling correction model" section further down,
# so that section has to be executed before this cell. The cell also overwrites
# `match_rank`, so re-running it requires re-running the cell that assembles it first.
match_rank = match_rank.merge(citycorr, on='city')# for citycorr, see below

#match_rank['ncontacts_1'] = match_rank.ncontacts_1 / match_rank.pdid / match_rank.ndids
#match_rank['ncontacts_1'] = match_rank.ncontacts_1 / pdid_avg / match_rank.ndids / match_rank.corrfac
# city contacts per panel device, corrected for the city's user share relative to the
# average share and for the per-city correction factor
pdid_avg = panel_data[['city','pdid']].drop_duplicates().pdid.mean()
match_rank['ncontacts_1'] = match_rank.ncontacts_1 / match_rank.ndids / (match_rank.pdid/pdid_avg)**.1038 / pdid_avg / match_rank.corrfac# .0991 .0988

# stadium contacts: events with fewer than `ncthr` detected contacts are dropped (NaN);
# the average stadium share is taken over the remaining events only
ncthr = 20
pdid_stad_avg = match_rank.pdid_stad[match_rank.ncontacts_2>=ncthr].mean()
match_rank['ncontacts_2'] = [n if n>=ncthr else np.nan for n in match_rank.ncontacts_2]
match_rank['ncontacts_2'] = match_rank.ncontacts_2 / match_rank.ndids_stad / (match_rank.pdid_stad/pdid_stad_avg)**.5021 / pdid_stad_avg / match_rank.corrfac
# .4736 .4701

# Germany-wide reference for the same day
match_rank = match_rank.merge(data_germany, on='day', how='left', suffixes=('', '_germany'))
match_rank['ncontacts_3'] = match_rank.ncontacts_1_germany / match_rank.ndid_1 / match_rank.pdid_1 # / data_germany.pdid_1.mean()

match_rank['event'] = [match2event(match) for match in match_rank.match]
# keep the full table for later inspection
match_rank_aux = match_rank.copy(deep=True)
# explicit copy: the log columns below are written into an independent frame
# (writing into the bare column slice triggers pandas' SettingWithCopyWarning)
match_rank = match_rank[['city','event','ncontacts_2','ncontacts_1','ncontacts_3']].copy()
# log10 of the positive values; everything else (zero, negative, NaN) becomes NaN
for col in ['ncontacts_1', 'ncontacts_2', 'ncontacts_3']:
    match_rank[col] = np.log10(match_rank[col].where(match_rank[col] > 0))

In [None]:
# event names (as returned by match2event) that are left out of the comparison plots
to_exclude = [
    'A Bayern',
    'Bundesliga relegation',
    'American football',
    'test match',
    'RLiga (f)',
    'HLiga',
    'BLiga3',
    'HipHop',
    'S25 Berlin',
    'UEFA Champions League',
    'B2Run',
    'DFB Pokal',
]

In [None]:
# Dot plot of the normalised log10 contact numbers per event, one column each for
# the host stadium, the host city and Germany; events sorted by the city value.
to_plot = match_rank[~match_rank.event.isin(to_exclude)].sort_values(['ncontacts_1','ncontacts_2','ncontacts_3'], ascending=False)

g = sns.PairGrid(to_plot,
                 x_vars=['ncontacts_2','ncontacts_1','ncontacts_3'], y_vars=["event"],
                 height=10, aspect=.3)

# one dot per event and column, coloured by host city
g.map(sns.stripplot, size=7.5, orient="h", jitter=False,# alpha=.5,
      linewidth=0, edgecolor="w", hue=to_plot['city'], hue_order=sorted(set(list(aid2city.values()))),
      palette=sns.hls_palette(len(set(match_data.city))))# palette="flare_r", husl/hls_palette

# x-axis labels, in the order of x_vars
titles = ["log10(contacts/person host stadium)", "log10(contacts/person host city)", "log10(contacts/person Germany)"]

for ax, title in zip(g.axes.flat, titles):
    # horizontal grid lines only, one per event
    ax.xaxis.grid(False)
    ax.yaxis.grid(True)

# Common x range for all columns. Each column is filtered on its own values
# (the Germany column was previously filtered with the stadium column's mask).
xmin = min(match_rank[match_rank.ncontacts_1>0].ncontacts_1.min(), match_rank[match_rank.ncontacts_2>0].ncontacts_2.min(),
           match_rank[match_rank.ncontacts_3>0].ncontacts_3.min())
xmax = max(match_rank.ncontacts_1.max(), match_rank.ncontacts_2.max(), match_rank.ncontacts_3.max())
dx = .1
for i, ax_col in enumerate(g.axes):
    for j, ax in enumerate(ax_col):
        # same limits everywhere, label per column; only the first column names the y axis
        ax.set_xlim([max(0,xmin)-dx, xmax+dx])
        ax.set_xlabel(titles[j])
        if j == 0:
            ax.set_ylabel('event')

g.add_legend(title='host city')
sns.despine(left=True, bottom=True)

plt.savefig('plots/eventcmp_horvitz.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/eventcmp_horvitz.pdf', bbox_inches='tight')
plt.show()

In [None]:
# spot check: full row of a single match, without the last four columns of match_rank_aux
match_rank_aux[match_rank_aux.event=='SRB-ENG'][match_rank_aux.columns[:-4]]

In [None]:
# spot check: raw contact rows of area 28 (Gelsenkirchen in aid2city) on 2024-07-19
data[(data.area_id==28) & (data.day==date(2024,7,19))]

- GER-DEN: rainfall https://www.welt.de/sport/fussball/em/article252271302/Deutschland-Daenemark-Diese-Bilder-gingen-um-die-Welt.html
- ENG-NED: rainfall https://www.sportschau.de/fussball/uefa-euro-2024/regenfaelle-in-dortmund-vor-em-halbfinale,regen-dortmund-100.html
- ROM-UKR: demo https://www.br.de/nachrichten/bayern/emotionaler-em-auftakt-fuer-die-ukraine-in-muenchen,UFy5LuC
- TUR-GEO: rainfall https://www.welt.de/sport/fussball/em/article252096768/Tuerkei-Georgien-Das-bislang-groesste-Spektakel-der-EM-2024.html
- SRB-ENG: riot https://www.welt.de/sport/fussball/em/article252059140/EM-2024-Vor-dem-Spiel-geraten-Fans-von-England-und-Serbien-aneinander.html
- all Gelsenkirchen matches after SRB-ENG except ENG-SLO: fear of riot https://uefaeuro2024.gelsenkirchen.de/de/index.aspx
- DEN-ENG: city center crowded https://www.hessenschau.de/panorama/daenemark---england-roemerberg-in-frankfurt-in-britischer-hand-prinz-william-im-stadion-v9,em-frankfurt-frederik-100.html
- SVK-ROM: storm, fan zone visitors sent home https://www.hessenschau.de/panorama/em-fanmeile-in-frankfurt-wegen-unwetter-geschlossen-v2,kurz-frankfurt-fanmeile-geschlossen-unwetter-100.html
- FRA-BEL: fan march https://www1.wdr.de/nachrichten/rheinland/duesseldorf-em-fanmaersche-belgien-frankreich-100.html
- ALB-ESP: mobile data record https://rp-online.de/sport/fussball/em/fussball-em-2024-datenrekord-bei-spanien-gegen-albanien-in-duesseldorfer-arena_aid-115042555

##### uncorrected

In [None]:
# Rebuild the per-event table from scratch for the uncorrected comparison:
# stadium population, city panel figures, stadium contacts (ncontacts_2),
# city-wide contacts of the day (ncontacts_1) and devices in the stadium (ndids_stad).
match_rank = (
    match_data
    .merge(stadium_data[['area_id','population']], how='left', on='area_id')
    .merge(panel_data[['city','pdid','ndids']], how='left', on='city')
    .merge(data[['day','city','area_id','ncontacts_2']], how='left', on=['day','city','area_id'])
    .merge(data[['day','city','ncontacts_1']].groupby(['day','city']).sum().reset_index(),
           how='left', on=['day','city'])
    .merge(panelstad_data2, how='left', on=['day','area_id'], suffixes=('','_stad'))
    .drop_duplicates()
)
# devices seen in the stadium relative to its capacity
match_rank['pdid_stad'] = match_rank.ndids_stad / match_rank.capacity
match_rank

In [None]:
# Variant A: normalise by population/capacity (per 1e4) and user share, each with a power-law exponent.
# NOTE: this cell and the next one are alternatives. Both overwrite `match_rank`, and this
# one reduces it to three columns, so the next cell (which reads `ndids`, `ndids_stad`
# and `match`) cannot run after it without rebuilding `match_rank` first.
match_rank['ncontacts_1'] = match_rank.ncontacts_1 / (match_rank.population / 1e4)**1.0743 / match_rank.pdid**1.0387
match_rank['ncontacts_2'] = match_rank.ncontacts_2 / (match_rank.capacity / 1e4)**1.0743 / match_rank.pdid_stad**1.0387
match_rank['event'] = [match2event(match) for match in match_rank.match]
match_rank = match_rank[['event','ncontacts_2','ncontacts_1']]
# log10 without guarding against zeros (these become -inf)
match_rank.loc[:,'ncontacts_1'] = np.log10(match_rank.ncontacts_1)
match_rank.loc[:,'ncontacts_2'] = np.log10(match_rank.ncontacts_2)

In [None]:
# use this one (easier)
match_rank['ncontacts_1'] = match_rank.ncontacts_1 / (match_rank.ndids / 1e4)
match_rank['ncontacts_2'] = match_rank.ncontacts_2 / (match_rank.ndids_stad / 1e4)
match_rank['event'] = [match2event(match) for match in match_rank.match]
match_rank_aux = match_rank.copy(deep=True)
match_rank = match_rank[['event','ncontacts_2','ncontacts_1']]
match_rank.loc[:,'ncontacts_1'] = np.log10(match_rank.ncontacts_1)
match_rank.loc[:,'ncontacts_2'] = np.log10(match_rank.ncontacts_2)

In [None]:
# Make the PairGrid
g = sns.PairGrid(match_rank[~match_rank.event.isin(to_exclude)].sort_values(['ncontacts_1','ncontacts_2'], ascending=False),
                 x_vars=['ncontacts_2', 'ncontacts_1'], y_vars=["event"],
                 height=10, aspect=.3)

# Draw a dot plot using the stripplot function
g.map(sns.stripplot, size=7.5, orient="h", jitter=False,
      palette="flare_r", linewidth=0, edgecolor="w")

# Use the same x axis limits on all columns and add better labels
#g.set(xlim=(0, 25), xlabel="contacts", ylabel="")

# Use semantically meaningful titles for the columns
titles = ["log10(contacts/1e4 persons stadiums)", "log10(contacts/1e4 persons city)"]

for ax, title in zip(g.axes.flat, titles):

    # Set a different title for each axes
    #ax.set(title=title)

    # Make the grid horizontal instead of vertical
    ax.xaxis.grid(False)
    ax.yaxis.grid(True)

# Access the axes and set properties per column
xmin = min(match_rank[match_rank.ncontacts_1>0].ncontacts_1.min(), match_rank[match_rank.ncontacts_2>0].ncontacts_2.min())
xmax = max(match_rank.ncontacts_1.max(), match_rank.ncontacts_2.max())
dx = .1
for i, ax_col in enumerate(g.axes):
    for j, ax in enumerate(ax_col):
        # Example: Set specific properties per column
        if j == 0:  # First column (x-axis)
            ax.set_xlim([max(0,xmin)-dx, xmax+dx])
            ax.set_ylabel('event')
            ax.set_xlabel(titles[0])
        elif j == 1:
            ax.set_xlim([max(0,xmin)-dx, xmax+dx])
            ax.set_xlabel(titles[1])

sns.despine(left=True, bottom=True)

#plt.savefig(f'plots/eventcmp_bydemographic.jpg', bbox_inches='tight', dpi=300)
#plt.savefig(f'plots/eventcmp_bydemographic.pdf', bbox_inches='tight')
plt.show()

##### by event type

In [None]:
def match2event(match):
    '''
    Map the short label of an event to its event *type*, for grouping events.

    NOTE: this redefines the `match2event` from the "Horvitz-Thompson correction"
    section; from this cell on, 7-character labels containing '-' are collapsed
    into three classes instead of being kept verbatim.

    Parameters
    ----------
    match : str
        short event label from the event table

    Returns
    -------
    str
        'Euro24 match GER' / 'Euro24 match TUR' / 'Euro24 match' for
        7-character labels containing '-' (depending on whether 'GER' or 'TUR'
        occurs in the label, 'GER' taking precedence), the spelled-out name for
        the known abbreviations, and `match` unchanged for everything else.
    '''
    if '-' in match and len(match)==7:
        if 'GER' in match:
            event = 'Euro24 match GER'
        elif 'TUR' in match:
            event = 'Euro24 match TUR'
        else:
            event = 'Euro24 match'
    else:
        mapping = {
            'R Kaiser': 'Roland Kaiser',
            'P Maffay': 'Peter Maffay',
            'A Gabalier': 'Andreas Gabalier',
            'T Swift': 'Taylor Swift',
            # kept in line with the abbreviation table of the earlier definition
            'T Scott': 'Travis Scott',
            'CSD': 'Christopher Street Day',
            'BLiga': 'Bundesliga',
            'BLiga rel': 'Bundesliga relegation',
            'UEFA CL': 'UEFA Champions League',
            'Am. Football': 'American football'
        }
        # unknown labels fall through unchanged
        event = mapping.get(match, match)
    return event

In [None]:
# Rebuild the per-event table for the comparison by event type. Here the full `data`
# table is joined on (day, city, area_id), so both contact columns come from the
# matching row of `data` instead of a city-wide sum.
match_rank = (
    match_data
    .merge(stadium_data[['area_id','population']], how='left', on='area_id')
    .merge(panel_data[['city','pdid','ndids']], how='left', on='city')
    .merge(data, how='left', on=['day','city','area_id'])
    .merge(panelstad_data2, how='left', on=['day','area_id'], suffixes=('','_stad'))
    .drop_duplicates()
)
# devices seen in the stadium relative to its capacity
match_rank['pdid_stad'] = match_rank.ndids_stad / match_rank.capacity
match_rank

In [None]:
# Variant A: normalise by user share and device count, then average per event type.
# NOTE: this cell and the next one are alternatives. Both overwrite `match_rank`, and this
# one reduces it to one row per event type with three columns, so the next cell (which
# reads `ndids`, `ndids_stad` and `match`) cannot run after it without rebuilding
# `match_rank` first.
match_rank['ncontacts_1'] = match_rank.ncontacts_1 / match_rank.pdid / match_rank.ndids
match_rank['ncontacts_2'] = match_rank.ncontacts_2 / match_rank.pdid_stad / match_rank.ndids_stad
match_rank['event'] = [match2event(match) for match in match_rank.match]
# keep the full table for later inspection
match_rank_aux = match_rank.copy(deep=True)
match_rank = match_rank[['event','ncontacts_2','ncontacts_1']]
# mean of the normalised values over all events of the same type
match_rank = match_rank.groupby('event').mean().reset_index()
# log10 of the positive values; everything else becomes NaN
match_rank.loc[:,'ncontacts_1'] = [np.log10(n) if n>0 else np.nan for n in match_rank.ncontacts_1]
match_rank.loc[:,'ncontacts_2'] = [np.log10(n) if n>0 else np.nan for n in match_rank.ncontacts_2]

In [None]:
# 
match_rank['ncontacts_1'] = match_rank.ncontacts_1 / (match_rank.ndids / 1e4)
match_rank['ncontacts_2'] = match_rank.ncontacts_2 / (match_rank.ndids_stad / 1e4)
match_rank['event'] = [match2event(match) for match in match_rank.match]
match_rank_aux = match_rank.copy(deep=True)
match_rank = match_rank[['event','ncontacts_2','ncontacts_1']]
match_rank = match_rank.groupby('event').mean().reset_index()
match_rank.loc[:,'ncontacts_1'] = np.log10(match_rank.ncontacts_1)
match_rank.loc[:,'ncontacts_2'] = np.log10(match_rank.ncontacts_2)

In [None]:
# Make the PairGrid
g = sns.PairGrid(match_rank[~match_rank.event.isin(to_exclude)].sort_values("ncontacts_2", ascending=False),
                 x_vars=match_rank.columns[-2:], y_vars=["event"],
                 height=4, aspect=.6)

# Draw a dot plot using the stripplot function
g.map(sns.stripplot, size=7.5, orient="h", jitter=False,
      palette="flare_r", linewidth=0, edgecolor="w")

# Use the same x axis limits on all columns and add better labels
#g.set(xlim=(0, 25), xlabel="contacts", ylabel="")

# Use semantically meaningful titles for the columns
titles = ["log10(contacts)", "log10(contacts)"]

for ax, title in zip(g.axes.flat, titles):

    # Set a different title for each axes
    #ax.set(title=title)

    # Make the grid horizontal instead of vertical
    ax.xaxis.grid(False)
    ax.yaxis.grid(True)

# Access the axes and set properties per column
xmin = min(match_rank[match_rank.ncontacts_1>0].ncontacts_1.min(), match_rank[match_rank.ncontacts_2>0].ncontacts_2.min())
xmax = max(match_rank.ncontacts_1.max(), match_rank.ncontacts_2.max())
dx = .1
for i, ax_col in enumerate(g.axes):
    for j, ax in enumerate(ax_col):
        # Example: Set specific properties per column
        if j == 0:  # First column (x-axis)
            ax.set_xlim([max(0,xmin)-dx, xmax+dx])
            ax.set_ylabel('event')
            ax.set_xlabel(titles[0])
        elif j == 1:
            ax.set_xlim([max(0,xmin)-dx, xmax+dx])
            ax.set_xlabel(titles[1])

sns.despine(left=True, bottom=True)

plt.savefig(f'plots/eventcmp_byeventtype.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/eventcmp_byeventtype.pdf', bbox_inches='tight')
plt.show()

#### sampling correction model

##### for cities (and Germany)

In [None]:
# unweighted mean of pdid over the distinct (city, pdid) pairs of panel_data
pdid_avg = panel_data[['city','pdid']].drop_duplicates().pdid.mean()
pdid_avg

In [None]:
#conpop_cmp = data.merge(stadium_data, on=['area_id','city']).merge(panel_data[['city','pdid']], on='city')
conpop_cmp = data.drop(columns=['area_id']).groupby(['day','city']).sum().reset_index()\
                .merge(panel_data[['city','pdid','ndids','population']], on='city')\
                .drop_duplicates()
#conpop_cmp['ncontacts_1_normed'] = conpop_cmp.ncontacts_1 / conpop_cmp.pdid**1.2628 / (conpop_cmp.population / 1e5)**1.1125#**1.0387 **1.0743
#conpop_cmp['ncontacts_1_normed'] = conpop_cmp.ncontacts_1 / conpop_cmp.pdid / (conpop_cmp.population / 1e5)
#conpop_cmp['ncontacts_1_normed'] = conpop_cmp.ncontacts_1 / conpop_cmp.pdid / conpop_cmp.ndids# originally used in event cmp
#conpop_cmp['ncontacts_1_normed'] = conpop_cmp.ncontacts_1 / pdid_avg / conpop_cmp.ndids
conpop_cmp['ncontacts_1_normed'] = conpop_cmp.ncontacts_1 / conpop_cmp.ndids / conpop_cmp.pdid**.1038 / pdid_avg

conpop_cmp

In [None]:
# Append the Germany-wide daily figures to conpop_cmp as an additional 'city' called 'Germany'.
conpop_cmp_germany = data_germany.drop(columns=['ndid_1','ndid_2','pdid_2']).rename(columns={'pdid_1':'pdid'})
conpop_cmp_germany['city'] = 'Germany'
# hard-coded population figure used for the Germany rows
conpop_cmp_germany['population'] = 83248000
# NOTE(review): the Germany rows are normalised by pdid and population/1e5, while the
# city rows are normalised by ndids, pdid**.1038 and pdid_avg -- confirm that
# mixing both in one column is intended.
conpop_cmp_germany['ncontacts_1_normed'] = conpop_cmp_germany.ncontacts_1 / conpop_cmp_germany.pdid / (conpop_cmp_germany.population / 1e5)
# NOTE: running this cell twice appends the Germany rows twice
conpop_cmp = pd.concat([conpop_cmp, conpop_cmp_germany])
conpop_cmp

In [None]:
# Per-city correction factor: mean normalised contacts of the city relative to the
# mean over all rows of conpop_cmp.
k_avg = conpop_cmp.ncontacts_1_normed.mean()
print(k_avg)
citycorr = (
    (conpop_cmp.groupby('city').ncontacts_1_normed.mean() / k_avg)
    .rename('corrfac')
    .reset_index()
)
citycorr

In [None]:
# Apply the per-city correction factor to the normalised contacts.
# NOTE: not idempotent -- a second run merges `corrfac` again (producing suffixed
# duplicate columns), so re-run the cells that build conpop_cmp before re-running this one.
conpop_cmp = conpop_cmp.merge(citycorr, on='city')
conpop_cmp['ncontacts_1_normed'] = conpop_cmp.ncontacts_1_normed / conpop_cmp.corrfac

In [None]:
# mean of the corrected values, for comparison with k_avg printed above
conpop_cmp.ncontacts_1_normed.mean()

In [None]:
# Scatter plot (log-log) of ncontacts_1 against city population, one point per row of
# conpop_cmp; colour = pdid, marker size = ncontacts_2.
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
g = sns.relplot(
    data=conpop_cmp,
    x="population", y="ncontacts_1",
    #x="population", y="ncontacts_1_normed",
    #x="population", y="ncontacts_1_model",
    hue="pdid", size="ncontacts_2",
    palette=cmap, sizes=(10, 200),
)
g.set(xscale="log", yscale="log")
g.ax.xaxis.grid(True, "minor", linewidth=.25)
g.ax.yaxis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True)
g.ax.set_xlabel('city population')
g.ax.set_ylabel('detected contacts (without stadiums)')
g.ax.grid(which='major', linestyle='-', linewidth='0.5', color='black', alpha=.25)
g.ax.grid(which='minor', linestyle='-', linewidth='0.5', color='grey', alpha=.25)
# fixed axis ranges; points outside of them (e.g. the 'Germany' rows with a population
# of 83248000) are not visible
g.ax.set_xlim([2.01e5,4e6])
g.ax.set_ylim([2e2,7.99e3])
#g.ax.set_xticks([5e5,1e6])
#g.ax.set_xticklabels([5e5,1e6])

# replace the column names in the legend by readable titles
# (uses the private `_legend` attribute of the FacetGrid)
lg = g._legend
for tx in lg.texts:
    if tx.get_text() == 'pdid':
        tx.set_text('user share')
    if tx.get_text() == 'ncontacts_2':
        tx.set_text('in stadiums')

plt.savefig(f'plots/citybias_npop.jpg', bbox_inches='tight', dpi=300)
plt.savefig(f'plots/citybias_npop.pdf', bbox_inches='tight')
plt.show()

In [None]:
# User-share bias check: detected contacts vs. user share (pdid) on log-log axes,
# coloured by city population and sized by the number of stadium contacts.
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
g = sns.relplot(
    data=conpop_cmp,
    x="pdid",
    y="ncontacts_1",  # the "_normed" variant was tried as well
    hue="population",
    size="ncontacts_2",
    palette=cmap,
    sizes=(10, 200),
)
g.set(xscale="log", yscale="log")
for axis in (g.ax.xaxis, g.ax.yaxis):
    axis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True)
g.ax.set_xlabel('user share')
g.ax.set_ylabel('detected contacts (without stadiums)')
# major and minor grid share one style and differ only in colour
for which, colour in (('major', 'black'), ('minor', 'grey')):
    g.ax.grid(which=which, linestyle='-', linewidth='0.5', color=colour, alpha=.25)
g.ax.set_xlim([8.01e-4, 7e-3])
g.ax.set_ylim([2e2, 7.99e3])

# Give the size legend a readable title
lg = g._legend
for entry in lg.texts:
    if entry.get_text() == 'ncontacts_2':
        entry.set_text('in stadiums')

plt.savefig('plots/citybias_pdid.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/citybias_pdid.pdf', bbox_inches='tight')
plt.show()

Global `pdid`: number of distinct device IDs (the numerator of the global user share), obtained with:

```
select count(distinct hw.did)
from home_work_sdkv6_202406 as hw
```

-> 328736

In [None]:
#pdid_avg = 328736 / 83800000
#pdid_avg

In [None]:
# Log-log OLS fit of detected contacts (without stadiums) against the number of users per city:
#   log10(ncontacts_1) = const + slope * log10(ndids)
# Alternatives explored before: response 'ncontacts_2'; predictors 'population', 'pdid'
# (optionally divided by pdid_avg) or 'ndids_stad'.

# log10 is only defined for strictly positive values. Rows with missing or non-positive
# entries are left out of the fit (the stadium regression further down filters the same way);
# otherwise NaN / -inf values reach the OLS design matrix.
fit_rows = (conpop_cmp['ncontacts_1'] > 0) & (conpop_cmp['ndids'] > 0)
y = np.log10(conpop_cmp.loc[fit_rows, 'ncontacts_1'])
X = np.log10(conpop_cmp.loc[fit_rows, ['ndids']])

# Add a constant term to the predictors (for the intercept)
X = sm.add_constant(X)

# Fit the model
model = sm.OLS(y, X).fit()

# Print the model summary
print(model.summary())

In [None]:
# In-sample predictions of the fitted model (log10 scale), stored next to the data
# so that the scaling plot can draw the fit.
predictions = model.predict(X)
conpop_cmp['ncontacts_1_model'] = predictions

# Diagnostic: observed vs. predicted log10 contacts; points on the dashed red
# diagonal are predicted perfectly.
y_lo, y_hi = min(y), max(y)
plt.scatter(y, predictions)
plt.plot([y_lo, y_hi], [y_lo, y_hi], color='red', linestyle='--')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.title('True vs Predicted Values')
plt.show()

In [None]:
# Scaling plot: detected contacts vs. number of users per city (log-log), coloured by
# user share, with the OLS fit stored in 'ncontacts_1_model' overlaid as a line.
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
g = sns.relplot(
    data=conpop_cmp,
    x="ndids", y="ncontacts_1",
    hue="pdid",
    palette=cmap, sizes=(10, 200),
    lw=0, size=10,
)
# The model predicts log10(contacts), hence 10**prediction. Rows without a prediction are
# dropped and the rest sorted by x, so the fit is drawn as one continuous left-to-right line
# instead of following the row order of the frame.
fit_line = conpop_cmp[['ndids', 'ncontacts_1_model']].dropna().sort_values('ndids')
g.ax.plot(fit_line.ndids, 10.**(fit_line.ncontacts_1_model), c='C3', lw=3)
g.set(xscale="log", yscale="log")
g.ax.xaxis.grid(True, "minor", linewidth=.25)
g.ax.yaxis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True)
g.ax.set_xlabel('users in city')
g.ax.set_ylabel('detected contacts (without stadiums)')
g.ax.set_xlim([1e3,7.99e3])
g.ax.set_ylim([2.01e2,7.99e3])

lg = g._legend
for tx in lg.texts:
    # blank the legend entry labelled '10' (presumably created by the constant size=10 above)
    if tx.get_text() == '10':
        tx.set_text('')
lg.set_title('user share')
# The former handle/label filtering was removed: its only consumer (rebuilding the legend via
# g.ax.legend) was commented out, and unpacking an empty filter result would have raised.

plt.savefig('plots/scaling_cities.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/scaling_cities.pdf', bbox_inches='tight')
plt.show()

##### for stadiums

In [None]:
# Rebuild conpop_cmp at stadium level: contact data joined with the city panel, the
# per-day stadium panel (its clashing columns get the suffix '_stad') and stadium capacity.
# NOTE(review): this reuses the name conpop_cmp of the city-level frame used further up.
conpop_cmp = (
    data
    .merge(panel_data[['city','pdid','ndids','population']], on='city')
    .merge(panelstad_data2, on=['day','area_id'], how='left', suffixes=('','_stad'))
    .merge(stadium_data[['area_id','capacity']], on='area_id')
    .drop_duplicates()
)
# user share inside the stadium = users seen in the stadium / stadium capacity
conpop_cmp['pdid_stad'] = conpop_cmp['ndids_stad'] / conpop_cmp['capacity']
# stadium contacts divided by users**1.4701 (an additional division by pdid_stad is disabled)
conpop_cmp['ncontacts_2_normed'] = conpop_cmp['ncontacts_2'] / conpop_cmp['ndids_stad']**1.4701
# no explicit join keys: pandas merges on the columns both frames have in common
conpop_cmp = conpop_cmp.merge(match_data[['day','city','is_football']])

In [None]:
# Stadium bias check: detected contacts vs. users in the stadium (log-log),
# coloured by the user share inside the stadium.
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
g = sns.relplot(
    data=conpop_cmp,
    x="ndids_stad",
    y="ncontacts_2",
    hue="pdid_stad",  # sizing by "ncontacts_2_normed" was tried and is disabled
    palette=cmap,
    sizes=(10, 200),
)
g.set(xscale="log", yscale="log")
for axis in (g.ax.xaxis, g.ax.yaxis):
    axis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True)
g.ax.set_xlabel('users in stadium')
g.ax.set_ylabel('detected contacts')
# major and minor grid share one style and differ only in colour
for which, colour in (('major', 'black'), ('minor', 'grey')):
    g.ax.grid(which=which, linestyle='-', linewidth='0.5', color=colour, alpha=.25)
g.ax.set_xlim([2.01e0, 1.2e2])
g.ax.set_ylim([1e0, 7e2])

# only one legend variable is shown, so the legend title is set directly
lg = g._legend
lg.set_title('user share')

plt.savefig('plots/stadbias_npop.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/stadbias_npop.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Log-log OLS fit of stadium contacts against the number of users in the stadium:
#   log10(ncontacts_2) = const + slope * log10(ndids_stad)
# Only rows with at least `ncthr` contacts and a positive user count enter the fit.
# (Predictors 'pdid' and 'capacity' were tried as an alternative.)
ncthr = 6
fit_rows = (conpop_cmp.ncontacts_2 >= ncthr) & (conpop_cmp.ndids_stad > 0)
y = np.log10(conpop_cmp[fit_rows]['ncontacts_2'])
X = np.log10(conpop_cmp[fit_rows][['ndids_stad']])

# intercept column
X = sm.add_constant(X)

# NOTE(review): overwrites the `model`, `X` and `y` of the city-level regression
model = sm.OLS(y, X).fit()

print(model.summary())

In [None]:
# Quick look at the raw (linear-scale) relation between users in the stadium and detected
# contacts; axis labels added so the figure can be read on its own.
plt.scatter(conpop_cmp.ndids_stad, conpop_cmp.ncontacts_2)
plt.xlabel('users in stadium')
plt.ylabel('detected contacts')
#plt.xscale('symlog', linthresh=1)
#plt.yscale('symlog', linthresh=1)
plt.show()

In [None]:
# In-sample predictions (log10 scale) of the most recently fitted model
predictions = model.predict(X)

# Diagnostic: observed vs. predicted values; points on the dashed red diagonal
# are predicted perfectly.
y_lo, y_hi = min(y), max(y)
plt.scatter(y, predictions)
plt.plot([y_lo, y_hi], [y_lo, y_hi], color='red', linestyle='--')
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.title('True vs Predicted Values')
plt.show()

In [None]:
# Scaling plot: detected contacts vs. users in the stadium (log-log), coloured by the
# user share in the stadium, with the OLS fit overlaid as a line.
cmap = sns.cubehelix_palette(rot=-.2, as_cmap=True)
g = sns.relplot(
    data=conpop_cmp,
    x="ndids_stad", y="ncontacts_2",
    hue="pdid_stad",
    palette=cmap, sizes=(10, 200),
    lw=0, size=10,
)
# X and predictions hold log10 values, hence 10**. Sorting by x draws the fit as one
# continuous left-to-right line instead of following the row order of the regression input.
fit_line = pd.DataFrame({'x': 10.**(X.ndids_stad), 'y': 10.**(predictions)}).sort_values('x')
g.ax.plot(fit_line.x, fit_line.y, c='C3', lw=3)
g.set(xscale="log", yscale="log")
g.ax.xaxis.grid(True, "minor", linewidth=.25)
g.ax.yaxis.grid(True, "minor", linewidth=.25)
g.despine(left=True, bottom=True)
g.ax.set_xlabel('users in stadium')
g.ax.set_ylabel('detected contacts')
g.ax.set_xlim([2.01e0,1.2e2])
g.ax.set_ylim([1e0,7e2])

lg = g._legend
for tx in lg.texts:
    # blank the legend entry labelled '10' (presumably created by the constant size=10 above)
    if tx.get_text() == '10':
        tx.set_text('')
lg.set_title('user share')
# The former handle/label filtering was removed: its only consumer (rebuilding the legend via
# g.ax.legend) was commented out, and unpacking an empty filter result would have raised.

plt.savefig('plots/scaling_stadiums.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/scaling_stadiums.pdf', bbox_inches='tight')
plt.show()

##### for Germany

In [None]:
# Nationwide check: contacts per device (ncontacts_1 / ndid_1) against the device count ndid_1
# (an extra division by pdid_1 was tried and is disabled)
contacts_per_device = data_germany['ncontacts_1'] / data_germany['ndid_1']
plt.scatter(data_germany['ndid_1'], contacts_per_device)

#### baseline comparison of contact levels

Comparison of contact levels against a city- and weekday-specific baseline that excludes stadium contacts:

##### detected contact numbers

In [None]:
# Vacation window per city as (first day, last day) - presumably the 2024 school summer
# holidays; the baseline plots shade this window in grey.
vac_windows = {
    'Berlin': (date(2024,7,18), date(2024,8,30)),
    'Dortmund': (date(2024,7,8), date(2024,8,20)),
    'Düsseldorf': (date(2024,7,8), date(2024,8,20)),
    'Frankfurt am Main': (date(2024,7,15), date(2024,8,23)),
    'Gelsenkirchen': (date(2024,7,8), date(2024,8,20)),
    'Hamburg': (date(2024,7,18), date(2024,8,28)),
    'Köln': (date(2024,7,8), date(2024,8,20)),
    'Leipzig': (date(2024,6,20), date(2024,8,2)),
    'München': (date(2024,7,29), date(2024,9,9)),
    'Stuttgart': (date(2024,7,25), date(2024,9,7)),
}
vac_data = pd.DataFrame(
    [(city, first_day, last_day) for city, (first_day, last_day) in vac_windows.items()],
    columns=['city','day_start','day_end'],
)
vac_data

In [None]:
# Last day covered by the contact data (used below to clip vacation windows and match labels)
data['day'].max()

In [None]:
# Daily contact totals per city. Only the column that is needed is aggregated (as the
# Horvitz-Thompson cell further down does), so unrelated columns of `data` are never summed.
data_1 = data.groupby(['day','city']).ncontacts_1.sum().reset_index()
# ncontacts_2 is taken only from rows with area_id > -0.5 (presumably a negative area_id marks "no stadium" - confirm)
data_2 = data[data.area_id>-.5].groupby(['day','city']).ncontacts_2.sum().reset_index()
data_here = data_1.merge(data_2, on=['day','city'], how='left')
# days without stadium rows have no ncontacts_2 after the left merge; count them as 0
data_here['ncontacts'] = data_here.ncontacts_1 + data_here.ncontacts_2.fillna(0.)
data_here['wd'] = [d.weekday() for d in data_here.day]

# baseline = number contacts in city without stadiums by weekday, averaged over many weeks
baseline = data_here.groupby(['city','wd']).ncontacts_1.mean().rename('baseline').reset_index()
data_here = data_here.merge(baseline, on=['city','wd'])

# Long format for plotting: every (day, city) appears once without and once with stadium contacts
data_here2 = data_here.assign(to_baseline=data_here.ncontacts_1/data_here.baseline, with_stadiums=False)
data_here3 = data_here.assign(to_baseline=data_here.ncontacts/data_here.baseline, with_stadiums=True)
data_here4 = pd.concat([data_here2, data_here3], ignore_index=True)

#baseline_ci = pd.DataFrame(data_here.groupby(['city','wd']).ncontacts_1.std()).rename(columns={'ncontacts_1':'baseline_std'}).reset_index()
#baseline_ci['lower'] = 1. - baseline_ci.baseline_std / baseline.baseline
#baseline_ci['upper'] = 1. + baseline_ci.baseline_std / baseline.baseline

data_here4

In [None]:
# Contact level relative to the baseline, one facet row per city, with and without
# stadium contacts. Overlays: match names, vacation window (grey), holidays (red).
sns.set_theme(style="ticks")

# two colours: one line with, one line without stadium contacts
palette = sns.husl_palette(2)

city_list = sorted(set(list(aid2city.values())))

g = sns.relplot(
    data=data_here4,
    x="day", y="to_baseline",
    hue="with_stadiums", hue_order=[True,False],
    row="city", row_order=city_list,
    kind="line", palette=palette,
    height=2, aspect=5.5, facet_kws=dict(sharex=False),
)

axes = g.axes
ymax = 3.  # upper end of the y-axis; shaded bands and match labels reach up to it

# Quantities that do not depend on the facet are computed once. Sorting the days makes
# tick positions and tick labels line up deterministically.
days = sorted(set(data_here.day))
tick_labels = [f'{d.month:02d}/{d.day:02d}' if d.weekday()==6 else '' for d in days]  # label Sundays only
day_first, day_last = data_here.day.min(), data_here.day.max()
label_from, label_until = data_here4.day.min(), data.day.max()

# dates shaded in red (+/- one day) in every city - presumably nationwide public holidays
holis = [date(2024,5,1), date(2024,5,9), date(2024,5,20)]
# cities that additionally get 2024-05-30 shaded
holi_extra_cities = ['Frankfurt am Main','München','Köln','Düsseldorf','Dortmund','Gelsenkirchen','Stuttgart']

for ax_row, city in zip(axes, city_list):
    matches_here = match_data[match_data.city==city]
    vac_here = vac_data[vac_data.city==city]
    holis_here = holis + ([date(2024,5,30)] if city in holi_extra_cities else [])
    for ax in ax_row:
        ax.set_ylabel('contacts level')
        ax.plot([day_first, day_last], [1,1], c='gray')  # reference line: level equals baseline
        ax.set_title(city)
        ax.set_xticks(days)
        ax.set_xticklabels(tick_labels)
        # match names, only for matches inside the plotted period
        for day, match in zip(matches_here.day, matches_here.match):
            if label_from <= day <= label_until:
                ax.text(day, ymax, match, rotation=90, ha='center', va='top')
        # vacation window, clipped to the data; a city without a vacation entry is skipped instead of raising IndexError
        if not vac_here.empty:
            day_start = vac_here.day_start.iloc[0]
            day_end = min(vac_here.day_end.iloc[0], data.day.max())
            if day_end > day_start:
                ax.fill_between([day_start, day_end], [0]*2, [ymax]*2, color='gray', alpha=.25)
        ax.set_ylim([0., ymax])

        for holi in holis_here:
            ax.fill_between([holi-timedelta(1), holi+timedelta(1)], [0]*2, [ymax]*2, color='C3', alpha=.25)

# readable legend: title and entries
lg = g._legend
lg.set_title('stadiums')
for tx in lg.texts:
    if tx.get_text() == 'True':
        tx.set_text('with')
    elif tx.get_text() == 'False':
        tx.set_text('without')

plt.savefig('plots/contacts_baseline_cmp.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contacts_baseline_cmp.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Variant of the baseline-comparison figure in which 2024-05-01 is not shaded.
# NOTE(review): this cell writes to the same files (plots/contacts_baseline_cmp.*) as the
# preceding plot cell and therefore overwrites its output - confirm which variant is wanted.
sns.set_theme(style="ticks")

# two colours: one line with, one line without stadium contacts
palette = sns.husl_palette(2)

city_list = sorted(set(list(aid2city.values())))

g = sns.relplot(
    data=data_here4,
    x="day", y="to_baseline",
    hue="with_stadiums", hue_order=[True,False],
    row="city", row_order=city_list,
    kind="line", palette=palette,
    height=2, aspect=5.5, facet_kws=dict(sharex=False),
)

axes = g.axes
ymax = 3.  # upper end of the y-axis; shaded bands and match labels reach up to it

# Quantities that do not depend on the facet are computed once. Sorting the days makes
# tick positions and tick labels line up deterministically.
days = sorted(set(data_here.day))
tick_labels = [f'{d.month:02d}/{d.day:02d}' if d.weekday()==6 else '' for d in days]  # label Sundays only
day_first, day_last = data_here.day.min(), data_here.day.max()
label_from, label_until = data_here4.day.min(), data.day.max()

# dates shaded in red (+/- one day) in every city - presumably nationwide public holidays
holis = [date(2024,5,9), date(2024,5,20)]
# cities that additionally get 2024-05-30 shaded
holi_extra_cities = ['Frankfurt am Main','München','Köln','Düsseldorf','Dortmund','Gelsenkirchen','Stuttgart']

for ax_row, city in zip(axes, city_list):
    matches_here = match_data[match_data.city==city]
    vac_here = vac_data[vac_data.city==city]
    holis_here = holis + ([date(2024,5,30)] if city in holi_extra_cities else [])
    for ax in ax_row:
        ax.set_ylabel('contacts level')
        ax.plot([day_first, day_last], [1,1], c='gray')  # reference line: level equals baseline
        ax.set_title(city)
        ax.set_xticks(days)
        ax.set_xticklabels(tick_labels)
        # match names, only for matches inside the plotted period
        for day, match in zip(matches_here.day, matches_here.match):
            if label_from <= day <= label_until:
                ax.text(day, ymax, match, rotation=90, ha='center', va='top')
        # vacation window, clipped to the data; a city without a vacation entry is skipped instead of raising IndexError
        if not vac_here.empty:
            day_start = vac_here.day_start.iloc[0]
            day_end = min(vac_here.day_end.iloc[0], data.day.max())
            if day_end > day_start:
                ax.fill_between([day_start, day_end], [0]*2, [ymax]*2, color='gray', alpha=.25)
        ax.set_ylim([0., ymax])

        for holi in holis_here:
            ax.fill_between([holi-timedelta(1), holi+timedelta(1)], [0]*2, [ymax]*2, color='C3', alpha=.25)

# readable legend: title and entries
lg = g._legend
lg.set_title('stadiums')
for tx in lg.texts:
    if tx.get_text() == 'True':
        tx.set_text('with')
    elif tx.get_text() == 'False':
        tx.set_text('without')

plt.savefig('plots/contacts_baseline_cmp.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contacts_baseline_cmp.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Daily contact totals per city. Only the column that is needed is aggregated (as the
# Horvitz-Thompson cell further down does), so unrelated columns of `data` are never summed.
data_1 = data.groupby(['day','city']).ncontacts_1.sum().reset_index()
# ncontacts_2 is taken only from rows with area_id > -0.5 (presumably a negative area_id marks "no stadium" - confirm)
data_2 = data[data.area_id>-.5].groupby(['day','city']).ncontacts_2.sum().reset_index()
data_here = data_1.merge(data_2, on=['day','city'], how='left')
# days without stadium rows have no ncontacts_2 after the left merge; count them as 0
data_here['ncontacts'] = data_here.ncontacts_1 + data_here.ncontacts_2.fillna(0.)
data_here['wd'] = [d.weekday() for d in data_here.day]

# baseline = mean number of contacts in the city without stadiums over ALL days;
# unlike the weekday-specific variant, this groups by city only ('wd' is not used)
baseline = data_here.groupby(['city']).ncontacts_1.mean().rename('baseline').reset_index()
data_here = data_here.merge(baseline, on=['city'])

# Long format for plotting: every (day, city) appears once without and once with stadium contacts
data_here2 = data_here.assign(to_baseline=data_here.ncontacts_1/data_here.baseline, with_stadiums=False)
data_here3 = data_here.assign(to_baseline=data_here.ncontacts/data_here.baseline, with_stadiums=True)
data_here4 = pd.concat([data_here2, data_here3], ignore_index=True)

data_here4

In [None]:
# Contact level relative to the baseline currently held in data_here4, one facet row per
# city, with and without stadium contacts. Overlays: match names, vacation window (grey),
# holidays (red).
# NOTE(review): the figure is saved with the suffix '_wd' - confirm that the suffix matches
# the baseline variant in data_here4 at the time this cell runs.
sns.set_theme(style="ticks")

# two colours: one line with, one line without stadium contacts
palette = sns.husl_palette(2)

city_list = sorted(set(list(aid2city.values())))

g = sns.relplot(
    data=data_here4,
    x="day", y="to_baseline",
    hue="with_stadiums", hue_order=[True,False],
    row="city", row_order=city_list,
    kind="line", palette=palette,
    height=2, aspect=5.5, facet_kws=dict(sharex=False),
)

axes = g.axes
ymax = 3.  # upper end of the y-axis; shaded bands and match labels reach up to it

# Quantities that do not depend on the facet are computed once. Sorting the days makes
# tick positions and tick labels line up deterministically.
days = sorted(set(data_here.day))
tick_labels = [f'{d.month:02d}/{d.day:02d}' if d.weekday()==6 else '' for d in days]  # label Sundays only
day_first, day_last = data_here.day.min(), data_here.day.max()
label_from, label_until = data_here4.day.min(), data.day.max()

# dates shaded in red (+/- one day) in every city - presumably nationwide public holidays
holis = [date(2024,5,1), date(2024,5,9), date(2024,5,20)]
# cities that additionally get 2024-05-30 shaded
holi_extra_cities = ['Frankfurt am Main','München','Köln','Düsseldorf','Dortmund','Gelsenkirchen','Stuttgart']

for ax_row, city in zip(axes, city_list):
    matches_here = match_data[match_data.city==city]
    vac_here = vac_data[vac_data.city==city]
    holis_here = holis + ([date(2024,5,30)] if city in holi_extra_cities else [])
    for ax in ax_row:
        ax.set_ylabel('contacts level')
        ax.plot([day_first, day_last], [1,1], c='gray')  # reference line: level equals baseline
        ax.set_title(city)
        ax.set_xticks(days)
        ax.set_xticklabels(tick_labels)
        # match names, only for matches inside the plotted period
        for day, match in zip(matches_here.day, matches_here.match):
            if label_from <= day <= label_until:
                ax.text(day, ymax, match, rotation=90, ha='center', va='top')
        # vacation window, clipped to the data; a city without a vacation entry is skipped instead of raising IndexError
        if not vac_here.empty:
            day_start = vac_here.day_start.iloc[0]
            day_end = min(vac_here.day_end.iloc[0], data.day.max())
            if day_end > day_start:
                ax.fill_between([day_start, day_end], [0]*2, [ymax]*2, color='gray', alpha=.25)
        ax.set_ylim([0., ymax])

        for holi in holis_here:
            ax.fill_between([holi-timedelta(1), holi+timedelta(1)], [0]*2, [ymax]*2, color='C3', alpha=.25)

# readable legend: title and entries
lg = g._legend
lg.set_title('stadiums')
for tx in lg.texts:
    if tx.get_text() == 'True':
        tx.set_text('with')
    elif tx.get_text() == 'False':
        tx.set_text('without')

plt.savefig('plots/contacts_baseline_cmp_wd.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contacts_baseline_cmp_wd.pdf', bbox_inches='tight')
plt.show()

##### Horvitz-Thompson correction

In [None]:
# One row per (day, city, area_id) with everything the Horvitz-Thompson scaling needs:
# city panel (pdid, ndids), population, stadium contacts, city contacts summed per day,
# the per-day stadium panel (clashing columns get the suffix '_stad') and the match info.
city_contacts_per_day = data[['day','city','ncontacts_1']].groupby(['day','city']).sum().reset_index()
match_rank = (
    data[['day','city','area_id']]
    .merge(panel_data[['city','pdid','ndids']], on='city', how='left')
    .merge(stadium_data[['city','population']], on='city', how='left')
    .merge(data[['day','city','area_id','ncontacts_2']], on=['day','city','area_id'], how='left')
    .merge(city_contacts_per_day, on=['day','city'], how='left')
    .merge(panelstad_data2, on=['day','area_id'], how='left', suffixes=('','_stad'))
    .merge(match_data, on=['day','city','area_id'], how='left')
    .drop_duplicates()
)
# user share inside the stadium = users seen in the stadium / stadium capacity
match_rank['pdid_stad'] = match_rank['ndids_stad'] / match_rank['capacity']
match_rank

In [None]:
# Scale the detected contact counts in match_rank (city level and stadium level).
# NOTE(review): run once per kernel - the cell overwrites ncontacts_1 / ncontacts_2 in place
# and a second merge of citycorr produces suffixed 'corrfac' columns.
match_rank = pd.merge(match_rank, citycorr, on='city')  # citycorr: per-city correction factors, computed further up

# Earlier variants divided by pdid * ndids, or by pdid_avg * ndids * corrfac.
# mean user share over the distinct (city, pdid) pairs of the panel
pdid_avg = panel_data[['city','pdid']].drop_duplicates().pdid.mean()
# city contacts: per user, damped by (relative user share)**.1038 (values noted before: .0991, .0988)
city_scaled = match_rank.ncontacts_1 / match_rank.ndids / (match_rank.pdid/pdid_avg)**.1038 / pdid_avg / match_rank.corrfac
# expressed per 10,000 inhabitants
match_rank['ncontacts_1'] = city_scaled * match_rank.population/1e4

# stadium contacts: values below the threshold are treated as missing
ncthr = 20
pdid_stad_avg = match_rank.loc[match_rank.ncontacts_2>=ncthr, 'pdid_stad'].mean()
stad_counts = match_rank.ncontacts_2.where(match_rank.ncontacts_2>=ncthr)
# same form as for cities, with exponent .5021 (values noted before: .4736, .4701)
stad_scaled = stad_counts / match_rank.ndids_stad / (match_rank.pdid_stad/pdid_stad_avg)**.5021 / pdid_stad_avg / match_rank.corrfac
# expressed per 10,000 seats of capacity
match_rank['ncontacts_2'] = stad_scaled * match_rank.capacity/1e4

##match_rank = match_rank.merge(data_germany, on='day', how='left', suffixes=('', '_germany'))
#match_rank['ncontacts_3'] = match_rank.ncontacts_1_germany / match_rank.ndid_1 / match_rank.pdid_1 # / data_germany.pdid_1.mean()

#match_rank['event'] = [match2event(match) for match in match_rank.match]
#match_rank_aux = match_rank.copy(deep=True)
#match_rank = match_rank#[['city','day','ncontacts_2','ncontacts_1','ncontacts_3']]
#match_rank.loc[:,'ncontacts_1'] = [np.log10(n) if n>0 else np.nan for n in match_rank.ncontacts_1]
#match_rank.loc[:,'ncontacts_2'] = [np.log10(n) if n>0 else np.nan for n in match_rank.ncontacts_2]
#match_rank.loc[:,'ncontacts_3'] = [np.log10(n) if n>0 else np.nan for n in match_rank.ncontacts_3]

In [None]:
# Alias the scaled match table under the name used by the aggregation cell that follows
# (plain assignment: no copy is made, both names refer to the same DataFrame).
data_n = match_rank
data_n

In [None]:
# Same baseline construction as for the raw counts, applied to the scaled values in data_n.
per_day_city = ['day','city']
data_1 = data_n.groupby(per_day_city)['ncontacts_1'].sum().reset_index()
# ncontacts_2 is taken only from rows with area_id > -0.5
data_2 = data_n[data_n.area_id>-.5].groupby(per_day_city)['ncontacts_2'].sum().reset_index()
data_here = pd.merge(data_1, data_2, on=per_day_city, how='left')
# rows without a stadium value after the left merge count as 0
data_here['ncontacts'] = data_here.ncontacts_1 + data_here.ncontacts_2.fillna(0.)
data_here['wd'] = [d.weekday() for d in data_here.day]

# baseline = mean contacts without stadiums per city and weekday
baseline = data_here.groupby(['city','wd'])['ncontacts_1'].mean().rename('baseline').reset_index()
data_here = data_here.merge(baseline, on=['city','wd'])

# long format for plotting: one copy without and one copy with stadium contacts
data_here2 = data_here.copy(deep=True)
data_here2['to_baseline'] = data_here.ncontacts_1/data_here.baseline
data_here2['with_stadiums'] = False
data_here3 = data_here.copy(deep=True)
data_here3['to_baseline'] = (data_here.ncontacts_1+data_here.ncontacts_2.fillna(0.))/data_here.baseline
data_here3['with_stadiums'] = True
data_here4 = pd.concat([data_here2, data_here3], ignore_index=True)

# (a +/- one standard deviation band around the baseline was drafted here and left disabled)

data_here4

In [None]:
# Contact level relative to the baseline for the scaled counts, one facet row per city,
# with and without stadium contacts. Overlays: match names, vacation window (grey),
# holidays (red). The y-axis reaches up to 4 here.
sns.set_theme(style="ticks")

# two colours: one line with, one line without stadium contacts
palette = sns.husl_palette(2)

city_list = sorted(set(list(aid2city.values())))

g = sns.relplot(
    data=data_here4,
    x="day", y="to_baseline",
    hue="with_stadiums", hue_order=[True,False],
    row="city", row_order=city_list,
    kind="line", palette=palette,
    height=2, aspect=5.5, facet_kws=dict(sharex=False),
)

axes = g.axes
ymax = 4.  # upper end of the y-axis; shaded bands and match labels reach up to it

# Quantities that do not depend on the facet are computed once. Sorting the days makes
# tick positions and tick labels line up deterministically.
days = sorted(set(data_here.day))
tick_labels = [f'{d.month:02d}/{d.day:02d}' if d.weekday()==6 else '' for d in days]  # label Sundays only
day_first, day_last = data_here.day.min(), data_here.day.max()
label_from, label_until = data_here4.day.min(), data.day.max()

# dates shaded in red (+/- one day) in every city - presumably nationwide public holidays
holis = [date(2024,5,1), date(2024,5,9), date(2024,5,20)]
# cities that additionally get 2024-05-30 shaded
holi_extra_cities = ['Frankfurt am Main','München','Köln','Düsseldorf','Dortmund','Gelsenkirchen','Stuttgart']

for ax_row, city in zip(axes, city_list):
    matches_here = match_data[match_data.city==city]
    vac_here = vac_data[vac_data.city==city]
    holis_here = holis + ([date(2024,5,30)] if city in holi_extra_cities else [])
    for ax in ax_row:
        ax.set_ylabel('contacts level')
        ax.plot([day_first, day_last], [1,1], c='gray')  # reference line: level equals baseline
        ax.set_title(city)
        ax.set_xticks(days)
        ax.set_xticklabels(tick_labels)
        # match names, only for matches inside the plotted period
        for day, match in zip(matches_here.day, matches_here.match):
            if label_from <= day <= label_until:
                ax.text(day, ymax, match, rotation=90, ha='center', va='top')
        # vacation window, clipped to the data; a city without a vacation entry is skipped instead of raising IndexError
        if not vac_here.empty:
            day_start = vac_here.day_start.iloc[0]
            day_end = min(vac_here.day_end.iloc[0], data.day.max())
            if day_end > day_start:
                ax.fill_between([day_start, day_end], [0]*2, [ymax]*2, color='gray', alpha=.25)
        ax.set_ylim([0., ymax])

        for holi in holis_here:
            ax.fill_between([holi-timedelta(1), holi+timedelta(1)], [0]*2, [ymax]*2, color='C3', alpha=.25)

# readable legend: title and entries
lg = g._legend
lg.set_title('stadiums')
for tx in lg.texts:
    if tx.get_text() == 'True':
        tx.set_text('with')
    elif tx.get_text() == 'False':
        tx.set_text('without')

plt.savefig('plots/contacts_baseline_cmp_horvitz.jpg', bbox_inches='tight', dpi=300)
plt.savefig('plots/contacts_baseline_cmp_horvitz.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Contact level per (day, city) plus two copies whose 'day' label is moved by one day,
# prepared for a comparison of each day with its neighbouring days.
# NOTE(review): at_tm1 relabels each row to the day BEFORE, so when joined on 'day' it supplies
# the value of the FOLLOWING day (and at_tp1 that of the preceding day) - confirm the naming.
at_t = data_here4[['day','city','to_baseline']]
one_day = timedelta(1)
at_tm1 = at_t.assign(day=at_t.day - one_day)
at_tp1 = at_t.assign(day=at_t.day + one_day)
at_tm1

In [None]:
# Neighbour-day comparison, currently disabled: the merges with the shifted copies and the
# 'effect' ratio (twice the day's level divided by the sum of the two neighbouring levels)
# are commented out, so the assignment below is a no-op and at_t gets no 'effect' column.
at_t = at_t#.merge(at_tm1, on=['day','city'], suffixes=('','_tm1'))#.merge(at_tp1, on=['day','city'], suffixes=('','_tp1'))
#at_t['effect'] = 2. * at_t.to_baseline / ( at_t.to_baseline_tm1 + at_t.to_baseline_tp1 )
at_t

In [None]:
# The 'effect' column is only created by the neighbour-day comparison in the previous cell,
# which is currently commented out; selecting it unconditionally raises KeyError.
# Show 'effect' when it exists and fall back to the raw 'to_baseline' level otherwise.
value_col = 'effect' if 'effect' in at_t.columns else 'to_baseline'
at_t[['day','city',value_col]].set_index(['day','city'])#.unstack('city')