In [None]:
import os, pandas as pd, networkx as nx, numpy as np, statsmodels.api as sm
from sqlalchemy import create_engine
from datetime import date, datetime, timedelta
import matplotlib.pyplot as plt, seaborn as sns

## parameters

In [None]:
# first / last calendar year of the analysis window (strings, parsed via "%Y" in the next cell)
yeari = '2024'
yearf = '2024'
# first / last week number of the analysis window (strings, parsed via "%W" in the next cell)
weeki = '18'
weekf = '31'

In [None]:
# first day of the window: Monday ("%w" = 1) of week `weeki` in year `yeari`
di = datetime.strptime(f'{yeari}-{weeki}-1', "%Y-%W-%w").date()
# last day of the window: Monday of week `weekf` plus six days, i.e. the Sunday of that week
df = datetime.strptime(f'{yearf}-{weekf}-1', "%Y-%W-%w").date() + timedelta(days=6)
# every calendar day from di to df, both ends included
ds = [di + timedelta(days=offset) for offset in range((df - di).days + 1)]
daylist = ds  # second name for the same list object
print(di, 'until', df)

In [None]:
# contact definition; used as suffix of the covid_network_sdkv6_<cdef> table and of the output files
# options: 'tl5_10m' 'tl6_10m' 'tl7_10m' 'tl8_10m' 'tl8_60m'
cdef = 'tl7_10m'
# alternative label of the chosen definition -- tl5: 62 ... tl7: 16   tl8: 8
# (presumably the spatial scale of the tile level; TODO confirm units)
cdef_alt = '16m_10min'

## database connection

In [None]:
# database credentials: user name and password are taken from the environment
# (os.getenv yields None for a variable that is not set)
db_usr = os.getenv('DB_USR')
db_pwd = os.getenv('DB_PWD')
# database login
host = 'nc-health-data-prod.cluster-ccsgl7rk4urn.eu-central-1.rds.amazonaws.com'
port = 5432
db = 'master'

In [None]:
# for queries with output
# Fail early with a readable message when the credentials are missing: an unset
# environment variable arrives here as None, which would otherwise only surface as
# an opaque TypeError in the string concatenation below.
if db_usr is None or db_pwd is None:
    raise RuntimeError('database credentials missing: set the DB_USR and DB_PWD environment variables')
engine = create_engine('postgresql://'+db_usr+':'+db_pwd+'@'+host+':'+str(port)+'/'+db)
# connection object handed to pd.read_sql_query by the query cells of this notebook
conn = engine.connect()

In [None]:
# Closes the database connection created in the previous cell.
# NOTE(review): on a top-to-bottom run this executes before the cells further down
# that still pass `conn` to pd.read_sql_query -- presumably meant to be run manually
# once all queries are done; confirm and consider moving it to the end of the notebook.
conn.close()

## load contact numbers $n_c$ ...

In [None]:
def contactnw_unique(contacts):
    '''
    builds an undirected 'networkx' graph of unique contacts from a list of contact events

    Every pair of distinct device IDs that occurs together in a contact event is
    connected by one edge; a pair seen in several events still yields a single edge.

    Parameters
    ----------
    contacts : list of list of str
        one list of device IDs per contact event; all devices of an event are
        treated as being in contact with each other

    Returns
    -------
    G : nx.Graph
        contact graph (sampled; not population graph) of the given contact events.
        Devices that never form a pair with another device are not added as nodes.

    '''
    G = nx.Graph()
    for dids in contacts:
        for v1 in range(len(dids)):
            for v2 in range(v1):
                # a device listed twice within one event is not in contact with itself:
                # skipping the pair keeps self-loops out of the edge count
                if dids[v1] != dids[v2]:
                    G.add_edge(dids[v1], dids[v2])
    return G

# function to restrict contact list to contacts with given minimum number of GPS pings
def contactnw_gpsunique(cs, sources, ngps):
    '''
    builds the graph of unique contacts, keeping only device pairs with enough GPS-sourced pings

    Parameters
    ----------
    cs : list of list of str
        one list of device IDs per contact event
    sources : list of list of str
        ping source of every device, aligned element by element with `cs`
    ngps : int
        minimum number of contact partners (0, 1 or 2) of a pair whose source is 'GPS':
        0 keeps every pair, 1 keeps pairs in which at least one device has source 'GPS',
        2 keeps pairs in which both devices have source 'GPS'

    Returns
    -------
    G : nx.Graph
        graph as returned by `contactnw_unique` for the filtered contacts

    Raises
    ------
    ValueError
        if `ngps` is not 0, 1 or 2
    '''
    if ngps == 0:
        # no GPS requirement: only drop duplicate IDs within each event
        G = contactnw_unique([list(set(cc)) for cc in cs])
    elif ngps == 1:
        # enumerate the ordered pairs of each event and keep those with >= 1 GPS ping
        G = contactnw_unique([[did1, did2] for cc,ss in zip(cs,sources)
                for did1,s1 in zip(cc,ss) for did2,s2 in zip(cc,ss) if [s1,s2].count('GPS')>=1 and did1!=did2])
    elif ngps == 2:
        # keep only the GPS-sourced devices of each event, so both partners have a GPS ping
        G = contactnw_unique([[did for did,s in zip(cc,ss) if s=='GPS'] for cc,ss in zip(cs,sources)])
    else:
        # without this branch an unsupported value would hit `return G` with G unbound
        raise ValueError(f'ngps must be 0, 1 or 2, got {ngps!r}')
    return G

### ... for Germany

In [None]:
#cdef_cn = '_'+cdef if cdef == 'tl8_2min' else ''
def ld_cn_germany(d):
    """Return the SQL text that loads all contact events of day `d`.

    The table suffix and the tile-level column are taken from the global `cdef`.
    Each event carries an `in_stadium` flag (true when any of its stadium distances
    is below the radius of the matching search area) and its smallest `area_id`.
    """
    return f"""
    with cn_tmp as (
    	select
                  tl{cdef[2]}
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
            from covid_network_sdkv6_{cdef}, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" = '{str(d)}'
            group by 1,2,3,4
    ),
    cn as (
        select
                  tl{cdef[2]}
                , stime
                , dids
                , sources
    			, in_stadium
                , area_id
        from cn_tmp
    )
    select *
    from cn
"""

In [None]:
ngps_stad = 1  # minimum number of GPS-sourced pings per did pair inside stadiums
data_rows = []
for t, d in enumerate(ds):
    print('processing', d, cdef)
    clist = pd.read_sql_query(ld_cn_germany(d), conn)
    # split the contact events by region:
    #   region 0 = outside stadiums (in_stadium is false or null)
    #   region 1 = inside stadiums  (in_stadium is true)
    clist_splitted = [
        clist[clist.in_stadium.isna() | (clist.in_stadium == False)],
        clist[clist.in_stadium == True],
    ]
    # one network of unique contacts per region, with the minimum number of
    # GPS-sourced pings per did pair set to 0 in region 0 and ngps_stad in region 1
    Gs = [
        contactnw_gpsunique(
            cs.dids.tolist(),  # list of list of dids
            [s[1:-1].split(',') for s in cs.sources.tolist()],  # list of list of ping sources
            ngps,
        )
        for cs, ngps in zip(clist_splitted, [0, ngps_stad])
    ]
    # each unique contact (edge) is counted twice, once per partner
    data_rows.append([d] + [2 * G.number_of_edges() for G in Gs])
data_germany = pd.DataFrame(data_rows, columns=['day','ncontacts_1','ncontacts_2'])

In [None]:
# cache the daily contact numbers on disk; create the target folder first because
# to_csv does not create missing directories
os.makedirs('output', exist_ok=True)
data_germany[['day','ncontacts_1','ncontacts_2']].to_csv(f'output/00_ncontacts_germany_{cdef}.csv', index=False)

In [None]:
# reload the cached daily contact numbers; the CSV stores the days as text,
# so they are converted back to datetime.date objects
data_germany = pd.read_csv(f'output/00_ncontacts_germany_{cdef}.csv')
data_germany['day'] = pd.to_datetime(data_germany.day).dt.date
data_germany

### ... for cities & stadiums

osm ID / city (table `planet_osm_polygon`)

- München: -62428
- Berlin: -62422
- Leipzig: -62649
- Frankfurt: -62400
- Dortmund: -1829065
- Stuttgart: -2793104
- Köln: -62578
- Düsseldorf: -62539
- Hamburg: -62782
- Gelsenkirchen: -62522

In [None]:
# function to load list of contact events & their locations and ping sources
def ld_cn(d):
    """Return the SQL text that loads the contact events of day `d` inside the listed cities.

    The city polygons are selected from planet_osm_polygon by osm_id; a contact event
    is kept when its geopoint lies inside one of them and is labelled with that city's
    name. Table suffix and tile-level column are taken from the global `cdef`.
    NOTE(review): cities3 joins on way_area only -- presumably unique per city; confirm.
    """
    return f"""
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.way_area = c2.way_area
    ),
    cn_tmp as (
    	select
                  tl{cdef[2]}
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
     			, geopoint
            from covid_network_sdkv6_{cdef}, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" = '{str(d)}'
            group by 1,2,3,4,7
    ),
    cn as (
        select
                  tl{cdef[2]}
                , stime
                , dids
                , sources
    			, in_stadium
                , area_id
                , "name" as city
        from cn_tmp
        join cities3 as c3 on st_contains(c3.way, cn_tmp.geopoint)
    )
    select *
    from cn
"""

In [None]:
ngps_stad = 1  # minimum number of GPS-sourced pings per did pair inside stadiums
data_rows = []
for t, d in enumerate(ds):
    print('processing', d, cdef)
    clist = pd.DataFrame(pd.read_sql_query(ld_cn(d), conn))
    # -1 means outside any stadium of interest; assign() returns a new frame, so nothing
    # is written into a filtered slice (which would raise pandas' SettingWithCopyWarning)
    clist = clist.assign(area_id=clist.area_id.fillna(-1))
    # groupby visits every (city, area_id) combination exactly once and in sorted order,
    # which makes the row order of the result reproducible between runs
    for (city, aid), css in clist.groupby(['city', 'area_id']):
        # split list of contact events by region (0=outside stadiums, 1=inside stadiums)
        clist_splitted = [css[(css.in_stadium == False) | css.in_stadium.isna()],# contacts outside stadiums
                          css[css.in_stadium == True]]# contacts inside stadiums
        # construct 2 networks of unique contacts (1 for outside stadiums/region 0, 1 for inside stadiums/region 1),
        # imposing a minimum number of GPS pings per did pair in contact (0 in region 0, ngps_stad in region 1)
        Gs = [contactnw_gpsunique(cs.dids.tolist(),# list of list of dids
                                  [s[1:-1].split(',') for s in cs.sources.tolist()],# list of list of ping sources
                                  ngps)# minimum number of GPS-sourced pings required per did pair
              for cs, ngps in zip(clist_splitted, [0, ngps_stad])]
        data_rows.append([d, city, aid, 2*len(Gs[0].edges()), 2*len(Gs[1].edges())])# factor 2 because of handshake lemma
data = pd.DataFrame(data_rows, columns=['day','city','area_id','ncontacts_1','ncontacts_2'])

In [None]:
# cache the per-city / per-stadium contact numbers; create the target folder first
# because to_csv does not create missing directories
os.makedirs('output', exist_ok=True)
data.to_csv(f'output/00_ncontacts_cities_{cdef}.csv', index=False)

In [None]:
# reload the cached per-city contact numbers and turn the textual days back into datetime.date
data = pd.read_csv(f'output/00_ncontacts_cities_{cdef}.csv')
data['day'] = pd.to_datetime(data.day).dt.date
data

In [None]:
# latest day present in the per-city contact numbers
data['day'].max()

## load device numbers $n_d$

### ... for Germany

In [None]:
def ld_cr_germany(di, df):
    """Return the SQL text selecting the 'Deutschland' rows of covid_results_sdkv6
    whose day lies between `di` and `df` (SQL BETWEEN, i.e. both ends included)."""
    return f"""
    select "day", ndid_1, ndid_2, pdid_1, pdid_2--, ktot_1
    from covid_results_sdkv6
    where "location"='Deutschland' and "day" between '{str(di)}' and '{str(df)}'
    order by 1
"""

panel_data_germany = pd.read_sql_query(ld_cr_germany(di, df), conn)
panel_data_germany

In [None]:
# cache the Germany-wide device numbers; create the target folder first because
# to_csv does not create missing directories
os.makedirs('output', exist_ok=True)
panel_data_germany.to_csv('output/00_panel_data_germany.csv', index=False)

In [None]:
# reload the cached Germany-wide device numbers and turn the textual days back into datetime.date
panel_data_germany = pd.read_csv('output/00_panel_data_germany.csv')
panel_data_germany['day'] = pd.to_datetime(panel_data_germany.day).dt.date
panel_data_germany

### ... for cities

In [None]:
# query counting, per city polygon, the distinct devices whose 'home' place
# (table home_work_sdkv6_202405) lies inside that polygon
ld_hw = """
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.way_area = c2.way_area
    )
    select ci."name" as city, count(distinct hw.did) as ndids
    from home_work_sdkv6_202405 as hw, cities3 as ci
    where place = 'home'
    and st_contains(ci.way, st_transform(hw.weighted_centroid, 3857))
    group by 1
"""
# read_sql_query already returns a DataFrame
panel_data = pd.read_sql_query(ld_hw, conn)
panel_data

In [None]:
# cache the per-city device numbers; create the target folder first because
# to_csv does not create missing directories
os.makedirs('output', exist_ok=True)
panel_data.to_csv('output/00_panel_data.csv', index=False)

In [None]:
# reload the cached per-city device numbers (columns city, ndids) from disk
panel_data = pd.read_csv('output/00_panel_data.csv')
panel_data

### ... for stadiums

area ID / stadium name (table `cluster_search_areas_v2`)

- 1: Allianz Arena, München
- 2: Olympiastadion, Berlin
- 3: Red Bull Arena, Leipzig
- 4: Deutsche Bank Park, Frankfurt
- 10: Signal Iduna Park, Dortmund
- 11: Mercedes-Benz Arena, Stuttgart
- 15: RheinEnergieStadion, Köln
- 24: Merkur Spiel-Arena, Düsseldorf
- 25: Volksparkarena, Hamburg
- 28: Veltins-Arena, Gelsenkirchen

In [None]:
# Stadium IDs needed by the query; they are defined in this cell because ld_dp is
# called at the end of it, so the cell also works on a fresh top-to-bottom run.
euro24_stadiums = [1,2,3,4,10,11,15,24,25,28]
add_stadiums = [16,29,41,64]# additional stadiums not involved in EURO 2024

def ld_dp(di, df, euro24_stadiums):
    """Return the SQL text loading the daily device numbers per stadium area.

    Selects day, area_id and ndids from covid_dids_pings_per_day_sdkv6 for the days
    between `di` and `df` (both included) and for the areas in `euro24_stadiums`
    plus the global `add_stadiums`, restricted to source 'ANY', homeloc 'Deutschland'
    and the rows whose stime reads 23:59:59.
    NOTE(review): the time filter uses to_char(stime, ...) while the ping-frequency
    queries use (stime at time zone 'UTC')::time -- confirm both select the same rows.
    """
    return f"""
    select 
          "day"
        , area_id
        , ndids
    from covid_dids_pings_per_day_sdkv6
    where
            "day" between '{di}' and '{df}' -- = '2024-07-05'
        and area_id in ({','.join([str(aid) for aid in euro24_stadiums + add_stadiums])}) -- = 11
        and "source" = 'ANY'
        and homeloc = 'Deutschland'
        and to_char(stime, 'HH24:MI:SS') = '23:59:59'
"""

panelstad_data = pd.DataFrame(pd.read_sql_query(ld_dp(di, df, euro24_stadiums), conn))
panelstad_data

In [None]:
# area IDs (table cluster_search_areas_v2) of the stadiums hosting EURO 2024 matches
euro24_stadiums = [
    1, 2, 3, 4,
    10, 11, 15,
    24, 25, 28,
]
# additional stadiums not involved in EURO 2024
add_stadiums = [16, 29, 41, 64]

In [None]:
# full (day x stadium) grid, filled with the loaded device numbers;
# combinations without a row in panelstad_data get 0 devices
panelstad_data2 = (
    pd.DataFrame(daylist, columns=['day'])
    .merge(pd.DataFrame(euro24_stadiums+add_stadiums, columns=['area_id']), how='cross')
    .merge(panelstad_data, on=['day','area_id'], how='left')
    .fillna(0.)
    # the left join turns the counts into floats; restore integers after filling the gaps
    .assign(ndids=lambda grid: grid.ndids.astype(int))
)
panelstad_data2

In [None]:
# cache the per-stadium device numbers; create the target folder first because
# to_csv does not create missing directories
os.makedirs('output', exist_ok=True)
panelstad_data2.to_csv('output/00_panelstad_data2.csv', index=False)

In [None]:
# reload the cached per-stadium device numbers and turn the textual days back into datetime.date
panelstad_data2 = pd.read_csv('output/00_panelstad_data2.csv')
panelstad_data2['day'] = pd.to_datetime(panelstad_data2.day).dt.date
panelstad_data2

In [None]:
# largest daily device number over all stadiums and days
panelstad_data2['ndids'].max()

## load ping numbers per device $n_p$

### ... for cities

In [None]:
# per city over the whole window [di, df]: `npings` sums ndids over the rows with a
# time of day before 23:59:00, `ndids` sums ndids over the rows at 23:59:59, and
# npingsperdid is their ratio
query = f"""
    with npings as (
    	select city, sum(ndids) as npings
    	from ts_plz_population as tpp
    	join covid_dids_pings_per_day_sdkv6 as dp on dp.homeloc = tpp.id_plz5
    	where area_id=0 and "source"='ANY' and (stime at time zone 'UTC')::time < '23:59:00'
    	and "day" between '{str(di)}' and '{str(df)}'
    	and city in ('Berlin','Dortmund','Düsseldorf','Frankfurt am Main','Gelsenkirchen','Hamburg','Köln','Leipzig','München','Stuttgart')
    	group by 1
    )
    , ndids as (
    	select city, sum(ndids) as ndids
    	from ts_plz_population as tpp
    	join covid_dids_pings_per_day_sdkv6 as dp on dp.homeloc = tpp.id_plz5
    	where area_id=0 and "source"='ANY' and (stime at time zone 'UTC')::time = '23:59:59'
        and "day" between '{str(di)}' and '{str(df)}'
    	and city in ('Berlin','Dortmund','Düsseldorf','Frankfurt am Main','Gelsenkirchen','Hamburg','Köln','Leipzig','München','Stuttgart')
    	group by 1
    )
    select nd.city, nd.ndids, np.npings, np.npings::real/nd.ndids::real as npingsperdid
    from ndids as nd
    join npings as np on np.city = nd.city
    order by 1
"""
# read_sql_query already returns a DataFrame
data_pingfreq = pd.read_sql_query(query, conn)
data_pingfreq

In [None]:
# cache the per-city ping frequencies; create the target folder first because
# to_csv does not create missing directories
os.makedirs('output', exist_ok=True)
data_pingfreq.to_csv('output/00_data_pingfreq.csv', index=False)

In [None]:
# reload the cached per-city ping frequencies (columns city, ndids, npings, npingsperdid) from disk
data_pingfreq = pd.read_csv('output/00_data_pingfreq.csv')
data_pingfreq

In [None]:
# variant of the ping-frequency query: the ratio npings/ndids is formed per city and
# day first and then averaged over the days of the window [di, df]
query = f"""
    with npings as (
    	select city, "day", sum(ndids) as npings
    	from ts_plz_population as tpp
    	join covid_dids_pings_per_day_sdkv6 as dp on dp.homeloc = tpp.id_plz5
    	where area_id=0 and "source"='ANY' and (stime at time zone 'UTC')::time < '23:59:00'
    	and "day" between '{str(di)}' and '{str(df)}'
    	and city in ('Berlin','Dortmund','Düsseldorf','Frankfurt am Main','Gelsenkirchen','Hamburg','Köln','Leipzig','München','Stuttgart')
    	group by 1,2
    )
    , ndids as (
    	select city, "day", sum(ndids) as ndids
    	from ts_plz_population as tpp
    	join covid_dids_pings_per_day_sdkv6 as dp on dp.homeloc = tpp.id_plz5
    	where area_id=0 and "source"='ANY' and (stime at time zone 'UTC')::time = '23:59:59'
        and "day" between '{str(di)}' and '{str(df)}'
    	and city in ('Berlin','Dortmund','Düsseldorf','Frankfurt am Main','Gelsenkirchen','Hamburg','Köln','Leipzig','München','Stuttgart')
    	group by 1,2
    )
    select nd.city, avg(np.npings::real/nd.ndids::real) as npingsperdid
    from ndids as nd
    join npings as np on np.city = nd.city and np."day" = nd."day"
    group by 1
    order by 1
"""
# read_sql_query already returns a DataFrame
data_pingfreq2 = pd.read_sql_query(query, conn)
data_pingfreq2

## mass event data

### event data

EURO 24 match data: https://www.fr.de/sport/fussball/em-2024-spielplan-ergebnisse-termine-gruppen-uhrzeit-deutschland-fussball-news-92086708.html

In [None]:
match_data = [
    [date(2024,6,14), 'München', 'auto', 'GER-SCO', True, 'auto'],
    [date(2024,6,15), 'Köln', 'auto', 'HUN-SUI', True, 'auto'],
    [date(2024,6,19), 'Köln', 'auto', 'SCO-SUI', True, 'auto'],
    [date(2024,6,19), 'Stuttgart', 'auto', 'GER-HUN', True, 'auto'],
    [date(2024,6,23), 'Frankfurt am Main', 'auto', 'SUI-GER', True, 'auto'],
    [date(2024,6,23), 'Stuttgart', 'auto', 'SCO-HUN', True, 'auto'],

    [date(2024,6,15), 'Berlin', 'auto', 'ESP-CRO', True, 'auto'],
    [date(2024,6,15), 'Dortmund', 'auto', 'ITA-ALB', True, 'auto'],
    [date(2024,6,19), 'Hamburg', 'auto', 'ALB-CRO', True, 'auto'],
    [date(2024,6,20), 'Gelsenkirchen', 'auto', 'ESP-ITA', True, 'auto'],
    [date(2024,6,24), 'Düsseldorf', 'auto', 'ALB-ESP', True, 'auto'],
    [date(2024,6,24), 'Leipzig', 'auto', 'CRO-ITA', True, 'auto'],

    [date(2024,6,16), 'Stuttgart', 'auto', 'SLO-DEN', True, 'auto'],
    [date(2024,6,16), 'Gelsenkirchen', 'auto', 'SRB-ENG', True, 'auto'],
    [date(2024,6,20), 'Frankfurt am Main', 'auto', 'DEN-ENG', True, 'auto'],
    [date(2024,6,20), 'München', 'auto', 'SLO-SRB', True, 'auto'],
    [date(2024,6,25), 'Köln', 'auto', 'ENG-SLO', True, 'auto'],
    [date(2024,6,25), 'München', 'auto', 'DEN-SRB', True, 'auto'],

    [date(2024,6,16), 'Hamburg', 'auto', 'POL-NED', True, 'auto'],
    [date(2024,6,17), 'Düsseldorf', 'auto', 'AUT-FRA', True, 'auto'],
    [date(2024,6,21), 'Berlin', 'auto', 'POL-AUT', True, 'auto'],
    [date(2024,6,21), 'Leipzig', 'auto', 'NED-FRA', True, 'auto'],
    [date(2024,6,25), 'Dortmund', 'auto', 'FRA-POL', True, 'auto'],
    [date(2024,6,25), 'Berlin', 'auto', 'NED-AUT', True, 'auto'],

    [date(2024,6,17), 'Frankfurt am Main', 'auto', 'BEL-SVK', True, 'auto'],
    [date(2024,6,17), 'München', 'auto', 'ROM-UKR', True, 'auto'],
    [date(2024,6,21), 'Düsseldorf', 'auto', 'SVK-UKR', True, 'auto'],
    [date(2024,6,22), 'Köln', 'auto', 'BEL-ROM', True, 'auto'],
    [date(2024,6,26), 'Stuttgart', 'auto', 'UKR-BEL', True, 'auto'],
    [date(2024,6,26), 'Frankfurt am Main', 'auto', 'SVK-ROM', True, 'auto'],

    [date(2024,6,18), 'Dortmund', 'auto', 'TUR-GEO', True, 'auto'],
    [date(2024,6,18), 'Leipzig', 'auto', 'POR-CZE', True, 'auto'],
    [date(2024,6,22), 'Dortmund', 'auto', 'TUR-POR', True, 'auto'],
    [date(2024,6,22), 'Hamburg', 'auto', 'GEO-CZE', True, 'auto'],
    [date(2024,6,26), 'Hamburg', 'auto', 'CZE-TUR', True, 'auto'],
    [date(2024,6,26), 'Gelsenkirchen', 'auto', 'GEO-POR', True, 'auto'],

    [date(2024,6,29), 'Berlin', 'auto', 'SUI-ITA', True, 'auto'],
    [date(2024,6,29), 'Dortmund', 'auto', 'GER-DEN', True, 'auto'],
    [date(2024,6,30), 'Gelsenkirchen', 'auto', 'ENG-SVK', True, 'auto'],
    [date(2024,6,30), 'Köln', 'auto', 'ESP-GEO', True, 'auto'],
    [date(2024,7,1), 'Düsseldorf', 'auto', 'FRA-BEL', True, 'auto'],
    [date(2024,7,1), 'Frankfurt am Main', 'auto', 'POR-SLO', True, 'auto'],
    [date(2024,7,2), 'München', 'auto', 'ROM-NED', True, 'auto'],
    [date(2024,7,2), 'Leipzig', 'auto', 'AUT-TUR', True, 'auto'],

    [date(2024,7,5), 'Stuttgart', 'auto', 'ESP-GER', True, 'auto'],
    [date(2024,7,5), 'Hamburg', 'auto', 'POR-FRA', True, 'auto'],
    [date(2024,7,6), 'Düsseldorf', 'auto', 'ENG-SUI', True, 'auto'],
    [date(2024,7,6), 'Berlin', 'auto', 'NED-TUR', True, 'auto'],

    [date(2024,7,9), 'München', 'auto', 'ESP-FRA', True, 'auto'],
    [date(2024,7,10), 'Dortmund', 'auto', 'ENG-NED', True, 'auto'],

    [date(2024,7,14), 'Berlin', 'auto', 'ESP-ENG', True, 'auto'],

    [date(2024,6,9), 'München', 41, 'AC/DC', False, 66000],# 66000 https://www.abendzeitung-muenchen.de/kultur/musik/so-war-das-erste-konzert-von-acdc-im-olympiastadion-in-muenchen-art-985477#:~:text=AC%2FDC%20in%20M%C3%BCnchen%3A%20Ein,auch%20nicht%20vom%20Dauerregen%20verderben.
    [date(2024,6,12), 'München', 41, 'AC/DC', False, 66000],# 66000
    [date(2024,6,22), 'München', 41, 'A Gabalier', False, 60000],# 60000 https://www.merkur.de/kultur/andreas-gabalier-muenchen-olmypiastadion-konzert-david-hasselhoff-mario-barth-zr-93146068.html#:~:text=%E2%80%9EVolks%2DRock%27n%27,so%20oder%20so%20%C3%A4hnlich%20zutr%C3%A4gt.
    [date(2024,7,11), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.hessenschau.de/kultur/rammstein-in-frankfurt-kein-platz-mehr-fuer-ironie-v1,rammstein-frankfurt-konzert-100.html
    [date(2024,7,12), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.faz.net/aktuell/rhein-main/kultur/rammstein-in-frankfurt-begeisterte-fans-im-waldstadion-und-kritik-19852500.html
    [date(2024,7,13), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.faz.net/aktuell/rhein-main/kultur/rammstein-in-frankfurt-begeisterte-fans-im-waldstadion-und-kritik-19852500.html
    [date(2024,6,18), 'Frankfurt am Main', 'auto', 'Death Punch', False, 15000],# 15000 https://time-for-metal.eu/five-finger-death-punch-am-18-06-2024-in-der-festhalle-frankfurt/#:~:text=Jetzt%20hei%C3%9Ft%20es%20auf%20den,f%C3%BCr%20Five%20Finger%20Death%20Punch!
    [date(2024,7,12), 'Köln', 'auto', 'P Maffay', False, 37000],# 37000 https://www.express.de/koeln/peter-maffay-in-koeln-verkehrs-chaos-konzert-startet-spaeter-825568
    [date(2024,7,13), 'Köln', 'auto', 'R Kaiser', False, 42000],# 42000 https://www.derwesten.de/panorama/promi-tv/roland-kaiser-koeln-konzert-kinder-id301052575.html#:~:text=Es%20war%20eines%20der%20gr%C3%B6%C3%9Ften,Rhein%2DEnergie%2DStadion%20spielte.
    [date(2024,6,1), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,7,8), 'München', 'auto', 'Tollwood', False, 'auto'],
    [date(2024,5,12), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,24), 'München', 41, 'Metallica', False, 75000],# 75000 https://www.burnyourears.de/live/54216-metallica-konzertbericht-zum-auftakt-der-m72-world-tour-in-m%C3%BCnchen.html#:~:text=Mai%202024%20%E2%80%93%20Erster%20Abend&text=Das%20optische%20und%20akustische%20Donnergrollen,Anwesenden%20im%20M%C3%BCnchner%20Olympiastadion%20nachhallen.
    [date(2024,5,26), 'München', 41, 'Metallica', False, 75000],# 75000
    [date(2024,5,19), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,17), 'Gelsenkirchen', 'auto', 'AC/DC', False, 55000],# 55000 https://www.waz.de/staedte/gelsenkirchen/article242363698/AC-DC-rocken-die-Veltins-Arena-in-Gelsenkirchen.html
    [date(2024,5,21), 'Gelsenkirchen', 'auto', 'AC/DC', False, 54000],# 54000 https://www.radioemscherlippe.de/artikel/gelsenkirchen-zweites-acdc-konzert-in-der-arena-1990321.html
    [date(2024,5,18), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    [date(2024,6,1), 'Düsseldorf', -1, 'Japan Day', False, 'auto'],# https://www.dus.emb-japan.go.jp/itpr_ja/11_000001_00955.html
    [date(2024,6,28), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,6,29), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,6,30), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,5,18), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,24), 'Dortmund', -1, 'Dogs & Fun', False, 'auto'],
    [date(2024,5,18), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,9), 'Köln', 'auto', 'DFB Pokal', True, 'auto'],
    [date(2024,5,19), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,25), 'Berlin', 'auto', 'DFB Pokal', True, 'auto'],
    [date(2024,5,11), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,26), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,5), 'Berlin', 'auto', 'S25 Berlin', False, 'auto'],
    [date(2024,5,4), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,1), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,5,27), 'Düsseldorf', 'auto', 'BLiga rel', True, 'auto'],
    [date(2024,5,3), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,5), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,3), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,4), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,30), 'München', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,5,4), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,7), 'Dortmund', 'auto', 'B2Run', True, 'auto'],
    [date(2024,4,28), 'Berlin', 16, 'RLiga (f)', True, 'auto'],
    [date(2024,5,5), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,5,18), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,6,9), 'Berlin', 16, 'RLiga (f)', True, 'auto'],
    [date(2024,5,25), 'Frankfurt am Main', 64, 'HLiga', True, 'auto'],
    [date(2024,6,30), 'Frankfurt am Main', 64, 'Am. Football', False, 'auto'],
    [date(2024,7,13), 'Frankfurt am Main', 64, 'Am. Football', False, 'auto'],
    [date(2024,4,26), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,5,12), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Hamburg', -1, 'Hafengeburtstag', False, 'auto'],
    [date(2024,5,17), 'Hamburg', -1, 'M M-Westernhagen', False, 11000],# 11000 https://www.soundsandbooks.com/marius-mueller-westernhagen-live-in-hamburg-2024/#:~:text=Als%20um%2020.15%20Uhr%20der,ausverkauften%20Halle%20auf%20ihren%20Sitzen.
    [date(2024,5,25), 'Hamburg', 29, 'RLiga (f)', True, 'auto'],
    [date(2024,6,1), 'Hamburg', 29, 'HipHop', True, 'auto'],
    [date(2024,6,29), 'München', 41, 'fan zone', False, 'auto'],
    [date(2024,7,5), 'München', 41, 'fan zone', False, 'auto'],
    [date(2024,7,13), 'München', 41, 'A Bayern', False, 'auto'],
    [date(2024,7,17), 'München', 41, 'B2Run', False, 'auto'],

    [date(2024,7,18), 'München', -1, 'Tollwood', False, 'auto'],
    [date(2024,7,14), 'Hamburg', 'auto', 'Am. Football', False, 'auto'],
    [date(2024,7,17), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000 https://www.welt.de/wirtschaft/article252642610/Taylor-Swift-Mehr-Amerikaner-als-Berliner-diese-Menschen-waren-beim-Konzert-in-Gelsenkirchen.html
    [date(2024,7,18), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000
    [date(2024,7,19), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000
    [date(2024,7,15), 'Stuttgart', -1, 'P Maffay', False, 16000],# 16000 https://www.stuttgarter-nachrichten.de/inhalt.konzert-auf-dem-wasen-so-war-s-bei-peter-maffay-in-stuttgart.f0e482a1-b87d-4f05-9cd3-2daf990c236a.html
    [date(2024,7,16), 'Stuttgart', -1, 'P Maffay', False, 16000],
    [date(2024,7,17), 'Stuttgart', -1, 'AC/DC', False, 90000],# 90000 https://www.swr.de/swraktuell/baden-wuerttemberg/stuttgart/acdc-konzert-stuttgart-100.html
    [date(2024,7,19), 'Stuttgart', 'auto', 'P!NK', False, 45000],# 45000 https://www.stuttgarter-nachrichten.de/inhalt.pink-konzert-in-stuttgart-was-fuer-ein-spektakel.1acf464d-6301-483d-8b07-8efd24276f18.html
    [date(2024,6,27), 'Stuttgart', -1, 'fan zone', False, 'auto'],
    [date(2024,7,20), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000 https://www.rundschau-online.de/kultur/coldplay-konzert-im-duesseldorfer-stadion-begeistert-47-000-831694
    [date(2024,7,21), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000
    [date(2024,7,23), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000
    [date(2024,7,18), 'Frankfurt am Main', 'auto', 'P Maffay', False, 39000],# 39000 https://cityguide-rhein-neckar.de/2024/07/19/ein-abend-voller-emotionen-peter-maffay-abschiedskonzert-im-deutsche-bank-park/
    [date(2024,7,20), 'Frankfurt am Main', 'auto', 'R Kaiser', False, 25000],# 25000 https://cityguide-rhein-neckar.de/2024/07/21/roland-kaiser-begeistert-25-000-fans-im-deutsche-bank-park/#:~:text=20.07.2024%20%2D%20Roland%20Kaiser%20bei,Kaiser%2050%20Jahre%2050%20Hits.
    [date(2024,7,17), 'Leipzig', 'auto', 'P!NK', False, 43000],# 43000 https://www.radiosaw.de/artikel/fotos-pink-konzert-leipzig-am-17-juli
    [date(2024,7,19), 'Leipzig', 'auto', 'R Kaiser', False, 44000],# 44000 https://www.lvz.de/kultur/regional/der-kaiser-gibt-sich-die-ehre-ILGMCNV6JJBDPLEBGZ3MTYWNHA.html#:~:text=Um%20ihr%20Idol%20zu%20sehen,Zuschauer%20kein%20Weg%20zu%20weit.&text=%E2%80%9E50%20Jahre%2C%2050%20Hits%E2%80%9C,19.%20Juli%20begeisterte%20die%20Fangemeinde.
    [date(2024,7,20), 'Leipzig', 'auto', 'P Maffay', False, 38000],# 38000 https://www.rnd.de/kultur/peter-maffay-in-leipzig-buehnenabschied-im-stadion-ruehrt-fans-zu-traenen-FVW6BH4VWZH5LMLDHXW4DA4R6U.html#:~:text=Das%20Taschentuch%20in%20der%20Hand,letzte%20Konzert%20von%20Peter%20Maffay.
    [date(2024,7,20), 'Köln', -1, 'CSD', False, 'auto'],
    [date(2024,7,21), 'Köln', -1, 'CSD', False, 'auto'],
    [date(2024,6,1), 'Berlin', -1, 'Schlagernacht', False, 'auto'],

    [date(2024,7,13), 'Berlin', -1, 'Rundfunk Open Air', False, 17000],# 17000 https://www.mix1.de/news/2024-berliner-rundfunk-open-air/#:~:text=20%20Jahre%20Berliner%20Rundfunk%20Open,das%2017.000%20begeisterte%20Besucher%20verzauberte.
    [date(2024,6,10), 'Berlin', -1, 'Green Day', False, 22000],# 22000 https://www.rollingstone.de/green-day-live-in-berlin-anfahrt-einlass-support-setlist-und-wetter-2729627/#:~:text=Die%20Waldb%C3%BChne%20bietet%20Platz%20f%C3%BCr,Das%20Konzert%20ist%20restlos%20ausverkauft.
    [date(2024,5,4), 'Berlin', -1, 'Mario Barth', False, 22000],# 22000 https://www.waldbuehne-berlin.de/event/mario_barth_2026-05-09_19/#:~:text=Zuletzt%20war%20Mario%20Barth%20am,22.000%20begeisterte%20Fans%20gefeiert%20wurde.
    [date(2024,8,3), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,12), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,27), 'Berlin', 16, 'friendly match', True, 'auto'],
    [date(2024,8,3), 'Berlin', 16, 'friendly match', True, 'auto'],
    [date(2024,4,20), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,8,3), 'Dortmund', 'auto', 'BLiga3', True, 'auto'],
    [date(2024,4,21), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,16), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,4,6), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,7), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,5), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,19), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,26), 'Frankfurt am Main', 'auto', 'T Scott', False, 'auto'],
    [date(2024,7,27), 'Frankfurt am Main', 'auto', 'T Scott', False, 'auto'],
    [date(2024,7,20), 'Köln', 'auto', 'T Scott', False, 'auto'],
    [date(2024,4,13), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,26), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],# https://www.festivalsunited.com/magazine/konzerte/das-war-rammstein-in-der-veltins-arena-gelsenkirchen
    [date(2024,7,27), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,29), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,30), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,31), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,8,3), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,23), 'Hamburg', 'auto', 'T Swift', False, 50000],# https://www.ardmediathek.de/video/hamburg-journal/taylor-swift-verzaubert-50-000-fans-in-hamburg/ndr/Y3JpZDovL25kci5kZS84YzkzMDYyZi1jMDY1LTRmZjktOWI3Yi0wNTc4OWZlMThiYjE#:~:text=Hamburg%20Journal%3A%20Taylor%20Swift%20verzaubert%2050.000%20Fans%20in%20Hamburg%20%7C%20Video,(24.7.2024)%20mit%20Untertitel
    [date(2024,7,24), 'Hamburg', 'auto', 'T Swift', False, 50000],
    [date(2024,4,14), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,8,2), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,13), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,17), 'München', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,4,13), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,27), 'München', 41, 'T Swift', False, 70000],# https://www.allgaeuer-zeitung.de/bayern/taylor-swift-in-muenchen-eras-tour-2024-im-olympiastadion-27-7-24_arid-775404
    [date(2024,7,28), 'München', 41, 'T Swift', False, 70000],
    [date(2024,4,13), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],

    [date(2024,8,3), 'Stuttgart', -1, 'SDP', False, 30000],# https://www.stuttgarter-nachrichten.de/inhalt.berliner-musiker-duo-in-stuttgart-die-energie-freisetzen-sdp-rocken-den-wasen-zum-jubilaeumskonzert.2cf5bc24-4c05-4979-9c9d-5a8c0e7acde9.html
    [date(2024,8,3), 'Dortmund', -1, 'Dortmund OLE', False, 'auto'],
    [date(2024,5,19), 'Dortmund', -1, 'Schlagerfest XXL', False, 9000],# https://rheinruhraktuell.de/9000-fans-feiern-mit-florian-silbereisen-die-jubilaeumsausgabe-des-schlagerfestes/
    [date(2024,5,27), 'Frankfurt am Main', -1, 'Apache 207', False, 'auto'],
    [date(2024,5,6), 'Dortmund', -1, 'Apache 207', False, 'auto'],
    [date(2024,7,21), 'Gelsenkirchen', -1, 'Schalke Tach', False, 'auto'],
    [date(2024,5,24), 'Köln', -1, 'H Carpendale', False, 'auto'],
    [date(2024,8,2), 'Leipzig', -1, 'SDP', False, 'auto'],
    [date(2024,7,19), 'Düsseldorf', -1, 'Rheinkirmes', False, 'auto'],
    [date(2024,6,2), 'Leipzig', -1, 'Stadtfest', False, 'auto'],
    [date(2024,5,19), 'Leipzig', -1, 'Weinfest', False, 'auto'],
    [date(2024,5,8), 'Leipzig', -1, 'Apache 207', False, 'auto'],
    [date(2024,6,6), 'Leipzig', -1, 'A Rieu', False, 'auto'],
    [date(2024,6,4), 'Leipzig', -1, 'A Rieu', False, 'auto'],
    [date(2024,7,11), 'Stuttgart', -1, 'Death Punch', False, 'auto'],
    [date(2024,4,30), 'Stuttgart', -1, 'Bushido', False, 'auto'],
    [date(2024,5,12), 'Stuttgart', -1, 'Apache 207', False, 'auto'],
    [date(2024,5,31), 'Stuttgart', -1, 'B Ceylan', False, 'auto'],
    #[date(), '', 'auto', '', False, 'auto'],

]
# Wrap the raw event rows in a DataFrame with named columns and display it.
match_data = pd.DataFrame(
    data=match_data,
    columns=['day', 'city', 'area_id', 'match', 'is_football', 'capacity'],
)
match_data

### stadium meta data

In [None]:
# Query the ID and name of every row in cluster_search_areas_v2.
ld_csa = """
    select 
          area_id
        , area_name
    from cluster_search_areas_v2
"""
stadname_data = pd.read_sql_query(ld_csa, conn)
# Keep only the part of each name that precedes the first comma.
stadname_data['area_name'] = stadname_data['area_name'].map(lambda full_name: full_name.split(',')[0])

In [None]:
# Substitute two stadium names; the replacement applies to every matching cell of the frame.
replace_dict = {
    'Mercedes-Benz Arena': 'MHP Arena',
    'Volksparkarena': 'Volksparkstadion',
}
stadname_data = stadname_data.replace(to_replace=replace_dict)

In [None]:
# Persist the stadium name table; later cells reload it from this CSV.
stadname_data.to_csv(path_or_buf='output/00_stadname_data.csv', index=False)

In [None]:
# Reload the stadium name table from disk and build an area_id -> area_name lookup.
stadname_data = pd.read_csv('output/00_stadname_data.csv')
aid2name = dict(zip(stadname_data['area_id'], stadname_data['area_name']))
stadname_data

In [None]:
# Show the row(s) whose area_id equals 1.
stadname_data.loc[stadname_data['area_id'] == 1]

Stadium capacity data source: https://de.wikipedia.org/wiki/Fu%C3%9Fball-Europameisterschaft_2024

In [None]:
# One row per stadium: (city, area_id, capacity, population).
# Cities with more than one stadium repeat the same population value.
stadium_data = pd.DataFrame(
    [
        ('Berlin', 2, 71000, 3645000),
        ('Berlin', 16, 22000, 3645000),
        ('Dortmund', 10, 62000, 587000),
        ('Düsseldorf', 24, 47000, 619000),
        ('Frankfurt am Main', 4, 47000, 753000),
        ('Frankfurt am Main', 64, 12000, 753000),
        ('Gelsenkirchen', 28, 50000, 261000),
        ('Hamburg', 25, 49000, 1841000),
        ('Hamburg', 29, 30000, 1841000),
        ('Köln', 15, 43000, 1086000),
        ('Leipzig', 3, 40000, 588000),
        ('München', 1, 66000, 1472000),
        ('München', 41, 69000, 1472000),
        ('Stuttgart', 11, 54000, 635000),
    ],
    columns=['city', 'area_id', 'capacity', 'population'],
)
stadium_data

In [None]:
# Persist the stadium table; later cells reload it from this CSV.
stadium_data.to_csv(path_or_buf='output/00_stadium_data.csv', index=False)

In [None]:
# Reload the stadium table from disk and display it.
stadium_data = pd.read_csv(filepath_or_buffer='output/00_stadium_data.csv')
stadium_data

In [None]:
# Stadium (area) ID -> city, ordered alphabetically by city name.
# The sort is stable, so stadiums of the same city keep the order given here.
aid2city = dict(sorted(
    [
        (1, 'München'),
        (2, 'Berlin'),
        (3, 'Leipzig'),
        (4, 'Frankfurt am Main'),
        (10, 'Dortmund'),
        (11, 'Stuttgart'),
        (15, 'Köln'),
        (24, 'Düsseldorf'),
        (25, 'Hamburg'),
        (28, 'Gelsenkirchen'),

        (16, 'Berlin'),
        (29, 'Hamburg'),
        (41, 'München'),
        (64, 'Frankfurt am Main'),
    ],
    key=lambda pair: pair[1],
))
# City -> stadium ID. The pairs are walked backwards, so for a city with several
# stadiums the ID listed first above is written last and therefore wins.
city2aid = {city: aid for aid, city in reversed(list(aid2city.items()))}

In [None]:
# Resolve 'auto' stadium IDs to the standard stadium ID of the event's city.
match_data['area_id'] = [
    city2aid[city] if aid == 'auto' else aid
    for aid, city in zip(match_data['area_id'], match_data['city'])
]
# Resolve 'auto' capacities to the stadium's capacity. Explicit capacities are
# kept, and so is 'auto' for events without a stadium (area_id -1).
match_data['capacity'] = [
    npop if (npop != 'auto' or aid == -1)
    else stadium_data.loc[stadium_data['area_id'] == aid, 'capacity'].iloc[0]
    for aid, npop in zip(match_data['area_id'], match_data['capacity'])
]
match_data

In [None]:
# Persist the resolved event table; later cells reload it from this CSV.
match_data.to_csv(path_or_buf='output/00_event_data.csv', index=False)

## load data

In [None]:
# load stadium meta data
stadname_data = pd.read_csv('output/00_stadname_data.csv')
# lookup: area_id -> area_name
aid2name = dict(zip(stadname_data['area_id'], stadname_data['area_name']))

# Stadium (area) ID -> city, ordered alphabetically by city name.
# The sort is stable, so stadiums of the same city keep the order given here.
aid2city = dict(sorted(
    [
        (1, 'München'),
        (2, 'Berlin'),
        (3, 'Leipzig'),
        (4, 'Frankfurt am Main'),
        (10, 'Dortmund'),
        (11, 'Stuttgart'),
        (15, 'Köln'),
        (24, 'Düsseldorf'),
        (25, 'Hamburg'),
        (28, 'Gelsenkirchen'),

        (16, 'Berlin'),
        (29, 'Hamburg'),
        (41, 'München'),
        (64, 'Frankfurt am Main'),
    ],
    key=lambda pair: pair[1],
))
# City -> stadium ID. The pairs are walked backwards, so for a city with several
# stadiums the ID listed first above is written last and therefore wins.
city2aid = {city: aid for aid, city in reversed(list(aid2city.items()))}

In [None]:
# n_c Germany & stadiums: load total contact numbers for Germany and stadiums
data_germany = pd.read_csv(f'output/00_ncontacts_germany_{cdef}.csv')
data_germany['day'] = pd.to_datetime(data_germany['day']).dt.date

# n_c cities: load total contact numbers for cities
data = pd.read_csv(f'output/00_ncontacts_cities_{cdef}.csv')
data['day'] = pd.to_datetime(data['day']).dt.date

# n_pop cities & stadiums: load stadium capacity data for EURO 2024
stadium_data = pd.read_csv('output/00_stadium_data.csv')

# n_d Germany: load did numbers for Germany
panel_data_germany = pd.read_csv('output/00_panel_data_germany.csv')
panel_data_germany['day'] = pd.to_datetime(panel_data_germany['day']).dt.date

# attach the Germany-wide did numbers to the contact numbers, matched by day
data_germany = data_germany.merge(panel_data_germany, on='day')

# n_d cities: load did numbers for cities
panel_data = pd.read_csv('output/00_panel_data.csv')
# stadium_data has one row per stadium, so cities with several stadiums appear
# more than once in the (city, population) lookup. De-duplicate it before
# merging, otherwise the panel rows of those cities are multiplied.
panel_data = panel_data.merge(stadium_data[['city','population']].drop_duplicates())
# ndids relative to the city population
panel_data['pdid'] = panel_data.ndids / panel_data.population

# n_d stadiums: load did numbers for stadiums
panelstad_data2 = pd.read_csv('output/00_panelstad_data2.csv')
panelstad_data2['day'] = [d.date() for d in pd.to_datetime(panelstad_data2.day)]

# NOTE(review): the population merge used to be repeated at this point, which
# multiplied the rows of multi-stadium cities a second time; it is done once above.

In [None]:
# load mass event data
match_data = pd.read_csv('output/00_event_data.csv')
match_data['day'] = pd.to_datetime(match_data['day']).dt.date
# capacities become floats; the 'auto' placeholder is kept unchanged
match_data['capacity'] = [cap if cap == 'auto' else float(cap) for cap in match_data['capacity']]