In [13]:
import os, pandas as pd#, pytz, numpy as np
from sqlalchemy import create_engine
from datetime import datetime, timedelta#, date

# database connection

In [8]:
# database credentials
db_usr, db_pwd = os.getenv('DB_USR'), os.getenv('DB_PWD') # your database user name and password
# database login
host, port, db = 'nc-health-data-prod.cluster-ccsgl7rk4urn.eu-central-1.rds.amazonaws.com', 5432, 'master'

In [11]:
# for queries with output
engine = create_engine('postgresql://'+db_usr+':'+db_pwd+'@'+host+':'+str(port)+'/'+db)
conn = engine.connect()

In [12]:
conn.close()

# parameters

In [1]:
yeari, yearf = '2024', '2024'
weeki, weekf = '18', '31'

In [4]:
di = datetime.strptime(f'{yeari}-{weeki}-1', "%Y-%W-%w").date()
df = datetime.strptime(f'{yearf}-{weekf}-1', "%Y-%W-%w").date() + timedelta(6)
ds = [di+timedelta(dt) for dt in range((df-di).days+1)]
daylist = ds
print(di, 'until', df)

2024-04-29 until 2024-08-04


In [5]:
cdef = 'tl7_10m'# 'tl5_10m' 'tl6_10m' 'tl7_10m' 'tl8_10m' 'tl8_60m'
cdef_alt = '16m_10min'# tl5: 62   tl6: 31   tl7: 16   tl8: 8

# generate data

## load device numbers $n_d$

### ... for Germany

dids with home in Germany:

In [None]:
query = """
    select count(distinct did)
    from home_work_sdkv6_202406
    where place = 'home'
"""
pd.DataFrame(pd.read_sql_query(query, conn))

-> 328817

all dids passing through Germany in whole timespan:

In [None]:
query = f"""
    with pings as (
    	select *
    	from ex_corona_sdkv6
    	where "day" between '{di}' and '{df}' -- between '{di}' and '{df}' -- = '2024-06-01'
    )
    select count(distinct did) as ndids
    from pings
"""
pd.DataFrame(pd.read_sql_query(query, conn))

-> 1424328

### ... for cities

dids with home in the city:

In [None]:
ld_hw = """
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.way_area = c2.way_area
    )
    select ci."name" as city, count(distinct hw.did) as ndids
    from home_work_sdkv6_202405 as hw, cities3 as ci
    where place = 'home'
    and st_contains(ci.way, st_transform(hw.weighted_centroid, 3857))
    group by 1
"""
panel_data = pd.DataFrame(pd.read_sql_query(ld_hw, conn))
panel_data

In [None]:
panel_data.to_csv('data/metadata/panel_data.csv', index=False)

In [19]:
# n_d cities: load did numbers for cities
panel_data = pd.read_csv('data/metadata/panel_data.csv')
panel_data = panel_data.merge(stadium_data[['city','population']])
panel_data['pdid'] = panel_data.ndids / panel_data.population
panel_data

Unnamed: 0,city,ndids,population,pdid
0,Berlin,7355,3645000,0.002018
1,Berlin,7355,3645000,0.002018
2,Dortmund,2265,587000,0.003859
3,Düsseldorf,1418,619000,0.002291
4,Frankfurt am Main,1080,753000,0.001434
5,Frankfurt am Main,1080,753000,0.001434
6,Gelsenkirchen,1489,261000,0.005705
7,Hamburg,3466,1841000,0.001883
8,Hamburg,3466,1841000,0.001883
9,Köln,1908,1086000,0.001757


all dids passing through city in whole timespan:

In [None]:
ld_nd = f"""
    with tiles as (
    	select *
    	from tuberlin_euro2024_tileid
    )
    , pings as (
    	select *
    	from ex_corona_sdkv6
    	where "day" between '{di}' and '{df}' -- between '{di}' and '{df}' -- = '2024-06-01'
    )
    , joined as (
    	select *
    	from tiles as t
    	join pings as p on p.tile_id = t.tile_id
    )
    select name as city, count(distinct did) as ndids
    from joined -- pings -- joined
    group by 1
    order by 1
"""
panel_data_transient = pd.DataFrame(pd.read_sql_query(ld_nd, conn))
panel_data_transient

### ... for stadiums

all dids in stadium in a day:

area ID / stadium name (table `cluster_search_areas_v2`)

- 1: Allianz Arena, München
- 2: Olympiastadion, Berlin
- 3: Red Bull Arena, Leipzig
- 4: Deutsche Bank Park, Frankfurt
- 10: Signal Iduna Park, Dortmund
- 11: Mercedes-Benz Arena, Stuttgart
- 15: RheinEnergieStadion, Köln
- 24: Merkur Spiel-Arena, Düsseldorf
- 25: Volksparkarena, Hamburg
- 28: Veltins-Arena, Gelsenkirchen

In [None]:
ld_dp = lambda di, df, euro24_stadiums: f"""
    select 
          "day"
        , area_id
        , ndids
    from covid_dids_pings_per_day_sdkv6
    where
            "day" between '{di}' and '{df}' -- = '2024-07-05'
        and area_id in ({','.join([str(aid) for aid in euro24_stadiums + add_stadiums])}) -- = 11
        and "source" = 'ANY'
        and homeloc = 'Deutschland'
        and to_char(stime, 'HH24:MI:SS') = '23:59:59'
"""
panelstad_data = pd.DataFrame(pd.read_sql_query(ld_dp(di, df, euro24_stadiums), conn))
panelstad_data

In [None]:
euro24_stadiums = [1,2,3,4,10,11,15,24,25,28]
add_stadiums = [16,29,41,64]# additional stadiums not involved in EURO 2024

In [None]:
panelstad_data2 = pd.DataFrame(daylist, columns=['day']).merge(pd.DataFrame(euro24_stadiums+add_stadiums, columns=['area_id']), how='cross')
panelstad_data2 = panelstad_data2.merge(panelstad_data, on=['day','area_id'], how='left').fillna(0.)
panelstad_data2['ndids'] = panelstad_data2.ndids.astype(int)
panelstad_data2

In [None]:
panelstad_data2.to_csv('data/metadata/panelstad_data2.csv', index=False)

In [20]:
# n_d stadiums: load did numbers for stadiums
panelstad_data2 = pd.read_csv('data/metadata/panelstad_data2.csv')
panelstad_data2['day'] = [d.date() for d in pd.to_datetime(panelstad_data2.day)]
panelstad_data2

Unnamed: 0,day,area_id,ndids
0,2024-04-29,1,0
1,2024-04-29,2,0
2,2024-04-29,3,1
3,2024-04-29,4,1
4,2024-04-29,10,2
...,...,...,...
1367,2024-08-04,28,1
1368,2024-08-04,16,0
1369,2024-08-04,29,20
1370,2024-08-04,41,1


## load ping numbers per device $n_p$

### ... for Germany

dids with home in Germany:

In [53]:
query = f"""
    with npings as (
    	select sum(ndids) as npings
    	from covid_dids_pings_per_day_sdkv6 as dp
    	where "source"='ANY' and (stime at time zone 'UTC')::time < '23:59:00' -- and area_id=0
    	and "day" between '{str(di)}' and '{str(df)}'
    )
    select *
    from npings
"""
pd.DataFrame(pd.read_sql_query(query, conn))

Unnamed: 0,npings
0,1599792503


-> 1599391683 (`area_id` = 0)

-> 1599792503 (any `area_id`)

all dids passing through Germany in whole timespan:

In [None]:
query = f"""
    select sum(array_length(stime_arr, 1))
    from ex_corona_sdkv6
    where "day" between '{di}' and '{df}'
"""
pd.DataFrame(pd.read_sql_query(query, conn))

-> 2460712666

### ... for cities

dids with home in the city:

In [62]:
query = f"""
    with npings as (
    	select city, sum(ndids) as npings
    	from ts_plz_population as tpp
    	join covid_dids_pings_per_day_sdkv6 as dp on dp.homeloc = tpp.id_plz5
    	where area_id=0 and "source"='ANY' and (stime at time zone 'UTC')::time < '23:59:00'
    	and "day" between '{str(di)}' and '{str(df)}'
    	and city in ('Berlin','Dortmund','Düsseldorf','Frankfurt am Main','Gelsenkirchen','Hamburg','Köln','Leipzig','München','Stuttgart')
    	group by 1
    )
    , ndids as (
    	select city, sum(ndids) as ndids
    	from ts_plz_population as tpp
    	join covid_dids_pings_per_day_sdkv6 as dp on dp.homeloc = tpp.id_plz5
    	where area_id=0 and "source"='ANY' and (stime at time zone 'UTC')::time = '23:59:59'
        and "day" between '{str(di)}' and '{str(df)}'
    	and city in ('Berlin','Dortmund','Düsseldorf','Frankfurt am Main','Gelsenkirchen','Hamburg','Köln','Leipzig','München','Stuttgart')
    	group by 1
    )
    select nd.city, nd.ndids, np.npings
    from ndids as nd
    join npings as np on np.city = nd.city
    order by 1
"""
data_pingfreq = pd.DataFrame(pd.read_sql_query(query, conn))
data_pingfreq

Unnamed: 0,city,ndids,npings
0,Berlin,353402,18239313
1,Dortmund,110290,4895086
2,Düsseldorf,73831,3720448
3,Frankfurt am Main,52833,2606480
4,Gelsenkirchen,71387,3455363
5,Hamburg,168859,8363145
6,Köln,90051,4227072
7,Leipzig,158486,7637571
8,München,66807,2921822
9,Stuttgart,61204,2832907


In [63]:
data_pingfreq.to_csv('data/metadata/data_pingfreq.csv', index=False)

In [64]:
data_pingfreq = pd.read_csv('data/metadata/data_pingfreq.csv')
data_pingfreq

Unnamed: 0,city,ndids,npings
0,Berlin,353402,18239313
1,Dortmund,110290,4895086
2,Düsseldorf,73831,3720448
3,Frankfurt am Main,52833,2606480
4,Gelsenkirchen,71387,3455363
5,Hamburg,168859,8363145
6,Köln,90051,4227072
7,Leipzig,158486,7637571
8,München,66807,2921822
9,Stuttgart,61204,2832907


all dids passing through city in whole timespan:

In [None]:
ld_nd = f"""
    with tiles as (
    	select *
    	from tuberlin_euro2024_tileid
    )
    , pings as (
    	select *
    	from ex_corona_sdkv6
    	where "day" between '{di}' and '{df}' -- between '{di}' and '{df}' -- = '2024-06-01'
    )
    , joined as (
    	select *
    	from tiles as t
    	join pings as p on p.tile_id = t.tile_id
    )
    select name as city, sum(array_length(stime_arr, 1)) as npings
    from joined -- pings -- joined
    group by 1
    order by 1
"""
data_pingfreq_transient = pd.DataFrame(pd.read_sql_query(ld_nd, conn))
data_pingfreq_transient

In [15]:
data_q = data_pingfreq_transient.merge(panel_data_transient, on='city')
data_q['q'] = data_q.npings / data_q.ndids / ((df-di).days + 1) / (1440. / 10.)
data_q

NameError: name 'panel_data_transient' is not defined

## stadium meta data

In [None]:
ld_csa = f"""
    select 
          area_id
        , area_name
    from cluster_search_areas_v2
"""
stadname_data = pd.DataFrame(pd.read_sql_query(ld_csa, conn))
stadname_data['area_name'] = [name.split(',')[0] for name in stadname_data.area_name]

In [None]:
replace_dict = {'Mercedes-Benz Arena':'MHP Arena', 'Volksparkarena':'Volksparkstadion'}
stadname_data = stadname_data.replace(replace_dict)

In [None]:
stadname_data.to_csv('data/metadata/stadname_data.csv', index=False)

In [17]:
# load stadium meta data
stadname_data = pd.read_csv('data/metadata/stadname_data.csv')
aid2name = {aid: name for aid, name in zip(stadname_data.area_id, stadname_data.area_name)}

aid2city = {aid: city for aid, city in sorted({
    1: 'München',
    2: 'Berlin',
    3: 'Leipzig',
    4: 'Frankfurt am Main',
    10: 'Dortmund',
    11: 'Stuttgart',
    15: 'Köln',
    24: 'Düsseldorf',
    25: 'Hamburg',
    28: 'Gelsenkirchen',

    16: 'Berlin',
    29: 'Hamburg',
    41: 'München',
    64: 'Frankfurt am Main',
}.items(), key=lambda item: item[1])}
city2aid = {city: aid for aid, city in [(aid, city) for aid, city in aid2city.items()][::-1]}
stadname_data

Unnamed: 0,area_id,area_name
0,57,Stadion Lohmühle
1,58,Ernst-Abbe-Sportfeld
2,59,Europa-Park-Stadion
3,60,FC Bayern Camplus (Platz 1)
4,61,Hermann-Neuberger-Stadion
...,...,...
61,9,PreZero Arena
62,10,Signal Iduna Park
63,11,MHP Arena
64,12,MEWA ARENA


## stadium capacity data:

https://de.wikipedia.org/wiki/Fu%C3%9Fball-Europameisterschaft_2024

In [None]:
stadium_data = [
    ['Berlin', 2, 71000, 3645000],#'Berlin'
    ['Berlin', 16, 22000, 3645000],
    ['Dortmund', 10, 62000, 587000],#'Dortmund'
    ['Düsseldorf', 24, 47000, 619000],#'Düsseldorf'
    ['Frankfurt am Main', 4, 47000, 753000],#'Frankfurt am Main'
    ['Frankfurt am Main', 64, 12000, 753000],
    ['Gelsenkirchen', 28, 50000, 261000],#'Gelsenkirchen'
    ['Hamburg', 25, 49000, 1841000],#'Hamburg'
    ['Hamburg', 29, 30000, 1841000],
    ['Köln', 15, 43000, 1086000],#'Köln'
    ['Leipzig', 3, 40000, 588000],#'Leipzig'
    ['München', 1, 66000, 1472000],#'München'
    ['München', 41, 69000, 1472000],
    ['Stuttgart', 11, 54000, 635000],#'Stuttgart'
]
stadium_data = pd.DataFrame(stadium_data, columns=['city','area_id','capacity','population'])
stadium_data

In [None]:
stadium_data.to_csv('data/metadata/stadium_data.csv', index=False)

In [18]:
# n_pop cities & stadiums: load stadium capacity data for EURO 2024
stadium_data = pd.read_csv('data/metadata/stadium_data.csv')
stadium_data

Unnamed: 0,city,area_id,capacity,population
0,Berlin,2,71000,3645000
1,Berlin,16,22000,3645000
2,Dortmund,10,62000,587000
3,Düsseldorf,24,47000,619000
4,Frankfurt am Main,4,47000,753000
5,Frankfurt am Main,64,12000,753000
6,Gelsenkirchen,28,50000,261000
7,Hamburg,25,49000,1841000
8,Hamburg,29,30000,1841000
9,Köln,15,43000,1086000


## mass event data

In [None]:
match_data = [
    [date(2024,6,14), 'München', 'auto', 'GER-SCO', True, 'auto'],
    [date(2024,6,15), 'Köln', 'auto', 'HUN-SUI', True, 'auto'],
    [date(2024,6,19), 'Köln', 'auto', 'SCO-SUI', True, 'auto'],
    [date(2024,6,19), 'Stuttgart', 'auto', 'GER-HUN', True, 'auto'],
    [date(2024,6,23), 'Frankfurt am Main', 'auto', 'SUI-GER', True, 'auto'],
    [date(2024,6,23), 'Stuttgart', 'auto', 'SCO-HUN', True, 'auto'],

    [date(2024,6,15), 'Berlin', 'auto', 'ESP-CRO', True, 'auto'],
    [date(2024,6,15), 'Dortmund', 'auto', 'ITA-ALB', True, 'auto'],
    [date(2024,6,19), 'Hamburg', 'auto', 'ALB-CRO', True, 'auto'],
    [date(2024,6,20), 'Gelsenkirchen', 'auto', 'ESP-ITA', True, 'auto'],
    [date(2024,6,24), 'Düsseldorf', 'auto', 'ALB-ESP', True, 'auto'],
    [date(2024,6,24), 'Leipzig', 'auto', 'CRO-ITA', True, 'auto'],

    [date(2024,6,16), 'Stuttgart', 'auto', 'SVN-DEN', True, 'auto'],
    [date(2024,6,16), 'Gelsenkirchen', 'auto', 'SRB-ENG', True, 'auto'],
    [date(2024,6,20), 'Frankfurt am Main', 'auto', 'DEN-ENG', True, 'auto'],
    [date(2024,6,20), 'München', 'auto', 'SVN-SRB', True, 'auto'],
    [date(2024,6,25), 'Köln', 'auto', 'ENG-SVN', True, 'auto'],
    [date(2024,6,25), 'München', 'auto', 'DEN-SRB', True, 'auto'],

    [date(2024,6,16), 'Hamburg', 'auto', 'POL-NED', True, 'auto'],
    [date(2024,6,17), 'Düsseldorf', 'auto', 'AUT-FRA', True, 'auto'],
    [date(2024,6,21), 'Berlin', 'auto', 'POL-AUT', True, 'auto'],
    [date(2024,6,21), 'Leipzig', 'auto', 'NED-FRA', True, 'auto'],
    [date(2024,6,25), 'Dortmund', 'auto', 'FRA-POL', True, 'auto'],
    [date(2024,6,25), 'Berlin', 'auto', 'NED-AUT', True, 'auto'],

    [date(2024,6,17), 'Frankfurt am Main', 'auto', 'BEL-SVK', True, 'auto'],
    [date(2024,6,17), 'München', 'auto', 'ROU-UKR', True, 'auto'],
    [date(2024,6,21), 'Düsseldorf', 'auto', 'SVK-UKR', True, 'auto'],
    [date(2024,6,22), 'Köln', 'auto', 'BEL-ROU', True, 'auto'],
    [date(2024,6,26), 'Stuttgart', 'auto', 'UKR-BEL', True, 'auto'],
    [date(2024,6,26), 'Frankfurt am Main', 'auto', 'SVK-ROU', True, 'auto'],

    [date(2024,6,18), 'Dortmund', 'auto', 'TUR-GEO', True, 'auto'],
    [date(2024,6,18), 'Leipzig', 'auto', 'POR-CZE', True, 'auto'],
    [date(2024,6,22), 'Dortmund', 'auto', 'TUR-POR', True, 'auto'],
    [date(2024,6,22), 'Hamburg', 'auto', 'GEO-CZE', True, 'auto'],
    [date(2024,6,26), 'Hamburg', 'auto', 'CZE-TUR', True, 'auto'],
    [date(2024,6,26), 'Gelsenkirchen', 'auto', 'GEO-POR', True, 'auto'],

    [date(2024,6,29), 'Berlin', 'auto', 'SUI-ITA', True, 'auto'],
    [date(2024,6,29), 'Dortmund', 'auto', 'GER-DEN', True, 'auto'],
    [date(2024,6,30), 'Gelsenkirchen', 'auto', 'ENG-SVK', True, 'auto'],
    [date(2024,6,30), 'Köln', 'auto', 'ESP-GEO', True, 'auto'],
    [date(2024,7,1), 'Düsseldorf', 'auto', 'FRA-BEL', True, 'auto'],
    [date(2024,7,1), 'Frankfurt am Main', 'auto', 'POR-SVN', True, 'auto'],
    [date(2024,7,2), 'München', 'auto', 'ROU-NED', True, 'auto'],
    [date(2024,7,2), 'Leipzig', 'auto', 'AUT-TUR', True, 'auto'],

    [date(2024,7,5), 'Stuttgart', 'auto', 'ESP-GER', True, 'auto'],
    [date(2024,7,5), 'Hamburg', 'auto', 'POR-FRA', True, 'auto'],
    [date(2024,7,6), 'Düsseldorf', 'auto', 'ENG-SUI', True, 'auto'],
    [date(2024,7,6), 'Berlin', 'auto', 'NED-TUR', True, 'auto'],

    [date(2024,7,9), 'München', 'auto', 'ESP-FRA', True, 'auto'],
    [date(2024,7,10), 'Dortmund', 'auto', 'ENG-NED', True, 'auto'],

    [date(2024,7,14), 'Berlin', 'auto', 'ESP-ENG', True, 'auto'],

    [date(2024,6,9), 'München', 41, 'AC/DC', False, 66000],# 66000 https://www.abendzeitung-muenchen.de/kultur/musik/so-war-das-erste-konzert-von-acdc-im-olympiastadion-in-muenchen-art-985477#:~:text=AC%2FDC%20in%20M%C3%BCnchen%3A%20Ein,auch%20nicht%20vom%20Dauerregen%20verderben.
    [date(2024,6,12), 'München', 41, 'AC/DC', False, 66000],# 66000
    [date(2024,6,22), 'München', 41, 'A Gabalier', False, 60000],# 60000 https://www.merkur.de/kultur/andreas-gabalier-muenchen-olmypiastadion-konzert-david-hasselhoff-mario-barth-zr-93146068.html#:~:text=%E2%80%9EVolks%2DRock%27n%27,so%20oder%20so%20%C3%A4hnlich%20zutr%C3%A4gt.
    [date(2024,7,11), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.hessenschau.de/kultur/rammstein-in-frankfurt-kein-platz-mehr-fuer-ironie-v1,rammstein-frankfurt-konzert-100.html
    [date(2024,7,12), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.faz.net/aktuell/rhein-main/kultur/rammstein-in-frankfurt-begeisterte-fans-im-waldstadion-und-kritik-19852500.html
    [date(2024,7,13), 'Frankfurt am Main', 'auto', 'Rammstein', False, 40000],# 40000 https://www.faz.net/aktuell/rhein-main/kultur/rammstein-in-frankfurt-begeisterte-fans-im-waldstadion-und-kritik-19852500.html
    [date(2024,6,18), 'Frankfurt am Main', 'auto', 'Death Punch', False, 15000],# 15000 https://time-for-metal.eu/five-finger-death-punch-am-18-06-2024-in-der-festhalle-frankfurt/#:~:text=Jetzt%20hei%C3%9Ft%20es%20auf%20den,f%C3%BCr%20Five%20Finger%20Death%20Punch!
    [date(2024,7,12), 'Köln', 'auto', 'P Maffay', False, 37000],# 37000 https://www.express.de/koeln/peter-maffay-in-koeln-verkehrs-chaos-konzert-startet-spaeter-825568
    [date(2024,7,13), 'Köln', 'auto', 'R Kaiser', False, 42000],# 42000 https://www.derwesten.de/panorama/promi-tv/roland-kaiser-koeln-konzert-kinder-id301052575.html#:~:text=Es%20war%20eines%20der%20gr%C3%B6%C3%9Ften,Rhein%2DEnergie%2DStadion%20spielte.
    [date(2024,6,1), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,7,8), 'München', 'auto', 'Tollwood', False, 'auto'],#6000 https://www.muenchenticket.de/spielstaette/tollwood-sommerfestival/
    [date(2024,5,12), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,24), 'München', 41, 'Metallica', False, 75000],# 75000 https://www.burnyourears.de/live/54216-metallica-konzertbericht-zum-auftakt-der-m72-world-tour-in-m%C3%BCnchen.html#:~:text=Mai%202024%20%E2%80%93%20Erster%20Abend&text=Das%20optische%20und%20akustische%20Donnergrollen,Anwesenden%20im%20M%C3%BCnchner%20Olympiastadion%20nachhallen.
    [date(2024,5,26), 'München', 41, 'Metallica', False, 75000],# 75000
    [date(2024,5,19), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,17), 'Gelsenkirchen', 'auto', 'AC/DC', False, 55000],# 55000 https://www.waz.de/staedte/gelsenkirchen/article242363698/AC-DC-rocken-die-Veltins-Arena-in-Gelsenkirchen.html
    [date(2024,5,21), 'Gelsenkirchen', 'auto', 'AC/DC', False, 54000],# 54000 https://www.radioemscherlippe.de/artikel/gelsenkirchen-zweites-acdc-konzert-in-der-arena-1990321.html
    [date(2024,5,18), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    [date(2024,6,1), 'Düsseldorf', -1, 'Japan Day', False, 'auto'],# https://www.dus.emb-japan.go.jp/itpr_ja/11_000001_00955.html
    [date(2024,6,28), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],#180000 https://www.dokomi.de/de/kontakt/ausstelleranmeldung
    [date(2024,6,29), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,6,30), 'Düsseldorf', -1, 'DoKomi', False, 'auto'],
    [date(2024,5,18), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,24), 'Dortmund', -1, 'Dogs & Fun', False, 'auto'],#25000 https://www.dogs-and-fun.com/presse/pressemitteilung?tx_news_pi1%5Baction%5D=detail&tx_news_pi1%5Bcontroller%5D=News&tx_news_pi1%5Bnews%5D=20&cHash=e3742aa20c8d9d3f970148f8116ca62c
    [date(2024,5,18), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,9), 'Köln', 'auto', 'DFB Pokal', True, 'auto'],
    [date(2024,5,19), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,25), 'Berlin', 'auto', 'DFB Pokal', True, 'auto'],
    [date(2024,5,11), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,26), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,5), 'Berlin', 'auto', 'S25 Berlin', False, 'auto'],
    [date(2024,5,4), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,1), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,5,27), 'Düsseldorf', 'auto', 'BLiga rel', True, 'auto'],
    [date(2024,5,3), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,5), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,3), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,4), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,27), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,30), 'München', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,5,4), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],
    [date(2024,5,7), 'Dortmund', 'auto', 'B2Run', True, 'auto'],
    [date(2024,4,28), 'Berlin', 16, 'RLiga (f)', True, 'auto'],
    [date(2024,5,5), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,5,18), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,6,9), 'Berlin', 16, 'RLiga (f)', True, 'auto'],
    [date(2024,5,25), 'Frankfurt am Main', 64, 'HLiga', True, 'auto'],
    [date(2024,6,30), 'Frankfurt am Main', 64, 'Am. Football', False, 'auto'],#6700 https://www.football-aktuell.de/cgi-bin/news.pl?artikel=1724521679802815
    [date(2024,7,13), 'Frankfurt am Main', 64, 'Am. Football', False, 'auto'],#6700 https://www.football-aktuell.de/cgi-bin/news.pl?artikel=1724521679802815
    [date(2024,4,26), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,5,12), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,5,11), 'Hamburg', -1, 'Hafengeburtstag', False, 'auto'],#375000 https://www.ndr.de/nachrichten/hamburg/So-war-der-Hafengeburtstag-2024-in-Hamburg-,hafengeburtstag3742.html
    [date(2024,5,17), 'Hamburg', -1, 'M M-Westernhagen', False, 11000],# 11000 https://www.soundsandbooks.com/marius-mueller-westernhagen-live-in-hamburg-2024/#:~:text=Als%20um%2020.15%20Uhr%20der,ausverkauften%20Halle%20auf%20ihren%20Sitzen.
    [date(2024,5,25), 'Hamburg', 29, 'RLiga (f)', True, 'auto'],
    [date(2024,6,1), 'Hamburg', 29, 'HipHop', True, 'auto'],
    [date(2024,6,29), 'München', 41, 'fan zone', False, 'auto'],#10000 https://www.antenne.group/presse/pressemitteilungen/ueber-700.000-besuchende-in-der-fan-zone-antenne-bayern-feiert-erfolgreichen-abschluss-der-uefa-euro-2024%E2%84%A2-als-radio-partner-und-lokaler-foerderer-der-stadt-muenchen
    [date(2024,7,5), 'München', 41, 'fan zone', False, 'auto'],#10000 https://www.antenne.group/presse/pressemitteilungen/ueber-700.000-besuchende-in-der-fan-zone-antenne-bayern-feiert-erfolgreichen-abschluss-der-uefa-euro-2024%E2%84%A2-als-radio-partner-und-lokaler-foerderer-der-stadt-muenchen
    [date(2024,7,13), 'München', 41, 'A Bayern', False, 'auto'],
    [date(2024,7,17), 'München', 41, 'B2Run', False, 'auto'],#30000 https://www.wernerconsult.com/blog/b2run-muenchen/

    [date(2024,7,18), 'München', -1, 'Tollwood', False, 'auto'],#6000 https://www.muenchenticket.de/spielstaette/tollwood-sommerfestival/
    [date(2024,7,14), 'Hamburg', 'auto', 'Am. Football', False, 'auto'],
    [date(2024,7,17), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000 https://www.welt.de/wirtschaft/article252642610/Taylor-Swift-Mehr-Amerikaner-als-Berliner-diese-Menschen-waren-beim-Konzert-in-Gelsenkirchen.html
    [date(2024,7,18), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000
    [date(2024,7,19), 'Gelsenkirchen', 'auto', 'T Swift', False, 60000],# 60000
    [date(2024,7,15), 'Stuttgart', -1, 'P Maffay', False, 16000],# 16000 https://www.stuttgarter-nachrichten.de/inhalt.konzert-auf-dem-wasen-so-war-s-bei-peter-maffay-in-stuttgart.f0e482a1-b87d-4f05-9cd3-2daf990c236a.html
    [date(2024,7,16), 'Stuttgart', -1, 'P Maffay', False, 16000],
    [date(2024,7,17), 'Stuttgart', -1, 'AC/DC', False, 90000],# 90000 https://www.swr.de/swraktuell/baden-wuerttemberg/stuttgart/acdc-konzert-stuttgart-100.html
    [date(2024,7,19), 'Stuttgart', 'auto', 'P!NK', False, 45000],# 45000 https://www.stuttgarter-nachrichten.de/inhalt.pink-konzert-in-stuttgart-was-fuer-ein-spektakel.1acf464d-6301-483d-8b07-8efd24276f18.html
    [date(2024,6,27), 'Stuttgart', -1, 'fan zone', False, 'auto'],
    [date(2024,7,20), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000 https://www.rundschau-online.de/kultur/coldplay-konzert-im-duesseldorfer-stadion-begeistert-47-000-831694
    [date(2024,7,21), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000
    [date(2024,7,23), 'Düsseldorf', 'auto', 'Coldplay', False, 47000],# 47000
    [date(2024,7,18), 'Frankfurt am Main', 'auto', 'P Maffay', False, 39000],# 39000 https://cityguide-rhein-neckar.de/2024/07/19/ein-abend-voller-emotionen-peter-maffay-abschiedskonzert-im-deutsche-bank-park/
    [date(2024,7,20), 'Frankfurt am Main', 'auto', 'R Kaiser', False, 25000],# 25000 https://cityguide-rhein-neckar.de/2024/07/21/roland-kaiser-begeistert-25-000-fans-im-deutsche-bank-park/#:~:text=20.07.2024%20%2D%20Roland%20Kaiser%20bei,Kaiser%2050%20Jahre%2050%20Hits.
    [date(2024,7,17), 'Leipzig', 'auto', 'P!NK', False, 43000],# 43000 https://www.radiosaw.de/artikel/fotos-pink-konzert-leipzig-am-17-juli
    [date(2024,7,19), 'Leipzig', 'auto', 'R Kaiser', False, 44000],# 44000 https://www.lvz.de/kultur/regional/der-kaiser-gibt-sich-die-ehre-ILGMCNV6JJBDPLEBGZ3MTYWNHA.html#:~:text=Um%20ihr%20Idol%20zu%20sehen,Zuschauer%20kein%20Weg%20zu%20weit.&text=%E2%80%9E50%20Jahre%2C%2050%20Hits%E2%80%9C,19.%20Juli%20begeisterte%20die%20Fangemeinde.
    [date(2024,7,20), 'Leipzig', 'auto', 'P Maffay', False, 38000],# 38000 https://www.rnd.de/kultur/peter-maffay-in-leipzig-buehnenabschied-im-stadion-ruehrt-fans-zu-traenen-FVW6BH4VWZH5LMLDHXW4DA4R6U.html#:~:text=Das%20Taschentuch%20in%20der%20Hand,letzte%20Konzert%20von%20Peter%20Maffay.
    [date(2024,7,20), 'Köln', -1, 'CSD', False, 'auto'],#200000 https://www1.wdr.de/nachrichten/rheinland/csd-koeln-190.html
    [date(2024,7,21), 'Köln', -1, 'CSD', False, 'auto'],#1200000 https://www1.wdr.de/nachrichten/rheinland/csd-koeln-190.html
    [date(2024,6,1), 'Berlin', -1, 'Schlagernacht', False, 'auto'],

    [date(2024,7,13), 'Berlin', -1, 'Rundfunk Open Air', False, 17000],# 17000 https://www.mix1.de/news/2024-berliner-rundfunk-open-air/#:~:text=20%20Jahre%20Berliner%20Rundfunk%20Open,das%2017.000%20begeisterte%20Besucher%20verzauberte.
    [date(2024,6,10), 'Berlin', -1, 'Green Day', False, 22000],# 22000 https://www.rollingstone.de/green-day-live-in-berlin-anfahrt-einlass-support-setlist-und-wetter-2729627/#:~:text=Die%20Waldb%C3%BChne%20bietet%20Platz%20f%C3%BCr,Das%20Konzert%20ist%20restlos%20ausverkauft.
    [date(2024,5,4), 'Berlin', -1, 'Mario Barth', False, 22000],# 22000 https://www.waldbuehne-berlin.de/event/mario_barth_2026-05-09_19/#:~:text=Zuletzt%20war%20Mario%20Barth%20am,22.000%20begeisterte%20Fans%20gefeiert%20wurde.
    [date(2024,8,3), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,12), 'Berlin', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,27), 'Berlin', 16, 'friendly match', True, 'auto'],
    [date(2024,8,3), 'Berlin', 16, 'friendly match', True, 'auto'],
    [date(2024,4,20), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Berlin', 16, 'BLiga', True, 'auto'],
    [date(2024,8,3), 'Dortmund', 'auto', 'BLiga3', True, 'auto'],
    [date(2024,4,21), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,16), 'Dortmund', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,4,6), 'Dortmund', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,7), 'Düsseldorf', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,5), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,19), 'Frankfurt am Main', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,26), 'Frankfurt am Main', 'auto', 'T Scott', False, 'auto'],
    [date(2024,7,27), 'Frankfurt am Main', 'auto', 'T Scott', False, 'auto'],
    [date(2024,7,20), 'Köln', 'auto', 'T Scott', False, 'auto'],
    [date(2024,4,13), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,26), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],# https://www.festivalsunited.com/magazine/konzerte/das-war-rammstein-in-der-veltins-arena-gelsenkirchen
    [date(2024,7,27), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,29), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,30), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,7,31), 'Gelsenkirchen', 'auto', 'Rammstein', False, 60000],
    [date(2024,8,3), 'Gelsenkirchen', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Hamburg', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,23), 'Hamburg', 'auto', 'T Swift', False, 50000],# https://www.ardmediathek.de/video/hamburg-journal/taylor-swift-verzaubert-50-000-fans-in-hamburg/ndr/Y3JpZDovL25kci5kZS84YzkzMDYyZi1jMDY1LTRmZjktOWI3Yi0wNTc4OWZlMThiYjE#:~:text=Hamburg%20Journal%3A%20Taylor%20Swift%20verzaubert%2050.000%20Fans%20in%20Hamburg%20%7C%20Video,(24.7.2024)%20mit%20Untertitel
    [date(2024,7,24), 'Hamburg', 'auto', 'T Swift', False, 50000],
    [date(2024,4,14), 'Hamburg', 29, 'BLiga', True, 'auto'],
    [date(2024,4,6), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,20), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,8,2), 'Köln', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,13), 'Leipzig', 'auto', 'BLiga', True, 'auto'],
    [date(2024,4,17), 'München', 'auto', 'UEFA CL', True, 'auto'],
    [date(2024,4,13), 'München', 'auto', 'BLiga', True, 'auto'],
    [date(2024,7,27), 'München', 41, 'T Swift', False, 70000],# https://www.allgaeuer-zeitung.de/bayern/taylor-swift-in-muenchen-eras-tour-2024-im-olympiastadion-27-7-24_arid-775404
    [date(2024,7,28), 'München', 41, 'T Swift', False, 70000],
    [date(2024,4,13), 'Stuttgart', 'auto', 'BLiga', True, 'auto'],

    [date(2024,8,3), 'Stuttgart', -1, 'SDP', False, 30000],# https://www.stuttgarter-nachrichten.de/inhalt.berliner-musiker-duo-in-stuttgart-die-energie-freisetzen-sdp-rocken-den-wasen-zum-jubilaeumskonzert.2cf5bc24-4c05-4979-9c9d-5a8c0e7acde9.html
    [date(2024,8,3), 'Dortmund', -1, 'Dortmund OLE', False, 'auto'],#20000 (2023) https://www.ruhr24.de/dortmund/20000-besucher-feiern-schlager-stars-dortmund-13092851.html
    [date(2024,5,19), 'Dortmund', -1, 'Schlagerfest XXL', False, 9000],# https://rheinruhraktuell.de/9000-fans-feiern-mit-florian-silbereisen-die-jubilaeumsausgabe-des-schlagerfestes/
    [date(2024,5,27), 'Frankfurt am Main', -1, 'Apache 207', False, 'auto'],#13500 Festhalle Frankfurt
    [date(2024,5,6), 'Dortmund', -1, 'Apache 207', False, 'auto'],#13000 https://www.waz.de/kultur/article242264160/Apache-207-verbluefft-Fans-in-Dortmund-beim-Lindenberg-Hit.html
    [date(2024,7,21), 'Gelsenkirchen', -1, 'Schalke Tach', False, 'auto'],#75000 https://schalke04.de/inside/schalke-tach-75-000-fans-in-partystimmung/
    [date(2024,5,24), 'Köln', -1, 'H Carpendale', False, 'auto'],
    [date(2024,8,2), 'Leipzig', -1, 'SDP', False, 'auto'],#12000 https://www.quarterback-immobilien-arena.de/medien/pressemitteilung-1/deutschsprachiger-hiphop-stark-im-trend-konzertsaison-in-der-quarterback-immobilien
    [date(2024,7,19), 'Düsseldorf', -1, 'Rheinkirmes', False, 'auto'],#360000 https://www.tonight.de/events/kirmes/rund-vier-millionen-besucher-auf-der-rheinkirmes-2024-veranstalter-ziehen-positives-fazit_361626.html
    [date(2024,6,2), 'Leipzig', -1, 'Stadtfest', False, 'auto'],#100000 https://www.leipzig.de/newsarchiv/news/leipziger-stadtfest-2024
    [date(2024,5,19), 'Leipzig', -1, 'Weinfest', False, 'auto'],#27500 https://bildlexikon-leipzig.de/leipziger-veranstaltungen/leipziger-weinfest/
    [date(2024,5,8), 'Leipzig', -1, 'Apache 207', False, 'auto'],#12000 (2022) https://www.lvz.de/kultur/regional/konzert-in-leipzig-apache-207-begeistert-12-000-fans-in-der-arena-5RDICLXXZDSUXOUXAOQO4J64QU.html
    [date(2024,6,6), 'Leipzig', -1, 'A Rieu', False, 'auto'],#7500 https://www.quarterback-immobilien-arena.de/medien/pressemitteilung-1/deutschsprachiger-hiphop-stark-im-trend-konzertsaison-in-der-quarterback-immobilien
    [date(2024,6,4), 'Leipzig', -1, 'A Rieu', False, 'auto'],#7500 https://www.quarterback-immobilien-arena.de/medien/pressemitteilung-1/deutschsprachiger-hiphop-stark-im-trend-konzertsaison-in-der-quarterback-immobilien
    [date(2024,7,11), 'Stuttgart', -1, 'Death Punch', False, 'auto'],
    [date(2024,4,30), 'Stuttgart', -1, 'Bushido', False, 'auto'],#6000 https://www.hallenduo.de/de/home/news/details/news/bushido-erhaelt-sold-out-award-der-porsche-arena
    [date(2024,5,12), 'Stuttgart', -1, 'Apache 207', False, 'auto'],
    [date(2024,5,31), 'Stuttgart', -1, 'B Ceylan', False, 'auto'],
    #[date(), '', 'auto', '', False, 'auto'],

]
match_data = pd.DataFrame(match_data, columns=['day','city','area_id','match','is_football','capacity'])
match_data

In [71]:
aid2city = {aid: city for aid, city in sorted({
    1: 'München',
    2: 'Berlin',
    3: 'Leipzig',
    4: 'Frankfurt am Main',
    10: 'Dortmund',
    11: 'Stuttgart',
    15: 'Köln',
    24: 'Düsseldorf',
    25: 'Hamburg',
    28: 'Gelsenkirchen',

    16: 'Berlin',
    29: 'Hamburg',
    41: 'München',
    64: 'Frankfurt am Main',
}.items(), key=lambda item: item[1])}
city2aid = {city: aid for aid, city in [(aid, city) for aid, city in aid2city.items()][::-1]}

In [72]:
# map standard stadion IDs per city
match_data['area_id'] = [aid if aid!='auto' else city2aid[city] for aid, city in zip(match_data.area_id, match_data.city)]
# map stadion capacity for football matches
match_data['capacity'] = [stadium_data[stadium_data.area_id==aid].capacity.iloc[0] if npop=='auto' and aid!=-1 else npop for aid, npop in zip(match_data.area_id, match_data.capacity)]
match_data

Unnamed: 0,day,city,area_id,match,is_football,capacity
0,2024-06-14,München,1,GER-SCO,True,66000
1,2024-06-15,Köln,15,HUN-SUI,True,43000
2,2024-06-19,Köln,15,SCO-SUI,True,43000
3,2024-06-19,Stuttgart,11,GER-HUN,True,54000
4,2024-06-23,Frankfurt am Main,4,SUI-GER,True,47000
...,...,...,...,...,...,...
190,2024-06-04,Leipzig,-1,A Rieu,False,auto
191,2024-07-11,Stuttgart,-1,Death Punch,False,auto
192,2024-04-30,Stuttgart,-1,Bushido,False,auto
193,2024-05-12,Stuttgart,-1,Apache 207,False,auto


In [73]:
match_data.to_csv('data/metadata/event_data.csv', index=False)

In [22]:
# load mass event data
match_data = pd.read_csv('data/metadata/event_data.csv')
match_data['day'] = [d.date() for d in pd.to_datetime(match_data.day)]
match_data['capacity'] = [float(cap) if cap!='auto' else cap for cap in match_data.capacity]
match_data

Unnamed: 0,day,city,area_id,match,is_football,capacity
0,2024-06-14,München,1,GER-SCO,True,66000.0
1,2024-06-15,Köln,15,HUN-SUI,True,43000.0
2,2024-06-19,Köln,15,SCO-SUI,True,43000.0
3,2024-06-19,Stuttgart,11,GER-HUN,True,54000.0
4,2024-06-23,Frankfurt am Main,4,SUI-GER,True,47000.0
...,...,...,...,...,...,...
190,2024-06-04,Leipzig,-1,A Rieu,False,auto
191,2024-07-11,Stuttgart,-1,Death Punch,False,auto
192,2024-04-30,Stuttgart,-1,Bushido,False,auto
193,2024-05-12,Stuttgart,-1,Apache 207,False,auto


In [23]:
for _, row in match_data[['city','day','area_id','match','is_football','capacity']].sort_values(['city','day']).iterrows():
    print(row.city, row.day, row.area_id, row.match, row.is_football, row.capacity)

Berlin 2024-04-06 16 BLiga True 22000.0
Berlin 2024-04-12 2 BLiga True 71000.0
Berlin 2024-04-20 16 BLiga True 22000.0
Berlin 2024-04-26 2 BLiga True 71000.0
Berlin 2024-04-28 16 RLiga (f) True 22000.0
Berlin 2024-05-04 -1 Mario Barth False 22000.0
Berlin 2024-05-05 2 S25 Berlin False 71000.0
Berlin 2024-05-05 16 BLiga True 22000.0
Berlin 2024-05-11 2 BLiga True 71000.0
Berlin 2024-05-18 16 BLiga True 22000.0
Berlin 2024-05-25 2 DFB Pokal True 71000.0
Berlin 2024-06-01 -1 Schlagernacht False auto
Berlin 2024-06-09 16 RLiga (f) True 22000.0
Berlin 2024-06-10 -1 Green Day False 22000.0
Berlin 2024-06-15 2 ESP-CRO True 71000.0
Berlin 2024-06-21 2 POL-AUT True 71000.0
Berlin 2024-06-25 2 NED-AUT True 71000.0
Berlin 2024-06-29 2 SUI-ITA True 71000.0
Berlin 2024-07-06 2 NED-TUR True 71000.0
Berlin 2024-07-13 -1 Rundfunk Open Air False 17000.0
Berlin 2024-07-14 2 ESP-ENG True 71000.0
Berlin 2024-07-27 16 friendly match True 22000.0
Berlin 2024-08-03 2 BLiga True 71000.0
Berlin 2024-08-03 16 f

## EURO 2024 match data

EURO 24 match data: https://www.fr.de/sport/fussball/em-2024-spielplan-ergebnisse-termine-gruppen-uhrzeit-deutschland-fussball-news-92086708.html

In [6]:
# Creating the DataFrame with the updated match information, German city names, and times
data_group_a = {
    'City': ['München', 'Köln', 'Stuttgart', 'Köln', 'Frankfurt am Main', 'Stuttgart'],
    'Date': ['2024-06-14', '2024-06-15', '2024-06-19', '2024-06-19', '2024-06-23', '2024-06-23'],
    'Time': ['21:00', '15:00', '18:00', '21:00', '21:00', '21:00'],
    'Population': [1471508, 1085664, 634830, 1085664, 753056, 634830],
    'Match': [
        'Germany 5 - 1 Scotland', 
        'Hungary 1 - 3 Switzerland', 
        'Germany 2 - 0 Hungary', 
        'Scotland 1 - 1 Switzerland', 
        'Switzerland 1 - 1 Germany', 
        'Scotland 0 - 1 Hungary'
    ]
}
df_group_a = pd.DataFrame(data_group_a)

data_group_b = {
    'City': ['Berlin', 'Dortmund', 'Hamburg', 'Gelsenkirchen', 'Leipzig', 'Düsseldorf'],
    'Date': ['2024-06-15', '2024-06-15', '2024-06-19', '2024-06-20', '2024-06-24', '2024-06-24'],
    'Time': ['18:00', '21:00', '15:00', '21:00', '21:00', '21:00'],
    'Population': [3769000, 588462, 1841179, 260000, 597493, 619294],
    'Match': [
        'Spain 3 - 0 Croatia', 
        'Italy 2 - 1 Albania', 
        'Croatia 2 - 2 Albania', 
        'Spain 1 - 0 Italy', 
        'Croatia 1 - 1 Italy', 
        'Albania 0 - 1 Spain'
    ]
}
df_group_b = pd.DataFrame(data_group_b)

data_group_c = {
    'City': ['Stuttgart', 'Gelsenkirchen', 'München', 'Frankfurt am Main', 'Köln', 'München'],
    'Date': ['2024-06-16', '2024-06-16', '2024-06-20', '2024-06-20', '2024-06-25', '2024-06-25'],
    'Time': ['18:00', '21:00', '15:00', '18:00', '21:00', '21:00'],
    'Population': [634830, 260000, 1472000, 753056, 1085664, 1472000],
    'Match': [
        'Slovenia 1 - 1 Denmark', 
        'Serbia 0 - 1 England', 
        'Slovenia 1 - 1 Serbia', 
        'Denmark 1 - 1 England', 
        'England 0 - 0 Slovenia', 
        'Denmark 0 - 0 Serbia'
    ]
}

df_group_c= pd.DataFrame(data_group_c)

data_group_d = {
    'City': ['Hamburg', 'Düsseldorf', 'Berlin', 'Leipzig', 'Berlin', 'Dortmund'],
    'Date': ['2024-06-16', '2024-06-17', '2024-06-21', '2024-06-21', '2024-06-25', '2024-06-25'],
    'Time': ['15:00', '21:00', '18:00', '21:00', '18:00', '18:00'],
    'Population': [1841179, 619294, 3769000, 597493, 3769000, 588462],
    'Match': [
        'Poland 1 - 2 Netherlands', 
        'Austria 0 - 1 France', 
        'Poland 1 - 3 Austria', 
        'Netherlands 0 - 0 France', 
        'Netherlands 2 - 3 Austria', 
        'France 1 - 1 Poland'
    ]
}
df_group_d = pd.DataFrame(data_group_d)


data_group_e = {
    'City': ['München', 'Frankfurt am Main', 'Düsseldorf', 'Köln', 'Frankfurt am Main', 'Stuttgart'],
    'Date': ['2024-06-17', '2024-06-17', '2024-06-21', '2024-06-22', '2024-06-26', '2024-06-26'],
    'Time': ['15:00', '18:00', '15:00', '21:00', '18:00', '18:00'],
    'Population': [1488202, 753056, 617280, 1080394, 753056, 635911],
    'Match': [
        'Romania 3 - 0 Ukraine', 
        'Belgium 0 - 1 Slovakia', 
        'Slovakia 1 - 2 Ukraine', 
        'Belgium 2 - 0 Romania', 
        'Slovakia 1 - 1 Romania', 
        'Ukraine 0 - 0 Belgium'
    ]
}

df_group_e = pd.DataFrame(data_group_e)


# Creating the DataFrame with the match information, cities, dates, times, and populations
data_group_f = {
    'City': ['Dortmund', 'Leipzig', 'Hamburg', 'Dortmund', 'Gelsenkirchen', 'Hamburg'],
    'Date': ['2024-06-18', '2024-06-18', '2024-06-22', '2024-06-22', '2024-06-26', '2024-06-26'],
    'Time': ['18:00', '21:00', '15:00', '18:00', '21:00', '21:00'],
    'Population': [588462, 597493, 1841179, 588462, 260000, 1841179],
    'Match': [
        'Turkey 3 - 1 Georgia', 
        'Portugal 2 - 1 Czechia', 
        'Georgia 1 - 1 Czechia', 
        'Turkey 0 - 3 Portugal', 
        'Georgia 2 - 0 Portugal', 
        'Czechia 1 - 2 Turkey'
    ]
}
df_group_f = pd.DataFrame(data_group_f)




data_eurocup_16th = {
    'City': ['Dortmund', 'Berlin', 'Gelsenkirchen', 'Köln', 'Düsseldorf', 'Frankfurt am Main', 'München', 'Leipzig'],
    'Date': ['2024-06-29', '2024-06-29', '2024-06-30', '2024-06-30', '2024-07-01', '2024-07-01', '2024-07-02', '2024-07-02'],
    'Time': ['21:00', '18:00', '18:00', '21:00', '18:00', '21:00', '18:00', '21:00'],
    'Population': [588462, 3644826, 260000, 1085664, 617280, 736414, 1471508, 597493],
    'Match': [
        'Germany 2 - 0 Denmark',
        'Switzerland 2 - 0 Italy',
        'England 2 - 1 Slovakia ',
        'Spain 4 - 1 Georgia',
        'France 1 - 0 Belgium',
        'Portugal 0 - 0 Slovenia',
        'Romania 0 - 3 Netherlands',
        'Austria 1 - 2 Turkey'
    ]
}

df_eurocup_16th = pd.DataFrame(data_eurocup_16th)

# Creating the dataframe for the semifinals
data_semifinals = {
    'City': ['München', 'Dortmund'],
    'Date': ['2024-07-09', '2024-07-10'],
    'Time': ['21:00', '21:00'],
    'Population': [1471508, 588462],
    'Match': [
        'Spain 2 - 1 France',
        'Netherlands 1 - 2 England'
    ]
}

df_semifinals = pd.DataFrame(data_semifinals)

# Creating the dataframe for the quarterfinals
data_quarterfinals = {
    'City': ['Stuttgart', 'Hamburg', 'Berlin', 'Düsseldorf'],
    'Date': ['2024-07-05', '2024-07-05', '2024-07-06', '2024-07-06'],
    'Time': ['18:00', '21:00', '18:00', '21:00'],
    'Population': [634830, 1841179, 3644826, 617280],
    'Match': [
        'Spain 2 - 1 Germany',
        'Portugal 0 - 0 France ',
        'Netherlands 2 - 1 Turkey',
        'England 1 - 1 Switzerland'
    ]
}

df_quarterfinals = pd.DataFrame(data_quarterfinals)

data_final = {
    'City': ['Berlin'],
    'Date': ['2024-07-14'],
    'Time': ['21:00'],
    'Population': [3644826],
    'Match': [
        'Spain 2 - 1 England'
    ]
}

df_final = pd.DataFrame(data_final)



# Creating the DataFrame with the match information, cities, dates, times, and populations


df_group_a['Group'] = 'Group A'
df_group_b['Group'] = 'Group B'
df_group_c['Group'] = 'Group C'
df_group_d['Group'] = 'Group D'
df_group_e['Group'] = 'Group E'
df_group_f['Group'] = 'Group F'

# Combine the DataFrames
combined_df = pd.concat([df_group_a, df_group_b, df_group_c, df_group_d, df_group_e, df_group_f, df_eurocup_16th, df_quarterfinals, df_semifinals, df_final ], ignore_index=True)
combined_df = combined_df.rename(columns={'City':'city','Date':'day','Time':'hour_match','Match':'match'})
combined_df = combined_df[['city','day','hour_match','match']]
combined_df['hour_match'] = combined_df.hour_match.apply(lambda x: int(x.split(':')[0]))
combined_df['day'] = combined_df.day.apply(lambda x: datetime.strptime(x, '%Y-%m-%d').date())
# dataframe for match metadata
combined_df

Unnamed: 0,city,day,hour_match,match
0,München,2024-06-14,21,Germany 5 - 1 Scotland
1,Köln,2024-06-15,15,Hungary 1 - 3 Switzerland
2,Stuttgart,2024-06-19,18,Germany 2 - 0 Hungary
3,Köln,2024-06-19,21,Scotland 1 - 1 Switzerland
4,Frankfurt am Main,2024-06-23,21,Switzerland 1 - 1 Germany
5,Stuttgart,2024-06-23,21,Scotland 0 - 1 Hungary
6,Berlin,2024-06-15,18,Spain 3 - 0 Croatia
7,Dortmund,2024-06-15,21,Italy 2 - 1 Albania
8,Hamburg,2024-06-19,15,Croatia 2 - 2 Albania
9,Gelsenkirchen,2024-06-20,21,Spain 1 - 0 Italy


In [7]:
combined_df.to_csv('data/metadata/euro2024_match_data.csv', index=False)

In [24]:
# load EURO 2024 match data
combined_df = pd.read_csv('data/metadata/euro2024_match_data.csv')
combined_df['day'] = [d.date() for d in pd.to_datetime(combined_df.day)]
combined_df

Unnamed: 0,city,day,hour_match,match
0,München,2024-06-14,21,Germany 5 - 1 Scotland
1,Köln,2024-06-15,15,Hungary 1 - 3 Switzerland
2,Stuttgart,2024-06-19,18,Germany 2 - 0 Hungary
3,Köln,2024-06-19,21,Scotland 1 - 1 Switzerland
4,Frankfurt am Main,2024-06-23,21,Switzerland 1 - 1 Germany
5,Stuttgart,2024-06-23,21,Scotland 0 - 1 Hungary
6,Berlin,2024-06-15,18,Spain 3 - 0 Croatia
7,Dortmund,2024-06-15,21,Italy 2 - 1 Albania
8,Hamburg,2024-06-19,15,Croatia 2 - 2 Albania
9,Gelsenkirchen,2024-06-20,21,Spain 1 - 0 Italy


## fanzones

Berlin (1)
https://www.berlin.de/kultur-und-tickets/tipps/em-2024/public-viewing-berlin/2606075-2605827-berliner-fanmeile-zur-fussball-em-2024.html

Dortmund (2)
https://www.dortmund.de/dortmund-erleben/uefa-euro-2024/fan-festival/

Düsseldorf (3)
https://duesseldorf.polizei.nrw/fan-zones-und-public-viewing-2

Frankfurt am Main (1)
https://www.visitfrankfurt.travel/erleben/feste-und-veranstaltungen/uefa-euro-2024-in-frankfurt

Gelsenkirchen (2)
https://gelsenkirchen.polizei.nrw/fan-zones-und-public-viewing-0
https://uefaeuro2024.gelsenkirchen.de/de/festival.aspx

Hamburg (1)
https://www.hamburg-tourism.de/sehen-erleben/veranstaltungen/uefa-euro-2024-fan-zone-hamburg/

Köln (3)
https://www.koeln.de/em-2024/public-viewing-koeln/

Leipzig (many)
https://rbleipzig.com/news/2023-2024/leipzig-fan-zone-public-viewing-events-uefa-europameisterschaft-2024/

München (1)
https://www.muenchen.de/veranstaltungen/uefaeuro2024/fan-zone-olympiapark-public-viewig-konzerte-programm

Stuttgart (4)
https://uefaeuro2024.stuttgart.de/host-city-stuttgart/