In [1]:
import os, pytz, pandas as pd, geopandas as gpd
from sqlalchemy import create_engine
from datetime import date, datetime, timedelta

# database connection

In [69]:
# database credentials
db_usr, db_pwd = os.getenv('DB_USR'), os.getenv('DB_PWD') # your database user name and password
# database login
host, port, db = 'nc-health-data-prod.cluster-ccsgl7rk4urn.eu-central-1.rds.amazonaws.com', 5432, 'master'

In [70]:
# for queries with output
engine = create_engine('postgresql://'+db_usr+':'+db_pwd+'@'+host+':'+str(port)+'/'+db)
conn = engine.connect()

In [76]:
conn.close()

# parameters

In [8]:
yeari, yearf = '2024', '2024'
weeki, weekf = '18', '31'

In [13]:
di = datetime.strptime(f'{yeari}-{weeki}-1', "%Y-%W-%w").date()
df = datetime.strptime(f'{yearf}-{weekf}-1', "%Y-%W-%w").date() + timedelta(6)
ds = [di+timedelta(dt) for dt in range((df-di).days+1)]
daylist = ds
print(di, 'until', df)

2024-04-29 until 2024-08-04


In [14]:
cdef = 'tl7_10m'# 'tl5_10m' 'tl6_10m' 'tl7_10m' 'tl8_10m' 'tl8_60m'
cdef_alt = '16m_10min'# tl5: 62 ... tl7: 16   tl8: 8

# load data

In [33]:
match_data = pd.read_csv('data/metadata/event_data.csv')

# generate data

## Figure 1: pings

In [15]:
cities = ['Stuttgart', 'Düsseldorf', 'Hamburg', 'Köln', 'Gelsenkirchen']
days_pos = ['2024-07-05', '2024-06-01', '2024-05-11', '2024-07-21', '2024-07-27']
days_neg = ['2024-06-28', '2024-05-25', '2024-05-04', '2024-07-14', '2024-07-20']

In [None]:
query = lambda city, day: f"""
    with restricted as (
    	select ex.*
    	from tuberlin_euro2024_tileid as ti
    	join ex_corona_sdkv6 as ex on ex.tile_id = ti.tile_id
    	where "day" = '{day}'
    ),
    ex_tmptmp as (
        select
                  "day"
                , stime
                , did
                , tl8
                , tile_id
            from restricted, unnest(stime_arr, tile_arr) u(stime, tl8)
    ),
    ex_tmp as (
        select
                  "day"
                , stime
                , did
                , st_transform(st_translate(st_setsrid(tile8togeo(tl8), 32632), tx.minx+3.90625, tx.miny+3.90625), 3857) as geopoint
                , tl8
            from ex_tmptmp
            join txc_dt_grid_1000m as tx on ex_tmptmp.tile_id = tx.tile_id
    ),
    cities2 as (
        select osm_id, "name", max(way_area) as way_area
        from tuberlin_euro2024_contour
        where "name" = '{city}'
        group by 1,2
    ),
    cities3 as (
        select c1."name", c1.way
        from tuberlin_euro2024_contour as c1
        join cities2 as c2 on c1.way_area = c2.way_area
    ),
    ex as (
        select
                  "day"
                , stime
                , did
                , "name" as city
                --, geopoint
                , st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat
                , tl8
        from ex_tmp
        join cities3 as c3 on st_contains(c3.way, ex_tmp.geopoint)
    )
    select *
    from ex
"""

In [None]:
for city, day_pos, day_neg in zip(cities, days_pos, days_neg):
    data = pd.DataFrame(pd.read_sql_query(query(city, day_pos), conn))
    data.to_csv(f'data/fig1/map_pings_{city.lower()}_{str(day_pos)}.csv', index=False)
    data = pd.DataFrame(pd.read_sql_query(query(city, day_neg), conn))
    data.to_csv(f'data/fig1/map_pings_{city.lower()}_{str(day_neg)}.csv', index=False)

## Figures 2, 3: contacts Germany (unique & non-unique)

In [None]:
# function to load list of contact events & their locations and ping sources
ld_cn = lambda d: f"""
    with cn_tmp as (
    	select
                  "day"
                , tl{cdef[2]}
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
     			, geopoint
            from covid_network_sdkv6_{cdef}, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" = '{str(d)}'
            group by 1,2,3,4,5,8
    ),
    cn as (
        select
                  "day"
                , tl{cdef[2]}
                , stime
                , dids
                , sources
    			, in_stadium
                , area_id
                , st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat
        from cn_tmp
    )
    select *
    from cn
"""
clist2 = pd.DataFrame()
for d in ds:
    print(f'processing {d}')
    clist = pd.DataFrame(pd.read_sql_query(ld_cn(d), conn))

    #clist2 = clist.copy(deep=True)
    clist = clist.drop(columns=['area_id'])
    clist['in_stadium'] = clist.in_stadium.fillna(False)
    clist['sources'] = clist.sources.apply(lambda x: x[1:-1].split(','))
    clist['stime'] = clist.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
    clist['day'] = clist.stime.apply(lambda x: x.date())
    clist = clist[(clist.day >= di) & (clist.day <= df)]
    clist = clist.explode(['dids','sources']).reset_index(drop=True)
    clist = clist.drop_duplicates()
    
    clist = clist.merge(clist.drop(columns=['in_stadium','lon','lat']), on=['day','stime',f'tl{cdef[2]}',])#,'tl7','inside_building','lon','lat'])
    clist = clist[clist.dids_x != clist.dids_y]
    clist = clist[clist.dids_x <= clist.dids_y]
    clist_true = clist[clist.in_stadium==True]
    clist_false = clist[clist.in_stadium==False]
    clist_true = clist_true[(clist_true.sources_x=='GPS') | (clist_true.sources_y=='GPS')]
    clist = pd.concat([clist_true, clist_false])
    clist = clist.drop(columns=['sources_x','sources_y'])
    clist = clist.drop_duplicates()
    clist2 = pd.concat([clist2, clist])
clist2

In [None]:
clist2.to_csv(f'data/fig2/colocations_germany_{cdef}.csv', index=False)

In [18]:
clist2 = pd.read_csv(f'data/fig2/colocations_germany_{cdef}.csv')
clist2['day'] = [d.date() for d in pd.to_datetime(clist2.day)]
clist2['stime'] = pd.to_datetime(clist2.stime)
clist2

Unnamed: 0,day,tl7,stime,dids_x,in_stadium,lon,lat,dids_y
0,2024-04-29,1301192320330,2024-04-29 19:10:00+02:00,02E3D5753C878D9E2E0F0CD886195FF8,True,8.724340,50.128207,56B1A48A69C7FAF20DD7A444F4F96ED2
1,2024-04-29,1842630000100,2024-04-29 09:50:00+02:00,CC2627B93217042559B1F57E2C6AAF66,True,7.003544,51.038475,FA599E9E214AD842B6C7C4A257B5DD19
2,2024-04-30,347542202210,2024-04-30 20:50:00+02:00,29A279DC53B7CE88CE580C0B457EFD25,True,11.625481,48.217823,B1EA1505B6FFEF15AA4B21314A5D6D10
3,2024-04-30,347542220020,2024-04-30 21:40:00+02:00,9ABA20F48E02D6A7911744BFC06EF808,True,11.625292,48.218249,FAF7698F29750F1F55002F9DE8F3F1FE
4,2024-04-30,351591111020,2024-04-30 21:20:00+02:00,87C2C7913F8C8DDC1E0787B4126E8484,True,11.624509,48.219392,F415BB8416B5EF0532E0EE2DB1DAE1D8
...,...,...,...,...,...,...,...,...
5563298,2024-08-04,3875383122330,2024-08-04 03:10:00+02:00,31EB19EDFC41D85B60B96C096BE40584,False,9.588738,54.854276,552C60227128DFFE36F44E4C34B98EAF
5563299,2024-08-04,3880561310030,2024-08-04 10:30:00+02:00,0829317228FFAE3A79E24CD19FBE31B6,False,8.312097,54.903463,AFAEC9193C1E0326BEBD9393D9E51058
5563300,2024-08-04,3880562232130,2024-08-04 09:10:00+02:00,0A83583D500992480D6FA65EF903E3D4,False,8.300782,54.909577,B016082737BA9ABE4145BA879A56ACAE
5563301,2024-08-04,3882932102130,2024-08-04 09:30:00+02:00,0829317228FFAE3A79E24CD19FBE31B6,False,8.317929,54.933265,AFAEC9193C1E0326BEBD9393D9E51058


In [21]:
# counting repeated pairs outside stadiums
weighted = clist2[clist2.in_stadium==False].drop_duplicates().groupby(['day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
weighted = weighted.rename(columns={'dids_x':'contacts_nonunique'})
weighted[(weighted.day==date(2024,7,5))]

Unnamed: 0,day,contacts_nonunique
67,2024-07-05,128482


In [22]:
# counting unique pairs outside stadiums
unweighted = clist2[clist2.in_stadium==False].drop(columns=[f'tl{cdef[2]}','stime']).drop_duplicates().groupby(['day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
unweighted = unweighted.rename(columns={'dids_x':'contacts_unique'})
unweighted[(unweighted.day==date(2024,7,5))]

Unnamed: 0,day,contacts_unique
67,2024-07-05,98218


In [23]:
# counting repeated pairs in stadiums
weighted_stad = clist2[clist2.in_stadium==True].drop_duplicates().groupby(['day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
weighted_stad = weighted_stad.rename(columns={'dids_x':'contacts_nonunique_stadium'})
weighted_stad[(weighted_stad.day==date(2024,7,5))]

Unnamed: 0,day,contacts_nonunique_stadium
58,2024-07-05,362


In [24]:
# counting unique pairs in stadiums
unweighted_stad = clist2[clist2.in_stadium==True].drop(columns=[f'tl{cdef[2]}','stime']).drop_duplicates().groupby(['day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
unweighted_stad = unweighted_stad.rename(columns={'dids_x':'contacts_unique_stadium'})
unweighted_stad[(unweighted_stad.day==date(2024,7,5))]

Unnamed: 0,day,contacts_unique_stadium
58,2024-07-05,316


In [25]:
for_andrzej_sensitivity = weighted.merge(unweighted, on=['day'], how='left')\
                                  .merge(weighted_stad, on=['day'], how='left').fillna(0)\
                                  .merge(unweighted_stad, on=['day'], how='left').fillna(0)
for_andrzej_sensitivity['contacts_nonunique_stadium'] = for_andrzej_sensitivity.contacts_nonunique_stadium.astype(int)
for_andrzej_sensitivity['contacts_unique_stadium'] = for_andrzej_sensitivity.contacts_unique_stadium.astype(int)
#for_andrzej_sensitivity = for_andrzej_sensitivity.merge(match_data[['day','city','match']], on=['city','day'], how='left')
#for_andrzej_sensitivity = for_andrzej_sensitivity.rename(columns={'match':'event'})
for_andrzej_sensitivity['resolution'] = cdef_alt
ordered = ['resolution','day','contacts_nonunique','contacts_nonunique_stadium','contacts_unique','contacts_unique_stadium']
for_andrzej_sensitivity = for_andrzej_sensitivity[ordered]
for_andrzej_sensitivity

Unnamed: 0,resolution,day,contacts_nonunique,contacts_nonunique_stadium,contacts_unique,contacts_unique_stadium
0,16m_10min,2024-04-29,117292,4,84826,4
1,16m_10min,2024-04-30,135878,48,102048,46
2,16m_10min,2024-05-01,103018,74,66694,70
3,16m_10min,2024-05-02,112532,0,82434,0
4,16m_10min,2024-05-03,121720,118,92140,114
...,...,...,...,...,...,...
93,16m_10min,2024-07-31,104652,498,77940,456
94,16m_10min,2024-08-01,106578,0,79892,0
95,16m_10min,2024-08-02,123670,90,95926,88
96,16m_10min,2024-08-03,126286,364,95496,352


In [26]:
for_andrzej_sensitivity.to_csv(f'data/fig2/ncontacts_germany_{cdef}.csv', index=False)

In [40]:
data_b_tl7_10m = pd.read_csv(f'data/fig2/ncontacts_germany_tl7_10m.csv')
data_b_tl8_10m = pd.read_csv(f'data/fig2/ncontacts_germany_tl8_10m.csv')
data_b_tl6_10m = pd.read_csv(f'data/fig2/ncontacts_germany_tl6_10m.csv')
data_b_tl8_60m = pd.read_csv(f'data/fig2/ncontacts_germany_tl8_60m.csv')
data_b_tl5_10m = pd.read_csv(f'data/fig2/ncontacts_germany_tl5_10m.csv')
data_b = pd.concat([data_b_tl7_10m, data_b_tl8_10m, data_b_tl6_10m, data_b_tl8_60m, data_b_tl5_10m])
data_b['day'] = [d.date() for d in pd.to_datetime(data_b.day)]

In [41]:
data_here = data_b.copy(deep=True)
#data_here = data_here.drop(columns=['resolution'])
data_here['wd'] = [d.weekday() for d in data_here.day]

# baseline = number contacts in city without stadiums by weekday, averaged over many weeks
baseline_nu = pd.DataFrame(data_here.groupby(['resolution','wd']).contacts_nonunique.mean()).rename(columns={'contacts_nonunique':'baseline_nonunique'}).reset_index()
data_here = data_here.merge(baseline_nu, on=['resolution','wd'])
baseline_u = pd.DataFrame(data_here.groupby(['resolution','wd']).contacts_unique.mean()).rename(columns={'contacts_unique':'baseline_unique'}).reset_index()
data_here = data_here.merge(baseline_u, on=['resolution','wd'])

data_here['to_baseline_nonunique'] = (data_here.contacts_nonunique + data_here.contacts_nonunique_stadium) / data_here.baseline_nonunique
data_here['to_baseline_unique'] = (data_here.contacts_unique + data_here.contacts_unique_stadium) / data_here.baseline_unique
data_here['contacts_nonunique'] = (data_here.contacts_nonunique + data_here.contacts_nonunique_stadium)
data_here['contacts_unique'] = (data_here.contacts_unique + data_here.contacts_unique_stadium)

ordered = ['resolution','day','contacts_nonunique','contacts_unique','to_baseline_nonunique','to_baseline_unique']
for_andrzej = data_here[ordered]
for_andrzej = for_andrzej.sort_values(['resolution','day']).reset_index(drop=True)
for_andrzej

Unnamed: 0,resolution,day,contacts_nonunique,contacts_unique,to_baseline_nonunique,to_baseline_unique
0,16m_10min,2024-04-29,117296,84830,1.099284,1.093875
1,16m_10min,2024-04-30,135926,102094,1.266744,1.297274
2,16m_10min,2024-05-01,103092,66764,0.937958,0.826169
3,16m_10min,2024-05-02,112532,82434,0.993173,0.976744
4,16m_10min,2024-05-03,121838,92254,0.949512,0.937602
...,...,...,...,...,...,...
485,8m_60min,2024-07-31,162784,139872,0.950318,0.956297
486,8m_60min,2024-08-01,168258,146124,0.950898,0.960743
487,8m_60min,2024-08-02,196900,173368,0.964589,0.970422
488,8m_60min,2024-08-03,193568,167440,1.038441,1.063260


In [42]:
for_andrzej[(for_andrzej.day==date(2024,7,5)) & (for_andrzej.resolution=='16m_10min')]

Unnamed: 0,resolution,day,contacts_nonunique,contacts_unique,to_baseline_nonunique,to_baseline_unique
67,16m_10min,2024-07-05,128844,98534,1.004111,1.001427


In [None]:
for_andrzej.to_csv(f'data/fig2/sensitivity_germany.csv', index=False, encoding='utf-8-sig')

## Figures 2, 3: contacts cities (unique & non-unique)

In [None]:
# function to load list of contact events & their locations and ping sources
ld_cn = f"""
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.way_area = c2.way_area
    ),
    cn_tmp as (
    	select
                  "day"
                , tl{cdef[2]}
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
     			, geopoint
            from covid_network_sdkv6_{cdef}, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" between '{str(di-timedelta(1))}' and '{str(df)}'
            group by 1,2,3,4,5,8
    ),
    cn as (
        select
                  "day"
                , tl{cdef[2]}
                , stime
                , dids
                , sources
    			, in_stadium
                , area_id
                , "name" as city
                , st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat
        from cn_tmp
        join cities3 as c3 on st_contains(c3.way, cn_tmp.geopoint)
    )
    select *
    from cn
"""
clist = pd.DataFrame(pd.read_sql_query(ld_cn, conn))
clist

In [None]:
clist2 = clist.copy(deep=True)
clist2 = clist2.drop(columns=['area_id'])
clist2['in_stadium'] = clist2.in_stadium.fillna(False)
clist2['sources'] = clist2.sources.apply(lambda x: x[1:-1].split(','))
clist2['stime'] = clist2.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
clist2['day'] = clist2.stime.apply(lambda x: x.date())
clist2 = clist2[(clist2.day >= di) & (clist2.day <= df)]
clist2 = clist2.explode(['dids','sources']).reset_index(drop=True)
clist2 = clist2.drop_duplicates()
clist2 = clist2.merge(clist2.drop(columns=['in_stadium','city','lon','lat']), on=['day','stime',f'tl{cdef[2]}',])#,'tl7','inside_building','lon','lat'])
clist2 = clist2[clist2.dids_x != clist2.dids_y]
clist2 = clist2[clist2.dids_x <= clist2.dids_y]
clist2_true = clist2[clist2.in_stadium==True]
clist2_false = clist2[clist2.in_stadium==False]
clist2_true = clist2_true[(clist2_true.sources_x=='GPS') | (clist2_true.sources_y=='GPS')]
clist2 = pd.concat([clist2_true, clist2_false])
clist2 = clist2.drop(columns=['sources_x','sources_y'])
clist2 = clist2.drop_duplicates()
clist2

In [None]:
clist2.to_csv(f'data/fig2/colocations_cities_{cdef}.csv', index=False)

In [27]:
clist2 = pd.read_csv(f'data/fig2/colocations_cities_{cdef}.csv')
clist2['day'] = [d.date() for d in pd.to_datetime(clist2.day)]
clist2['stime'] = pd.to_datetime(clist2.stime)

In [28]:
# counting repeated pairs outside stadiums
weighted = clist2[clist2.in_stadium==False].drop_duplicates().groupby(['city','day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
weighted = weighted.rename(columns={'dids_x':'contacts_nonunique'})
weighted[(weighted.city=='Stuttgart') & (weighted.day==date(2024,7,5))]

Unnamed: 0,city,day,contacts_nonunique
949,Stuttgart,2024-07-05,872


In [29]:
# counting unique pairs outside stadiums
unweighted = clist2[clist2.in_stadium==False].drop(columns=[f'tl{cdef[2]}','stime']).drop_duplicates().groupby(['city','day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
unweighted = unweighted.rename(columns={'dids_x':'contacts_unique'})
unweighted[(unweighted.city=='Stuttgart') & (unweighted.day==date(2024,7,5))]

Unnamed: 0,city,day,contacts_unique
949,Stuttgart,2024-07-05,794


In [30]:
# counting repeated pairs in stadiums
weighted_stad = clist2[clist2.in_stadium==True].drop_duplicates().groupby(['city','day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
weighted_stad = weighted_stad.rename(columns={'dids_x':'contacts_nonunique_stadium'})
weighted_stad[(weighted_stad.city=='Stuttgart') & (weighted_stad.day==date(2024,7,5))]

Unnamed: 0,city,day,contacts_nonunique_stadium
129,Stuttgart,2024-07-05,34


In [31]:
# counting unique pairs in stadiums
unweighted_stad = clist2[clist2.in_stadium==True].drop(columns=[f'tl{cdef[2]}','stime']).drop_duplicates().groupby(['city','day']).dids_x.apply(lambda x: 2*len(x)).reset_index()
unweighted_stad = unweighted_stad.rename(columns={'dids_x':'contacts_unique_stadium'})
unweighted_stad[(unweighted_stad.city=='Stuttgart') & (unweighted_stad.day==date(2024,7,5))]

Unnamed: 0,city,day,contacts_unique_stadium
129,Stuttgart,2024-07-05,32


In [34]:
for_andrzej_sensitivity = weighted.merge(unweighted, on=['city','day'], how='left')\
                                  .merge(weighted_stad, on=['city','day'], how='left').fillna(0)\
                                  .merge(unweighted_stad, on=['city','day'], how='left').fillna(0)
for_andrzej_sensitivity['contacts_nonunique_stadium'] = for_andrzej_sensitivity.contacts_nonunique_stadium.astype(int)
for_andrzej_sensitivity['contacts_unique_stadium'] = for_andrzej_sensitivity.contacts_unique_stadium.astype(int)
for_andrzej_sensitivity = for_andrzej_sensitivity.merge(match_data[['day','city','match']], on=['city','day'], how='left')
for_andrzej_sensitivity = for_andrzej_sensitivity.rename(columns={'match':'event'})
for_andrzej_sensitivity['resolution'] = cdef_alt
ordered = ['resolution','city','day','event','contacts_nonunique','contacts_nonunique_stadium','contacts_unique','contacts_unique_stadium']
for_andrzej_sensitivity = for_andrzej_sensitivity[ordered]
for_andrzej_sensitivity

Unnamed: 0,resolution,city,day,event,contacts_nonunique,contacts_nonunique_stadium,contacts_unique,contacts_unique_stadium
0,16m_10min,Berlin,2024-04-29,,6086,0,5404,0
1,16m_10min,Berlin,2024-04-30,,6004,0,5222,0
2,16m_10min,Berlin,2024-05-01,,3182,0,2428,0
3,16m_10min,Berlin,2024-05-02,,5222,0,4460,0
4,16m_10min,Berlin,2024-05-03,,6342,0,5366,0
...,...,...,...,...,...,...,...,...
975,16m_10min,Stuttgart,2024-07-31,,536,0,498,0
976,16m_10min,Stuttgart,2024-08-01,,510,0,458,0
977,16m_10min,Stuttgart,2024-08-02,,440,0,406,0
978,16m_10min,Stuttgart,2024-08-03,,1378,0,1146,0


In [None]:
for_andrzej_sensitivity.to_csv(f'data/fig2/ncontacts_cities_{cdef}.csv', index=False)

In [39]:
data_b_tl7_10m = pd.read_csv(f'data/fig2/ncontacts_cities_tl7_10m.csv')
data_b_tl8_10m = pd.read_csv(f'data/fig2/ncontacts_cities_tl8_10m.csv')
data_b_tl6_10m = pd.read_csv(f'data/fig2/ncontacts_cities_tl6_10m.csv')
data_b_tl8_60m = pd.read_csv(f'data/fig2/ncontacts_cities_tl8_60m.csv')
data_b_tl5_10m = pd.read_csv(f'data/fig2/ncontacts_cities_tl5_10m.csv')
data_b = pd.concat([data_b_tl7_10m, data_b_tl8_10m, data_b_tl6_10m, data_b_tl8_60m, data_b_tl5_10m])
data_b['day'] = [d.date() for d in pd.to_datetime(data_b.day)]

In [36]:
data_here = data_b.copy(deep=True)
#data_here = data_here.drop(columns=['resolution'])
data_here['wd'] = [d.weekday() for d in data_here.day]

# baseline = number contacts in city without stadiums by weekday, averaged over many weeks
baseline_nu = pd.DataFrame(data_here[data_here.event.isna()].groupby(['resolution','city','wd']).contacts_nonunique.mean()).rename(columns={'contacts_nonunique':'baseline_nonunique'}).reset_index()
data_here = data_here.merge(baseline_nu, on=['resolution','city','wd'])
baseline_u = pd.DataFrame(data_here[data_here.event.isna()].groupby(['resolution','city','wd']).contacts_unique.mean()).rename(columns={'contacts_unique':'baseline_unique'}).reset_index()
data_here = data_here.merge(baseline_u, on=['resolution','city','wd'])

data_here['to_baseline_nonunique'] = (data_here.contacts_nonunique + data_here.contacts_nonunique_stadium) / data_here.baseline_nonunique
data_here['to_baseline_unique'] = (data_here.contacts_unique + data_here.contacts_unique_stadium) / data_here.baseline_unique
data_here['contacts_nonunique'] = (data_here.contacts_nonunique + data_here.contacts_nonunique_stadium)
data_here['contacts_unique'] = (data_here.contacts_unique + data_here.contacts_unique_stadium)

ordered = ['resolution','city','day','event','contacts_nonunique','contacts_unique','to_baseline_nonunique','to_baseline_unique']
for_andrzej = data_here[ordered]
for_andrzej = for_andrzej.sort_values(['resolution','city','day']).reset_index(drop=True)
for_andrzej

Unnamed: 0,resolution,city,day,event,contacts_nonunique,contacts_unique,to_baseline_nonunique,to_baseline_unique
0,16m_10min,Berlin,2024-04-29,,6086,2624,1.222768,1.064337
1,16m_10min,Berlin,2024-04-30,,6004,3042,1.123083,1.137229
2,16m_10min,Berlin,2024-05-01,,3182,1370,0.641921,0.525969
3,16m_10min,Berlin,2024-05-02,,5222,2604,0.932833,0.948683
4,16m_10min,Berlin,2024-05-03,,6342,2920,1.060726,1.026723
...,...,...,...,...,...,...,...,...
4920,8m_60min,Stuttgart,2024-07-31,,816,618,0.713968,0.820618
4921,8m_60min,Stuttgart,2024-08-01,,844,594,0.750111,0.795536
4922,8m_60min,Stuttgart,2024-08-02,,698,490,0.644885,0.674931
4923,8m_60min,Stuttgart,2024-08-03,SDP,2116,1348,2.059094,1.965536


In [37]:
for_andrzej[(for_andrzej.city=='Stuttgart') & (for_andrzej.day==date(2024,7,5)) & (for_andrzej.resolution=='16m_10min')]

Unnamed: 0,resolution,city,day,event,contacts_nonunique,contacts_unique,to_baseline_nonunique,to_baseline_unique
954,16m_10min,Stuttgart,2024-07-05,ESP-GER,906,538,1.362592,1.393782


In [None]:
for_andrzej.to_csv(f'data/fig2/sensitivity_cities.csv', index=False, encoding='utf-8-sig')

# Figure 4: contacts cities with location info

In [None]:
ld_cn = f"""
    with cities1 as (
    	select osm_id, "name", way_area, way
    	from planet_osm_polygon
    	where osm_id in (-62428,-62422,-62649,-62400,-1829065,-2793104,-62578,-62539,-62782,-62522)
    ),
    cities2 as (
    	select osm_id, "name", max(way_area) as way_area
    	from cities1
    	group by 1,2
    ),
    cities3 as (
    	select c1."name", c1.way
    	from cities1 as c1
    	join cities2 as c2 on c1.way_area = c2.way_area
    ),
    cn_tmp as (
    	select
                  "day"
                , tl{cdef[2]}
                , stime
                , dids
                , sources
     			, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                , min(u.area_id) as area_id
     			, geopoint
            from covid_network_sdkv6_{cdef}, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            where
                    "day" between '{str(di-timedelta(1))}' and '{str(df)}'
            group by 1,2,3,4,5,8
    ),
    cn as (
        select
                  "day"
                , stime
                , dids
                , st_x(st_transform(geopoint, 4326)) as lon
                , st_y(st_transform(geopoint, 4326)) as lat
                , tl{cdef[2]}
                , sources
    			, in_stadium
                , area_id
                , "name" as city
        from cn_tmp
        join cities3 as c3 on st_contains(c3.way, cn_tmp.geopoint)
    )
    select *
    from cn
"""
clist = pd.DataFrame(pd.read_sql_query(ld_cn, conn))
clist

In [None]:
clist.to_csv('data/fig4/contacts_cities_stadiums_tmp.csv', index=False)# 'output/09_contacts_cities_stadiums_tmp.csv'

In [43]:
clist = pd.read_csv('data/fig4/contacts_cities_stadiums_tmp.csv')

In [44]:
data = clist.copy(deep=True)
data['day'] = [d.date() for d in pd.to_datetime(data.day)]
data['stime'] = [d for d in pd.to_datetime(data.stime)]
data['dids'] = data.dids.apply(lambda x: x[2:-2].split("', '"))
data['sources'] = data.sources.apply(lambda x: x[1:-1].split(','))

In [45]:
data.stime[0]

Timestamp('2024-04-29 10:10:00+0000', tz='UTC')

In [47]:
data['stime'] = data.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
data['day'] = data.stime.apply(lambda x: x.date())
data = data[(data.day >= di) & (data.day <= df)]

In [48]:
data.stime[0]

Timestamp('2024-04-29 12:10:00+0200', tz='Europe/Berlin')

In [49]:
#data = data.explode(['dids']).reset_index(drop=True)
data = data.explode(['dids','sources']).reset_index(drop=True)
data = data.drop_duplicates()
#data = data.merge(data.drop(columns=['lon','lat','city']), on=['day','stime','tl7',])
data = data.merge(data.drop(columns=['lon','lat','in_stadium','area_id','city']), on=['day','stime','tl7',])#,'tl7','inside_building','lon','lat'])
data = data[data.dids_x != data.dids_y]
data['in_stadium'] = data.in_stadium.fillna(False)
data

Unnamed: 0,day,stime,dids_x,lon,lat,tl7,sources_x,in_stadium,area_id,city,dids_y,sources_y
1,2024-04-29,2024-04-29 12:10:00+02:00,738019E5463679AF017CC32038DAD845,11.525800,48.077357,283122232220,FUSED,False,,München,F6C5FDE82C086B947D0530ABFB89B607,FUSED
2,2024-04-29,2024-04-29 12:10:00+02:00,F6C5FDE82C086B947D0530ABFB89B607,11.525800,48.077357,283122232220,FUSED,False,,München,738019E5463679AF017CC32038DAD845,FUSED
5,2024-04-29,2024-04-29 20:30:00+02:00,3BD179457641EF10B617F363C0E89918,11.502958,48.082215,287170332300,GPS,False,,München,CA518459DC95B39009CE8183FC9C9E66,NET
6,2024-04-29,2024-04-29 20:30:00+02:00,CA518459DC95B39009CE8183FC9C9E66,11.502958,48.082215,287170332300,NET,False,,München,3BD179457641EF10B617F363C0E89918,GPS
9,2024-04-29,2024-04-29 10:10:00+02:00,4DBC8F9ED17163B6753A2EA454E67709,11.524983,48.094949,291252222000,FUSED,False,,München,97C848F79E42A562804A4E8FCD63AA07,NET
...,...,...,...,...,...,...,...,...,...,...,...,...
3711035,2024-08-04,2024-08-04 09:10:00+02:00,65B0297499046055C6A88E141525BB41,10.017480,53.671207,3474720231320,FUSED,False,,Hamburg,814C4432267C699C301E5891BC9E834C,GPS
3711036,2024-08-04,2024-08-04 09:10:00+02:00,814C4432267C699C301E5891BC9E834C,10.017480,53.671207,3474720231320,NET,False,,Hamburg,65B0297499046055C6A88E141525BB41,NET
3711037,2024-08-04,2024-08-04 09:10:00+02:00,814C4432267C699C301E5891BC9E834C,10.017480,53.671207,3474720231320,NET,False,,Hamburg,65B0297499046055C6A88E141525BB41,FUSED
3711040,2024-08-04,2024-08-04 09:10:00+02:00,814C4432267C699C301E5891BC9E834C,10.017480,53.671207,3474720231320,GPS,False,,Hamburg,65B0297499046055C6A88E141525BB41,NET


In [50]:
data_true = data[data.in_stadium==True]
data_false = data[data.in_stadium==False]
data_true = data_true[(data_true.sources_x=='GPS') | (data_true.sources_y=='GPS')]
data = pd.concat([data_true, data_false])

In [51]:
#data = data[(data.sources_x=='GPS') | (data.sources_y=='GPS')]
pairs = []
for did1, did2 in zip(data.dids_x, data.dids_y):
    pair = f'{did1}_{did2}' if did1 < did2 else f'{did2}_{did1}'
    #print(did1, did2, pair)
    pairs.append(pair)
data.loc[:,'pair'] = pairs
#data = data.drop(columns=['dids_x','dids_y','tl7'])
data = data.drop(columns=['dids_x','dids_y','sources_x','sources_y'])#,'tl7'
data = data.drop_duplicates()
dmin = data.day.min()
#data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24 + data.stime.dt.hour
data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24*6 + data.stime.dt.hour*6 + (data.stime.dt.hour//10)
data['hour'] = data.stime.dt.hour
print(data.tt.max(), ((data.day.max()-dmin).days+1)*24, ((data.day.max()-dmin).days+1)*720)
data

14108 2352 70560


Unnamed: 0,day,stime,lon,lat,tl7,in_stadium,area_id,city,pair,tt,hour
3208,2024-04-29,2024-04-29 19:10:00+02:00,8.724340,50.128207,1301192320330,True,64.0,Frankfurt am Main,02E3D5753C878D9E2E0F0CD886195FF8_56B1A48A69C7F...,115,19
40980,2024-04-30,2024-04-30 20:50:00+02:00,11.625481,48.217823,347542202210,True,1.0,München,29A279DC53B7CE88CE580C0B457EFD25_B1EA1505B6FFE...,266,20
41040,2024-04-30,2024-04-30 21:40:00+02:00,11.625292,48.218249,347542220020,True,1.0,München,9ABA20F48E02D6A7911744BFC06EF808_FAF7698F29750...,272,21
41136,2024-04-30,2024-04-30 21:20:00+02:00,11.624509,48.219392,351591111020,True,1.0,München,87C2C7913F8C8DDC1E0787B4126E8484_F415BB8416B5E...,272,21
41157,2024-04-30,2024-04-30 22:40:00+02:00,11.624719,48.219387,351591111030,True,1.0,München,16BEDFEC806693856846421D62ED6082_1774C49EE7352...,278,22
...,...,...,...,...,...,...,...,...,...,...,...
3711001,2024-08-04,2024-08-04 14:00:00+02:00,9.930505,53.652673,3464431200200,False,,Hamburg,4A986EF708185DA33ADBC34034B804D1_7BE17A3A368CB...,14053,14
3711005,2024-08-04,2024-08-04 16:00:00+02:00,9.930505,53.652673,3464431200200,False,,Hamburg,0422E4FF59DE153BCCF3331DF5DF5AD2_899CA0D2D186C...,14065,16
3711011,2024-08-04,2024-08-04 18:00:00+02:00,10.093223,53.655090,3464542122020,False,,Hamburg,6DB5F8D27B235EDEB9A8932A8E47D999_EDF25B1C7FC36...,14077,18
3711025,2024-08-04,2024-08-04 16:50:00+02:00,10.161662,53.650513,3464581300120,False,,Hamburg,4D034C69DF85B350046E371A98866629_9174148678A5D...,14065,16


In [52]:
data.to_csv('data/fig4/contacts_cities_stadiums.csv', index=False)

# Figure 5: contacts cities mapped with OSM

In [61]:
# load EURO 2024 match data
combined_df = pd.read_csv('output/00_euro2024_match_data.csv')
combined_df['day'] = [d.date() for d in pd.to_datetime(combined_df.day)]

In [62]:
combined_df

Unnamed: 0,city,day,hour_match,match
0,München,2024-06-14,21,Germany 5 - 1 Scotland
1,Köln,2024-06-15,15,Hungary 1 - 3 Switzerland
2,Stuttgart,2024-06-19,18,Germany 2 - 0 Hungary
3,Köln,2024-06-19,21,Scotland 1 - 1 Switzerland
4,Frankfurt am Main,2024-06-23,21,Switzerland 1 - 1 Germany
5,Stuttgart,2024-06-23,21,Scotland 0 - 1 Hungary
6,Berlin,2024-06-15,18,Spain 3 - 0 Croatia
7,Dortmund,2024-06-15,21,Italy 2 - 1 Albania
8,Hamburg,2024-06-19,15,Croatia 2 - 2 Albania
9,Gelsenkirchen,2024-06-20,21,Spain 1 - 0 Italy


In [63]:
# load event data
df_events = pd.read_csv('output/00_event_data.csv')
df_events = df_events[['city','day','match']]
df_events['day'] = [d.date() for d in pd.to_datetime(df_events.day)]

In [64]:
# execute if using non-event baseline (instead of non-Germany match baseline)
dbase = pd.DataFrame(pd.date_range(date(2024,6,10), date(2024,8,4)), columns=['day'])
dbase['day'] = dbase.day.apply(lambda x: x.date())
dbase = dbase.merge(pd.DataFrame(list(set(combined_df.city)), columns=['city']), how='cross')
dbase = dbase.merge(df_events, on=['day','city'], how='left')
dbase = dbase[dbase.match.isna()].drop(columns='match')
dbase['wd'] = dbase.day.apply(lambda x: x.weekday())

combined_df = combined_df[combined_df.match.apply(lambda x: 'Germany' in x)].copy(deep=True)
combined_df_base = combined_df.copy(deep=True)
combined_df_base['wd'] = combined_df_base.day.apply(lambda x: x.weekday())
combined_df_base = combined_df_base.drop(columns=['day','match']).merge(dbase, on=['city','wd'])
combined_df_base['match'] = 'base'
combined_df_base = combined_df_base.drop(columns=['wd'])
combined_df = pd.concat([combined_df, combined_df_base])
combined_df

Unnamed: 0,city,day,hour_match,match
0,München,2024-06-14,21,Germany 5 - 1 Scotland
2,Stuttgart,2024-06-19,18,Germany 2 - 0 Hungary
4,Frankfurt am Main,2024-06-23,21,Switzerland 1 - 1 Germany
36,Dortmund,2024-06-29,21,Germany 2 - 0 Denmark
44,Stuttgart,2024-07-05,18,Spain 2 - 1 Germany
0,München,2024-06-21,21,base
1,München,2024-06-28,21,base
2,München,2024-07-12,21,base
3,München,2024-07-19,21,base
4,München,2024-07-26,21,base


In [65]:
print((combined_df.match!='base').sum(), (combined_df.match=='base').sum())

5 27


In [66]:
data = pd.read_csv('data/fig4/contacts_cities_stadiums.csv')
data['day'] = [d.date() for d in pd.to_datetime(data.day)]
data['stime'] = pd.to_datetime(data.stime)
data['hour'] = data.stime.dt.hour

In [67]:
data[(data.day==date(2024,7,5))]

Unnamed: 0,day,stime,lon,lat,tl7,in_stadium,area_id,city,pair,tt,hour
2151,2024-07-05,2024-07-05 18:00:00+02:00,9.231033,48.791436,610751311320,True,11.0,Stuttgart,8E8428D7AF6EF8C595409AC8319EB576_9A317535E9699...,9757,18
2152,2024-07-05,2024-07-05 18:40:00+02:00,9.231034,48.791717,610751313120,True,11.0,Stuttgart,4FBA5DD7D79D201ED43E872A1C7D771B_8E8428D7AF6EF...,9757,18
2153,2024-07-05,2024-07-05 18:40:00+02:00,9.231034,48.791717,610751313120,True,11.0,Stuttgart,845F65721C390DF22D50B672FDB90F0E_8E8428D7AF6EF...,9757,18
2154,2024-07-05,2024-07-05 16:40:00+02:00,9.231250,48.792420,610751331310,True,11.0,Stuttgart,0AF0CC3A14CDC9FB74EA4BA182E2D57D_585C22CBCA859...,9745,16
2155,2024-07-05,2024-07-05 17:10:00+02:00,9.231251,48.792560,610751331330,True,11.0,Stuttgart,5A4FA55D3BE8CC4A4BF58B9475C7B78B_A7538A1E5657D...,9751,17
...,...,...,...,...,...,...,...,...,...,...,...
546411,2024-07-05,2024-07-05 14:30:00+02:00,10.000677,53.680759,3479690223300,False,,Hamburg,11325D0BCEE72D972FA7A4ED4D832C0B_4543BC3576129...,9733,14
546412,2024-07-05,2024-07-05 08:20:00+02:00,10.030681,53.679100,3479710203010,False,,Hamburg,11325D0BCEE72D972FA7A4ED4D832C0B_E89FCF119DF79...,9696,8
546413,2024-07-05,2024-07-05 16:10:00+02:00,10.105260,53.708491,3494473131210,False,,Hamburg,A20DC4F325AC04318FAF094D8F201121_A9F3AF69D077E...,9745,16
546414,2024-07-05,2024-07-05 16:10:00+02:00,10.119359,53.704568,3494481132230,False,,Hamburg,1A3ABEF8565570A1107CE172C4CD9F75_A9F3AF69D077E...,9745,16


In [68]:
# count the contacts by city, day and hour and join them on the match metadata by city and day
data2 = combined_df.merge(data, on=['city','day'])
# define a time relative to start of the match
data2['hour_rel'] = data2.hour - data2.hour_match
# define a flag on whether the match involved Germany
data2['germany'] = data2.match.apply(lambda x: 'Germany' in x)
data2

Unnamed: 0,city,day,hour_match,match,stime,lon,lat,tl7,in_stadium,area_id,pair,tt,hour,hour_rel,germany
0,München,2024-06-14,21,Germany 5 - 1 Scotland,2024-06-14 20:10:00+02:00,11.624227,48.217992,347533312330,True,1.0,3BAD71931D0048B5A4317A7469EED1A2_539A946B36F4C...,6746,20,-1,True
1,München,2024-06-14,21,Germany 5 - 1 Scotland,2024-06-14 20:10:00+02:00,11.624227,48.217992,347533312330,True,1.0,539A946B36F4C522B1855713DC97B3C4_E81007EE27FCD...,6746,20,-1,True
2,München,2024-06-14,21,Germany 5 - 1 Scotland,2024-06-14 20:40:00+02:00,11.624227,48.217992,347533312330,True,1.0,355840D77E2DE2E68AFF26F421C135CE_539A946B36F4C...,6746,20,-1,True
3,München,2024-06-14,21,Germany 5 - 1 Scotland,2024-06-14 20:50:00+02:00,11.624227,48.217992,347533312330,True,1.0,355840D77E2DE2E68AFF26F421C135CE_539A946B36F4C...,6746,20,-1,True
4,München,2024-06-14,21,Germany 5 - 1 Scotland,2024-06-14 21:40:00+02:00,11.624227,48.217992,347533312330,True,1.0,355840D77E2DE2E68AFF26F421C135CE_3BAD71931D004...,6752,21,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11177,Stuttgart,2024-08-02,18,base,2024-08-02 15:30:00+02:00,9.228916,48.840637,632743300130,False,,7D7CF597A9DEDDBC943B40A5753C6C1A_AB81402CDE99A...,13771,15,-3,False
11178,Stuttgart,2024-08-02,18,base,2024-08-02 10:40:00+02:00,9.235070,48.835986,632750300000,False,,640FC0ABE2E85B0EF5D32C90939D2C5F_E7C338D80D86B...,13741,10,-8,False
11179,Stuttgart,2024-08-02,18,base,2024-08-02 14:20:00+02:00,9.233395,48.842174,632752232000,False,,790BE2F547CED740931275417750C435_EF67E7ED82CB4...,13765,14,-4,False
11180,Stuttgart,2024-08-02,18,base,2024-08-02 02:00:00+02:00,9.208908,48.842783,637080101010,False,,1939EDD5F370FF7F429A46E05829B8B5_AA5764D101FCC...,13692,2,-16,False


In [71]:
data2[['city','day','hour_match','match','germany','stime','hour','hour_rel','tl7','in_stadium','area_id','pair']].to_sql('euro2024_09_fig5_fig6_temporal3', engine, if_exists='replace', index=False)

182

In [73]:
data2.groupby(['germany']).match.apply(len).reset_index().rename(columns={0:'norm'})

Unnamed: 0,germany,match
0,False,8947
1,True,2235


In [74]:
data2[(data2.city.isin(['Stuttgart','Frankfurt am Main','Dortmund','München']))]\
    .groupby(['germany']).match.apply(len).reset_index().rename(columns={0:'norm'})

Unnamed: 0,germany,match
0,False,8947
1,True,2235


In [75]:
df_norm = data2[(data2.hour_rel.isin([-2,-1,0,1,2,3])) & (data2.city.isin(['Stuttgart','Frankfurt am Main','Dortmund','München']))]\
    .groupby(['germany']).match.apply(len).reset_index().rename(columns={0:'norm'})
df_norm

Unnamed: 0,germany,match
0,False,2368
1,True,866


In [61]:
df_norm_bytime = data2[(data2.hour_rel.isin([-2,-1,0,1,2,3])) & (data2.city.isin(['Stuttgart','Frankfurt am Main','Dortmund','München']))].copy(deep=True)
to_match_rel = {-2:'before match', -1:'before match', 0:'during match', 1:'during match', 2:'after match', 3:'after match'}
df_norm_bytime['match_rel'] = [to_match_rel[h] for h in df_norm_bytime.hour_rel]
df_norm_bytime = df_norm_bytime.groupby(['germany','match_rel']).apply(len).reset_index().rename(columns={0:'norm'})
df_norm_bytime

Unnamed: 0,germany,match_rel,norm
0,False,after match,553
1,False,before match,1176
2,False,during match,607
3,True,after match,105
4,True,before match,601
5,True,during match,160


In [62]:
data2[(data2.city.isin(['Stuttgart','Frankfurt am Main','Dortmund','München']))][['day','city','germany']].drop_duplicates()\
    .groupby('germany').apply(len)

germany
False    17
True      5
dtype: int64

In [None]:
query = """
    with restr as (
    	select city, germany, hour_rel, tl7, tl7/10000000 as tile_id
    	from euro2024_09_fig5_fig6_temporal
    	--limit 1
    )
    , tile7s as (
    	select restr.city, restr.tile_id, restr.germany, restr.hour_rel, restr.tl7, t2o.tile, t2o.osm_ids
    	from restr
    	join tuberlin_euro2024_tile2osmid t2o on restr.tile_id = t2o.tile_id
    )
    , tile8s as (
    	select *, (tl7::text || unnest(array[0,1,2,3])::text)::int8 as tl8
    	from tile7s
    )
    , tile7s_mapped as (
    	select city, tile_id, germany, hour_rel, tl7 as tl, unnest(osm_ids) as osm_id
    	from tile7s
    	where tile = tl7
    	or tile = tl7/10
    	or tile = tl7/100
    	or tile = tl7/1000
    	or tile = tl7/10000
    	or tile = tl7/100000
    	or tile = tl7/1000000
    )
    , tile8s_mapped as (
    	select city, tile_id, germany, hour_rel, tl8 as tl, unnest(osm_ids) as osm_id
    	from tile8s
    	where tile = tl8
    )
    , mapped as (
    	select *
    	from tile7s_mapped
    	union all
    	select *
    	from tile8s_mapped
    )
    select m.city, m.germany, m.hour_rel, m.tl, osm.*
    from mapped as m
    join tuberlin_euro2024_osm as osm on osm.osm_id = m.osm_id and osm.tile_id = m.tile_id
"""
data_osm = pd.DataFrame(pd.read_sql_query(query, conn))
data_osm

In [None]:
data_osm[list(set([col for col in data_osm.columns if col!='way']))].to_csv('data/fig5/contacts_matchdays_osmmapped.csv', index=False)

In [None]:
query = """
    with restr as (
    	select city, germany, hour_rel, tl7, tl7/10000000 as tile_id
    	from euro2024_09_fig5_fig6_temporal
    	--limit 1
    )
    , tile7s as (
    	select restr.city, restr.tile_id, restr.germany, restr.hour_rel, restr.tl7, t2o.tile, t2o.osm_ids
    	from restr
    	join tuberlin_euro2024_tile2osmid t2o on restr.tile_id = t2o.tile_id
    )
    , tile8s as (
    	select *, (tl7::text || unnest(array[0,1,2,3])::text)::int8 as tl8
    	from tile7s
    )
    , tile7s_mapped as (
    	select city, tile_id, germany, hour_rel, tl7 as tl, unnest(osm_ids) as osm_id
    	from tile7s
    	where tile = tl7
    	or tile = tl7/10
    	or tile = tl7/100
    	or tile = tl7/1000
    	or tile = tl7/10000
    	or tile = tl7/100000
    	or tile = tl7/1000000
    )
    , tile8s_mapped as (
    	select city, tile_id, germany, hour_rel, tl8 as tl, unnest(osm_ids) as osm_id
    	from tile8s
    	where tile = tl8
    )
    , mapped as (
    	select *
    	from tile7s_mapped
    	union all
    	select *
    	from tile8s_mapped
    )
    select m.*, poi.osm_id_point
    	   , poi.amenity as amenity_point, poi.service as service_point, poi.shop as shop_point, poi.tourism as tourism_point
    from mapped as m
    join euro2024_poi_all_smallestpolygon_cities as poi on poi.osm_id_polygon = m.osm_id
"""
data_osmpoint = pd.DataFrame(pd.read_sql_query(query, conn))
data_osmpoint

In [None]:
data_osmpoint.to_csv('data/fig5/contacts_matchdays_osmpointmapped2.csv', index=False)

In [None]:
query = """
    with restr as (
    	select city, germany, hour_rel, tl7, tl7/10000000 as tile_id, "day"
    		, (string_to_array(pair, '_'))[1] as did1, (string_to_array(pair, '_'))[2] as did2
    	from euro2024_09_fig5_fig6_temporal2 as cn
    	--limit 2
    )
    , cn3 as (
    	select restr.*, st_translate(st_setsrid(tile7togeo(tl7/10), 32632), tx.minx, tx.miny) as contact
    	from restr
    	join txc_dt_grid_1000m as tx on tx.tile_id = restr.tile_id
    )
    , cn_hw as (
	    select city, germany, hour_rel, tl7 as tl, tile_id --, cn3.did
	    		, st_distance(cn3.contact, h1.weighted_centroid) <= 100 as is_home1
	    		, st_distance(cn3.contact, w1.weighted_centroid) <= 100 as is_work1
	    		, st_distance(cn3.contact, h2.weighted_centroid) <= 100 as is_home2
	    		, st_distance(cn3.contact, w2.weighted_centroid) <= 100 as is_work2
	    from cn3
	    left join home_work_sdkv6 as h1
	    on h1.did = cn3.did1 and h1.valid_for = (date_trunc('month', "day") + interval '1 month - 1 day')::date
	    left join home_work_sdkv6 as h2
	    on h2.did = cn3.did2 and h2.valid_for = (date_trunc('month', "day") + interval '1 month - 1 day')::date
	    left join home_work_sdkv6 as w1
	    on w1.did = cn3.did1 and w1.valid_for = (date_trunc('month', "day") + interval '1 month - 1 day')::date
	    left join home_work_sdkv6 as w2
	    on w2.did = cn3.did2 and w2.valid_for = (date_trunc('month', "day") + interval '1 month - 1 day')::date
	    where h1.place='home' and h2.place='home' and w1.place='work' and w2.place='work'
	)
	select city, germany, hour_rel, tl, tile_id, (is_home1 or is_home2) as is_home, (is_work1 or is_work2) as is_work
    from cn_hw
"""
data_hw = pd.DataFrame(pd.read_sql_query(query, conn))
data_hw

In [None]:
data_hw.to_csv('data/fig5/contacts_matchdays_homework.csv', index=False)

- `route='light_rail'`: 579593100010 (far from stadium)
- `amenity='parking'`: 610751303310 (near stadium)
- `leisure='stadium'`: 6107513132(20) (inside stadium)
- `amenity='fast_food'`: 601852203230 (city center)

In [36]:
tile_cs = [579593100010, 610751303310, 610751313220, 601852203230]

In [37]:
query_polygon = lambda tile_c: f"""
    with t2osm as (
        select t2o.tile_id, tile, unnest(osm_ids) as osm_id
            , st_transform(st_translate(st_setsrid(tile6togeo(t2o.tile), 32632), tx.minx, tx.miny), 3857) as geopoint_t2o
            , st_transform(st_translate(st_setsrid(tile{len(str(tile_c))-4}togeo({tile_c}), 32632), tx.minx, tx.miny), 3857) as geopoint_c
        from tuberlin_euro2024_tile2osmid as t2o
        join txc_dt_grid_1000m as tx on t2o.tile_id = tx.tile_id
        where tile = {tile_c}
        or tile = {tile_c}/10
        or tile = {tile_c}/100
        or tile = {tile_c}/1000
    )
    , res as (
        select osm.way, tile72polygon(t2osm.geopoint_c), t2osm.*
        from t2osm
        join tuberlin_euro2024_osm as osm on osm.osm_id = t2osm.osm_id and osm.tile_id = t2osm.tile_id
        --where osm.route='light_rail'   --osm.leisure='stadium'--osm.barrier='wall'--osm.amenity='parking'--osm.route='light_rail'
    )
    select way as geom, osm_id
    from res
    union all
    select tile72polygon as geom, osm_id
    from res
"""

query_point = lambda tile_c: f"""
    with t2osm as (
        select t2o.tile_id, tile, unnest(osm_ids) as osm_id
            , st_transform(st_translate(st_setsrid(tile6togeo(t2o.tile), 32632), tx.minx, tx.miny), 3857) as geopoint_t2o
            , st_transform(st_translate(st_setsrid(tile{len(str(tile_c))-4}togeo({tile_c}), 32632), tx.minx, tx.miny), 3857) as geopoint_c
        from tuberlin_euro2024_tile2osmid as t2o
        join txc_dt_grid_1000m as tx on t2o.tile_id = tx.tile_id
        where tile = {tile_c}
    )
    , res as (
        select poi.way_point, tile72polygon(t2osm.geopoint_c), poi.way_polygon, t2osm.*
        from t2osm
        join euro2024_poi_all_smallestpolygon as poi on poi.osm_id_polygon = t2osm.osm_id
        --where poi.amenity='fast_food'
    )
    select way_point as geom, osm_id
    from res
    union all
    select tile72polygon as geom, osm_id
    from res
"""

In [35]:
for tile_c in tile_cs:
    print(f'processing tile {tile_c}')

    gdf_polygons = gpd.read_postgis(query_polygon(tile_c), engine, geom_col='geom')
    lpol = len(gdf_polygons)//2
    
    gdf_points = gpd.read_postgis(query_point(tile_c), engine, geom_col='geom')
    lpoi = len(gdf_points)//2
    
    gdf = pd.concat([gdf_polygons.iloc[:lpol], gdf_points.iloc[:lpoi], gdf_polygons.iloc[lpol:], gdf_points.iloc[lpoi:]])
    gdf['color'] = ['blue']*(lpol) + ['orange']*(lpoi) + ['red']*(lpol+lpoi)
    gdf.to_file(f'data/fig5/contact_osm_map_osmid_{tile_c}.gpkg', driver='GPKG')

processing tile 579593100010
processing tile 610751303310
processing tile 610751313220
processing tile 601852203230


In [33]:
query_osminfo = lambda gdf: f"""
    select distinct *
    from tuberlin_euro2024_osm
    where osm_id in ({', '.join([str(oid) for oid in gdf.osm_id.drop_duplicates().tolist()])})
"""

In [34]:
for tile_c in tile_cs:
    print(f'processing OSM ID {tile_c}')
    gdf = gpd.read_file(f'data/fig5/contact_osm_map_osmid_{tile_c}.gpkg')
    osmid2attr = pd.DataFrame(pd.read_sql_query(query_osminfo(gdf), conn))

    for osm_id in osmid2attr.osm_id.drop_duplicates():
        print(f'\t{osm_id}')
        for k, v in osmid2attr[osmid2attr.osm_id==osm_id].iloc[0].items():
            if v is not None and k!='way':
                print(f'\t{k} {v}')
        print()

processing OSM ID 579593100010
	-898582
	tile_id 56619
	plz 70771
	osm_id -898582
	name U6: Fasanenhof => Gerlingen
	operator Stuttgarter Straßenbahnen AG
	ref U6
	route light_rail
	z_order 0
	way_area nan

	-898047
	tile_id 57065
	plz 70565
	osm_id -898047
	name U5: Leinfelden => Killesberg
	operator Stuttgarter Straßenbahnen AG
	ref U5
	route light_rail
	z_order 0
	way_area nan

	-162288
	tile_id 57069
	plz 70599
	osm_id -162288
	name U3: Plieningen => Vaihingen
	operator Stuttgarter Straßenbahnen AG
	ref U3
	route light_rail
	z_order 0
	way_area nan

	-1214006
	tile_id 57510
	plz 70565
	osm_id -1214006
	name U12: Dürrlewang => Remseck
	operator Stuttgarter Straßenbahnen AG
	ref U12
	route light_rail
	z_order 0
	way_area nan

	23711738
	tile_id 57513
	plz 70567
	osm_id 23711738
	operator SSB AG
	railway light_rail
	z_order 35
	way_area nan

	-253395
	tile_id 57957
	plz 70563
	osm_id -253395
	name U8: Vaihingen => Nellingen Ostfildern
	operator Stuttgarter Straßenbahnen AG
	ref U8
	ro

# Figure 6: contact types

In [None]:
query = f"""
    select "day", stime, dids, st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat, tl7--, array_length(inside_building, 1) > 0 as inside_building
    from covid_network_sdkv6_tl7_10m as cn
    --join txc_dt_grid_1000m as tx on cn.tile_id = tx.tile_id
    where "day" between '{di}' and '{df}'
"""
data = pd.DataFrame(pd.read_sql_query(query, conn))
data

In [None]:
data_sv = data.copy(deep=True)

In [None]:
data = data.explode('dids').reset_index(drop=True)
data = data.drop_duplicates()
data = data.merge(data.drop(columns=['lon','lat']), on=['day','stime','tl7'])#,'tl7','inside_building','lon','lat'])
data = data[data.dids_x != data.dids_y]
pairs = []
for did1, did2 in zip(data.dids_x, data.dids_y):
    pair = f'{did1}_{did2}' if did1 < did2 else f'{did2}_{did1}'
    #print(did1, did2, pair)
    pairs.append(pair)
data.loc[:,'pair'] = pairs
data = data.drop(columns=['dids_x','dids_y','tl7'])
data = data.drop_duplicates()
dmin = data.day.min()
#data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24 + data.stime.dt.hour
data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24*6 + data.stime.dt.hour*6 + (data.stime.dt.minute//10)
print(data.tt.max(), ((data.day.max()-dmin).days+1)*24, ((data.day.max()-dmin).days+1)*720)
data

In [None]:
data.to_csv('data/fig6/follow_didpairs.csv', index=False)

In [None]:
# contact number and duration per category for host cities
query = f"""
    with restricted as (
    	select cn.*
    	from tuberlin_euro2024_tileid as ti
    	join covid_network_sdkv6_{cdef} as cn on cn.tile_id = ti.tile_id
        where
                "day" between '{di}' and '{df}'
    ),
    cn_tmp as (
        select
                  tl{cdef[2]}
                , geopoint
                , "day"
                , stime
                , dids
                --, sources
             	--, bool_or(u.dist_stad < csa.radius_in_meter) as in_stadium
                --, min(u.area_id) as area_id
            from restricted, unnest(area_ids, dist_stads) u(area_id, dist_stad)
            left join cluster_search_areas_v2 csa on csa.area_id = u.area_id
            --group by 1,2,3,4,7
    ),
    cities2 as (
        select osm_id, "name", max(way_area) as way_area
        from tuberlin_euro2024_contour
        group by 1,2
    ),
    cities3 as (
        select c1."name", c1.way
        from tuberlin_euro2024_contour as c1
        join cities2 as c2 on c1.way_area = c2.way_area
    ),
    cn as (
        select
                  tl{cdef[2]}
                , "day"
                , stime
                , unnest(dids) as did
                --, sources
        		--, in_stadium
                --, area_id
                , "name" as city
                , geopoint
        from cn_tmp
        join cities3 as c3 on st_contains(c3.way, cn_tmp.geopoint)
    ),
    cn_homes as (
        select
              tl{cdef[2]}
            , "day"
            , stime
            , array_agg(cn.did order by cn.did) as dids
            , city
            , st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat
            --, array_agg(st_distance(st_transform(hw.weighted_centroid, 3857), geopoint) order by cn.did) as homedists
            , array_agg(st_x(st_transform(hw.weighted_centroid, 4326)) order by cn.did) as lons_home
            , array_agg(st_y(st_transform(hw.weighted_centroid, 4326)) order by cn.did) as lats_home
        from cn
        left join home_work_sdkv6 as hw on hw.valid_for = date_trunc('month', cn."day") + INTERVAL '1 month' - INTERVAL '1 day'
        and hw.did = cn.did and hw.place = 'home'
        group by 1,2,3,5,6,7
    )
    select *
    from cn_homes
"""
data = pd.DataFrame(pd.read_sql_query(query, conn))
data

In [None]:
data_sv = data.copy(deep=True)

In [None]:
data = data_sv.copy(deep=True)

In [None]:
data = data.explode(['dids','lons_home','lats_home']).reset_index(drop=True)
data = data.drop_duplicates()
data = data.merge(data.drop(columns=['lon','lat']), on=['day','city','stime','tl7'])#,'tl7','inside_building','lon','lat'])
data = data[data.dids_x != data.dids_y]
pairs, dhome1s, dhome2s, dhometohomes = [], [], [], []
for i, (did1, did2, lon, lat, lon1, lon2, lat1, lat2) in enumerate(zip(data.dids_x, data.dids_y, data.lon, data.lat, data.lons_home_x, data.lons_home_y, data.lats_home_x, data.lats_home_y)):
    if (i+1)%10000==0:
        print(f'progress: {i+1}/{len(data)}')
    pair = f'{did1}_{did2}' if did1 < did2 else f'{did2}_{did1}'
    #print(did1, did2, pair)
    pairs.append(pair)
    if did1 < did2:
        if None not in [lon, lon1, lat, lat1]:
            dhome1s.append(geodesic([lat, lon], [lat1, lon1]).meters)
        else:
            dhome1s.append(np.nan)
        if None not in [lon, lon2, lat, lat2]:
            dhome2s.append(geodesic([lat, lon], [lat2, lon2]).meters)
        else:
            dhome2s.append(np.nan)
    else:
        if None not in [lon, lon1, lat, lat1]:
            dhome2s.append(geodesic([lat, lon], [lat1, lon1]).meters)
        else:
            dhome2s.append(np.nan)
        if None not in [lon, lon2, lat, lat2]:
            dhome1s.append(geodesic([lat, lon], [lat2, lon2]).meters)
        else:
            dhome1s.append(np.nan)
    #print(lon1, lon2, lat1, lat2, geodesic([lat1, lon1], [lat2, lon2]).meters)
    if None not in [lon1, lon2, lat1, lat2]:
        dhometohomes.append(geodesic([lat1, lon1], [lat2, lon2]).meters)
    else:
        dhometohomes.append(np.nan)
data.loc[:,'pair'] = pairs
data.loc[:,'dhome1'] = dhome1s
data.loc[:,'dhome2'] = dhome2s
data.loc[:,'dhometohome'] = dhometohomes
data = data.drop(columns=['dids_x','dids_y','lons_home_x','lons_home_y','lats_home_x','lats_home_y','tl7'])
data = data.drop_duplicates()
dmin = data.day.min()
#data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24 + data.stime.dt.hour
data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24*6 + data.stime.dt.hour*6 + (data.stime.dt.minute//10)
print(data.tt.max(), ((data.day.max()-dmin).days+1)*24, ((data.day.max()-dmin).days+1)*720)
data

In [None]:
print('test triangle inequality:')
data_tmp = data[(~data.dhome1.isna()) & (~data.dhome2.isna()) & (~data.dhometohome.isna())]
print(len(data_tmp))
print(sum(~(data_tmp.dhometohome >= (data_tmp.dhome1 - data_tmp.dhome2).abs())))
print(sum(~(data_tmp.dhometohome <= (data_tmp.dhome1 + data_tmp.dhome2))))

In [None]:
data_tmp[~(data_tmp.dhometohome <= (data_tmp.dhome1 + data_tmp.dhome2))]

In [None]:
data.to_csv('data/fig6/follow_didpairs_cities.csv', index=False)

# Figure 7: POI contacts Germany

## home/work

In [None]:
query = lambda d: f"""
    with cn2 as (
    	select "day", stime, tl{cdef[2]}, geopoint, st_translate(st_setsrid(tile7togeo(tl7/10), 32632), tx.minx, tx.miny) as contact, dids
    	from covid_network_sdkv6_{cdef} as cn
    	join txc_dt_grid_1000m as tx on tx.tile_id = cn.tile_id
    	where "day" = '{str(d)}'
    	--limit 2
    )
    , cn3 as (
    	select "day", stime, tl{cdef[2]}, geopoint, contact, unnest(dids) as did
    	from cn2
    )
    , cn_hw as (
    	select "day", stime, tl{cdef[2]}, cn3.did, geopoint
    		, st_distance(cn3.contact, h.weighted_centroid) <= 100 as is_home
    		, st_distance(cn3.contact, w.weighted_centroid) <= 100 as is_work
    	from cn3
    	left join home_work_sdkv6 as h
    	on h.did = cn3.did and h.valid_for = (date_trunc('month', "day") + interval '1 month - 1 day')::date
    	left join home_work_sdkv6 as w
    	on w.did = cn3.did and w.valid_for = (date_trunc('month', "day") + interval '1 month - 1 day')::date
    	where h.place='home' and w.place='work'
    )
    select "day", stime, tl{cdef[2]}, st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat
    		, array_agg(did) as dids, bool_or(is_home) as is_home, bool_or(is_work) as is_work
    from cn_hw
    group by 1,2,3,4,5
"""
data_m1 = pd.DataFrame()
for d in [ds[0]-timedelta(1)]+ds[:]:
    print(f'processing {str(d)}')
    data_m1_tmp = pd.DataFrame(pd.read_sql_query(query(d), conn))
    data_m1 = pd.concat([data_m1, data_m1_tmp])

In [None]:
data_m1 # 5165948 rows × 8 columns

In [None]:
data_m1.to_csv('data/fig7/homework_contacts.csv', index=False)

## OSM polygon POIs

In [None]:
query = lambda d: f"""
    with cn_poi as (
    	select *
    	from covid_network_sdkv6_{cdef} as cn
        join tuberlin_euro2024_osm2 as poi on poi.tile_id = cn.tile_id
        join planet_osm_polygon as osm on osm.osm_id = poi.osm_id
    	where "day" = '{str(d)}'
        and st_intersects(osm.way, tile{cdef[2]}2polygon(st_transform(cn.geopoint, 3857)))
    )
    select "day", stime, dids, st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat, tl7, cn_poi.*--, way_polygon, geopoint
    from cn_poi
"""
data_0 = pd.DataFrame()
for d in [ds[0]-timedelta(1)]+ds[:]:
    print(f'processing {str(d)}')
    data_0_tmp = pd.DataFrame(pd.read_sql_query(query(d), conn))
    data_0_tmp = data_0_tmp.drop(columns=['way','geopoint','inside_building'])
    data_0 = pd.concat([data_0, data_0_tmp])

In [None]:
data_0.to_csv('data/fig7/osmpolygonline_contacts.csv', index=False)

## OSM node/point POIs

In [None]:
query = lambda d: f"""
    with cn_poi as (
    	select *
    	from covid_network_sdkv6_{cdef} as cn
    	join euro2024_poi_all as poi on poi.tile_id = cn.tile_id
    	where "day" = '{str(d)}'
        and st_intersects(poi.way_polygon, tile{cdef[2]}2polygon(st_transform(cn.geopoint, 3857)))
    )
    select amenity, service, shop, tourism, "day", stime, dids, st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat, tl7--, way_polygon, geopoint
    from cn_poi
"""
data_1 = pd.DataFrame()
for d in ds[:]:
    print(f'processing {str(d)}')
    data_1_tmp = pd.DataFrame(pd.read_sql_query(query(d), conn))
    data_1 = pd.concat([data_1, data_1_tmp])

In [None]:
data_1.to_csv('data/fig7/osmnode_contacts.csv', index=False)

## fan zone POIs

- 17707818
- 178050313
- -7360802
- 143672616
- -222512 146613649
- 165636449
- 172420880 (172341584)
- 4218882
- -3538248
- 4979228
- -3995902
- -6404164
- 10053878
- 3933618
- 3990007
- 507962489
- 3996756
- 231448110, 4567556, 503322108, 29065531, 147841549, 532727925 (line)
- -1769862
- 417328807, 278980053, 417328805 (point)
- 499593498
- 24240304

In [None]:
query = f"""
    with osm_polygons as (
    	select osm_id, name, way
    	from planet_osm_polygon
    	where osm_id in (17707818,178050313,-7360802,143672616,-222512,146613649,165636449,172420880,172341584,4218882,
    					 -3538248,4979228,-3995902,-6404164,10053878,3933618,3990007,507962489,3996756,-1769862,499593498,24240304)
    )
    , osm_lines as (
    	select osm_id, name, way
    	from planet_osm_line
    	where osm_id in (231448110, 4567556, 503322108, 29065531, 147841549, 532727925)
    )
    , osm_points as (
    	select osm_id, name, way
    	from planet_osm_point
    	where osm_id in (417328807, 278980053, 417328805)
    )
    , osm as (
    	select *
    	from osm_polygons
    	union all
    	select *
    	from osm_lines
    	union all
    	select *
    	from osm_points
    )
    select "day", stime, dids, st_x(st_transform(geopoint, 4326)) as lon, st_y(st_transform(geopoint, 4326)) as lat, tl7, osm.osm_id, osm.name--, cn.geopoint, osm.osm_id, osm.way
    from covid_network_sdkv6_{cdef} as cn, osm
    where "day" between '{str(di-timedelta(1))}' and '{str(df)}'
    and st_distance(st_transform(cn.geopoint, 3857), osm.way) <= 100
"""
data_2 = pd.DataFrame(pd.read_sql_query(query, conn))
data_2 # 7346 rows × 8 columns

In [None]:
data_2.to_csv('data/fig7/fanzone0_contacts.csv', index=False)

In [39]:
yeari, yearf = '2024', '2024'
weeki, weekf = '18', '31'

In [40]:
di = datetime.strptime(f'{yeari}-{weeki}-1', "%Y-%W-%w").date()
df = datetime.strptime(f'{yearf}-{weekf}-1', "%Y-%W-%w").date() + timedelta(6)
ds = [di+timedelta(dt) for dt in range((df-di).days+1)]
daylist = ds
print(di, 'until', df)

2024-04-29 until 2024-08-04


In [41]:
cdef = 'tl7_10m'# 'tl5_10m' 'tl6_10m' 'tl7_10m' 'tl8_10m' 'tl8_60m'
cdef_alt = '16m_10min'# tl5: 62 ... tl7: 16   tl8: 8

In [42]:
data_m1 = pd.read_csv('data/fig7/homework_contacts.csv', low_memory=False)
data_m1['day'] = [d.date() for d in pd.to_datetime(data_m1.day)]
data_m1['stime'] = pd.to_datetime(data_m1.stime)
data_m1['dids'] = data_m1.dids.apply(lambda x: x[2:-2].split("', '"))

data_m1['stime'] = data_m1.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
data_m1['day'] = data_m1.stime.apply(lambda x: x.date())
data_m1 = data_m1[(data_m1.day >= di) & (data_m1.day <= df)]

data_home = data_m1[[col for col in data_m1.columns if col!='is_work']]
data_home = data_home[data_home.is_home]
data_home['venue'] = 'home'
data_home = data_home.drop(columns=['is_home'])
data_work = data_m1[[col for col in data_m1.columns if col!='is_home']]
data_work = data_work[data_work.is_work]
data_work['venue'] = 'work'
data_work = data_work.drop(columns=['is_work'])
data_m1 = pd.concat([data_home, data_work])
data_m1

Unnamed: 0,day,stime,tl7,lon,lat,dids,venue
46825,2024-04-29,2024-04-29 00:00:00+02:00,75373033330,9.489251,47.660960,[3F87D5A10A221D42E76428BDE19F5C00],home
46826,2024-04-29,2024-04-29 00:00:00+02:00,240503203120,9.504092,48.012641,"[2021AE18E46647BDF7021B39E0DD1F63, AEB2D0A201C...",home
46827,2024-04-29,2024-04-29 00:00:00+02:00,243880220030,8.477211,48.017618,"[1CEFF206645FC9CE92C799CE949CC841, F1B2677919E...",home
46828,2024-04-29,2024-04-29 00:00:00+02:00,283631101020,12.218210,48.051318,"[AE1EE3BCB2B2E5E1564A444FDC001941, 2A34B33CD8E...",home
46829,2024-04-29,2024-04-29 00:00:00+02:00,287332021330,11.713671,48.079257,"[A2233ECDFE8A4C1284B9801FA3C4A841, A2233ECDFE8...",home
...,...,...,...,...,...,...,...
5165031,2024-08-04,2024-08-04 23:40:00+02:00,3169013021120,7.170709,53.124114,"[59870CF41CD2A774C870D951585BD1B2, 4352CA420E1...",work
5165037,2024-08-04,2024-08-04 23:40:00+02:00,3407500112210,10.847004,53.541466,[259C382A3F16B90B5E8BC0F546C8EE1F],work
5165041,2024-08-04,2024-08-04 23:40:00+02:00,3645921300300,14.267073,53.918205,"[5B5C40E6358A225CAF151693D3A1E90B, 5B5C40E6358...",work
5165053,2024-08-04,2024-08-04 23:50:00+02:00,706032221320,8.809893,48.995041,"[AB6E3CBE8C085BC4E84426995F3931F0, 25DA0C1188F...",work


In [43]:
data_0 = pd.read_csv('data/fig7/osmpolygonline_contacts.csv', low_memory=False)
data_0['day'] = [d.date() for d in pd.to_datetime(data_0.day)]
data_0['stime'] = pd.to_datetime(data_0.stime)
data_0['dids'] = data_0.dids.apply(lambda x: x[2:-2].split("', '"))

data_0['stime'] = data_0.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
data_0['day'] = data_0.stime.apply(lambda x: x.date())
data_0 = data_0[(data_0.day >= di) & (data_0.day <= df)]
data_0 = data_0.drop(columns=[f'tl{cdef[2]}.1','day.1','stime.1','dids.1','tile_id.1','osm_id.1'])

feats = ['access','aerialway','aeroway','amenity','area',
         'barrier','bicycle','bridge','boundary','building',
         'construction','covered','cutting',
         'denomination','disused','embankment',
         'foot',
         'generator:source',
         'harbour', 'highway','historic','horse',
         'intermittent',
         'junction',
         'landuse','leisure',
         'man_made','military','motorcar',
         'natural','office','oneway',
         'place','power','public_transport',
         'railway','religion',
         'service','shop','sport','surface',
         'tourism','tower:type','tunnel',
         'water','waterway','wetland','wood'
        ]
data_0b = pd.DataFrame()
for feat in feats[:]:
    data_tmp = data_0[[feat,'day','stime','dids','lon','lat',f'tl{cdef[2]}']]
    data_tmp = data_tmp[~data_tmp[feat].isna()]
    data_tmp = data_tmp.rename(columns={feat:'venue'})
    data_tmp['venue'] = data_tmp.venue.apply(lambda x: feat+':'+x)
    data_tmp['venue_type'] = feat
    data_0b = pd.concat([data_0b, data_tmp])
data_0 = data_0b
data_0

Unnamed: 0,venue,day,stime,dids,lon,lat,tl7,venue_type
2329,access:yes,2024-04-29,2024-04-29 00:40:00+02:00,"[42070EE2911BC2E35231CDA02822D414, B193D35838F...",13.444789,52.485052,2850361300200,access
2336,access:customers,2024-04-29,2024-04-29 01:10:00+02:00,"[184DA607D0BC230DA4D70C394C9F9E68, 184DA607D0B...",13.368460,52.525716,2872350322330,access
10662,access:yes,2024-04-29,2024-04-29 00:40:00+02:00,"[42070EE2911BC2E35231CDA02822D414, B193D35838F...",13.424629,52.486090,2850350312000,access
13687,access:yes,2024-04-29,2024-04-29 15:10:00+02:00,"[84A908665AA48656C32700E743F6085A, C19588E8A14...",6.916182,50.968653,1791311231200,access
13726,access:customers,2024-04-29,2024-04-29 11:20:00+02:00,"[F5D1D31D251CD5316416BF8EC6EDEA41, F5D1D31D251...",6.820269,51.229406,1977193103200,access
...,...,...,...,...,...,...,...,...
1650079,wood:deciduous,2024-07-27,2024-07-27 13:20:00+02:00,"[453E271CA0BB11A9580BCA04CFA07178, D79B7A99D1C...",7.492388,51.576796,2217021000320,wood
1650080,wood:deciduous,2024-07-27,2024-07-27 13:20:00+02:00,"[453E271CA0BB11A9580BCA04CFA07178, D79B7A99D1C...",7.495107,51.576410,2217021011100,wood
1661628,wood:deciduous,2024-07-28,2024-07-28 14:40:00+02:00,"[42A8E5856CEF4F2BA1300D202EC34D71, 6EBE7B2E648...",7.495581,51.575713,2210753320220,wood
1737877,wood:deciduous,2024-08-02,2024-08-02 16:30:00+02:00,"[5E9D0AC5D3DE401591949376167A864D, D1F41CD24E0...",6.852346,51.232670,1977222221020,wood


In [44]:
for v in sorted(data_0.venue.unique()):
    print(v)

access:It's_equally_important_to_use_ramp=no.
access:bus
access:clients
access:customer
access:customers
access:customers;delivery
access:customersq22
access:delivery
access:designated
access:destination
access:disabled
access:emergency
access:employees
access:green_sticker_germany
access:hgv
access:loading
access:no
access:official
access:permissive
access:permit
access:police
access:private
access:private3
access:prohibited
access:public
access:restricted
access:service
access:unknown
access:visitors
access:yes
aerialway:station
aeroway:aerodrome
aeroway:apron
aeroway:hangar
aeroway:helipad
aeroway:heliport
aeroway:jet_bridge
aeroway:terminal
amenity:Travel_agency
amenity:abandoned
amenity:agency
amenity:animal_boarding
amenity:animal_shelter
amenity:arts_centre
amenity:atm
amenity:baggage_claim
amenity:bakery
amenity:bank
amenity:bar
amenity:bbq
amenity:bench
amenity:bicycle_parking
amenity:bicycle_rental
amenity:biergarten
amenity:boat_rental
amenity:boathouse
amenity:brothel
ameni

In [45]:
data_1 = pd.read_csv('data/fig7/osmnode_contacts.csv')
data_1['day'] = [d.date() for d in pd.to_datetime(data_1.day)]
data_1['stime'] = pd.to_datetime(data_1.stime)
data_1['dids'] = data_1.dids.apply(lambda x: x[2:-2].split("', '"))

data_1['stime'] = data_1.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
data_1['day'] = data_1.stime.apply(lambda x: x.date())
data_1 = data_1[(data_1.day >= di) & (data_1.day <= df)]

data_amenity = data_1[[col for col in data_1.columns if col not in ['service','shop','tourism']]]
data_amenity = data_amenity[~data_amenity.amenity.isna()]
data_amenity = data_amenity.rename(columns={'amenity':'venue'})
data_amenity['venue'] = data_amenity.venue.apply(lambda x: '*amenity:'+x)
data_amenity['venue_type'] = 'amenity'
data_service = data_1[[col for col in data_1.columns if col not in ['amenity','shop','tourism']]]
data_service = data_service[~data_service.service.isna()]
data_service = data_service.rename(columns={'service':'venue'})
data_service['venue'] = data_service.venue.apply(lambda x: '*service:'+x)
data_service['venue_type'] = 'service'
data_shop = data_1[[col for col in data_1.columns if col not in ['amenity','service','tourism']]]
data_shop = data_shop[~data_shop.shop.isna()]
data_shop = data_shop.rename(columns={'shop':'venue'})
data_shop['venue'] = data_shop.venue.apply(lambda x: '*shop:'+x)
data_shop['venue_type'] = 'shop'
data_tourism = data_1[[col for col in data_1.columns if col not in ['amenity','service','shop']]]
data_tourism = data_tourism[~data_tourism.tourism.isna()]
data_tourism = data_tourism.rename(columns={'tourism':'venue'})
data_tourism['venue'] = data_tourism.venue.apply(lambda x: '*tourism:'+x)
data_tourism['venue_type'] = 'tourism'
data_1 = pd.concat([data_amenity, data_service, data_shop, data_tourism])
data_1

Unnamed: 0,venue,day,stime,dids,lon,lat,tl7,venue_type
1,*amenity:driving_school,2024-04-29,2024-04-29 10:00:00+02:00,"[1CB8D6D37745A8A0079190B46F574FAB, 1CB8D6D3774...",11.176107,47.998614,245890323010,amenity
2,*amenity:doctors,2024-04-29,2024-04-29 09:20:00+02:00,"[597C9DDAE3780ED653C7C3A75C30384D, 9186E28A8C0...",8.592681,49.123227,776600122320,amenity
3,*amenity:cafe,2024-04-29,2024-04-29 10:10:00+02:00,"[9DE0240AEBFBD1C2BB670EF4C7B27246, E724DEC8A83...",8.598889,49.123530,776601211130,amenity
4,*amenity:atm,2024-04-29,2024-04-29 07:50:00+02:00,"[2153A287300E13AEE8036A7904EA9448, C346B7FAA54...",9.236848,49.212110,826830102000,amenity
7,*amenity:doctors,2024-04-29,2024-04-29 14:50:00+02:00,"[1B341CDF09299F4FF8781ADB05D85A2F, E3653AEEF27...",9.773081,49.493255,978123113130,amenity
...,...,...,...,...,...,...,...,...
7306480,*tourism:artwork,2024-08-04,2024-08-04 13:40:00+02:00,"[08D1D7DFC7F7474EAE620A2E60892042, 9DFAD9C4CA4...",13.395766,52.503757,2861350120310,tourism
7306481,*tourism:artwork,2024-08-04,2024-08-04 13:40:00+02:00,"[08D1D7DFC7F7474EAE620A2E60892042, 9DFAD9C4CA4...",13.396824,52.505123,2861350303110,tourism
7306501,*tourism:attraction,2024-08-04,2024-08-04 15:50:00+02:00,"[2738BF92394DD721D1562C3C019A120F, 930D9EF4A63...",8.514383,53.738166,3507993111100,tourism
7306502,*tourism:information,2024-08-04,2024-08-04 15:50:00+02:00,"[2738BF92394DD721D1562C3C019A120F, 930D9EF4A63...",8.514383,53.738166,3507993111100,tourism


In [47]:
data_2 = pd.read_csv('data/fig7/fanzone0_contacts.csv')
data_2['day'] = [d.date() for d in pd.to_datetime(data_2.day)]
data_2['stime'] = pd.to_datetime(data_2.stime)
data_2['dids'] = data_2.dids.apply(lambda x: x[2:-2].split("', '"))

data_2['stime'] = data_2.stime.apply(lambda x: x.astimezone(pytz.timezone('Europe/Berlin')))
data_2['day'] = data_2.stime.apply(lambda x: x.date())
data_2 = data_2[(data_2.day >= di) & (data_2.day <= df)].copy(deep=True)

city2zone = {
    'Berlin':['Platz der Republik','Platz des 18. März','Straße des 17. Juni'],
    'Hamburg':['Heiligengeistfeld'],
    'Gelsenkirchen':['Nordsternpark','Nordsternplatz'],
    'Dortmund':['Friedensplatz','Westfalenpark'],
    'Düsseldorf':['Burgplatz','Gustav-Gründgens-Platz','Rheinpark','Rheinwerft'],
    'Köln':['Aachener Weiher','Heumarkt'],
    'Leipzig':['Dr.-Otto-Koch-Denkmal','Gellert-Denkmal','Moritzbastei','Robert-Koch-Park','Schiller-Denkmal','Tiefgarage Augustusplatz'],
    'Stuttgart':['Karlsplatz','Marktplatz','Schillerplatz','Schlossplatz'],
    'München':['Olympiapark / Olympiagelände'],
    'Frankfurt am Main':['Nizza','Untermainkai'],
}
zone2city = {}
for city, zones in city2zone.items():
    for zone in zones:
        zone2city[zone] = city
data_2['city'] = data_2.name.map(zone2city)

data_2 = data_2.drop(columns=['osm_id','name'])
data_2['venue'] = 'fanzone'
data_2['venue_type'] = 'fanzone'
data_2

Unnamed: 0,day,stime,dids,lon,lat,tl7,city,venue,venue_type
0,2024-05-03,2024-05-03 18:10:00+02:00,"[C056EB142AFB2D8D879EEE46E3B08329, C056EB142AF...",12.376413,51.336893,2076262032130,Leipzig,fanzone,fanzone
1,2024-05-03,2024-05-03 18:10:00+02:00,"[C056EB142AFB2D8D879EEE46E3B08329, E22C4C2C321...",12.376199,51.337039,2076262032300,Leipzig,fanzone,fanzone
2,2024-05-19,2024-05-19 00:30:00+02:00,"[9EBAFB2779D28EC6E553BB639F939343, 9EBAFB2779D...",12.376189,51.336899,2076262032120,Leipzig,fanzone,fanzone
3,2024-05-19,2024-05-19 15:50:00+02:00,"[4CB93908F4225F9CCD4CEE6FC3CB09B7, 4D09CBC748D...",12.376861,51.336880,2076262033030,Leipzig,fanzone,fanzone
4,2024-05-25,2024-05-25 19:30:00+02:00,"[3E25F87C4D7F79B30CBD39B3B26957B6, 409A035ED3E...",12.376189,51.336899,2076262032120,Leipzig,fanzone,fanzone
...,...,...,...,...,...,...,...,...,...
7341,2024-08-03,2024-08-03 17:20:00+02:00,"[A31243123D7AB931491871B8A1ED7085, C60C0C2C1C5...",13.377470,52.516666,2866843111120,Berlin,fanzone,fanzone
7342,2024-08-03,2024-08-03 17:20:00+02:00,"[8961C7696B517E0E4D0476E57E414FAE, A31243123D7...",13.377484,52.516806,2866843111300,Berlin,fanzone,fanzone
7343,2024-07-29,2024-07-29 13:30:00+02:00,"[4DD7BFEFF8C1EF9265F33F709E3B76B4, 4DD7BFEFF8C...",13.377282,52.517095,2866843113010,Berlin,fanzone,fanzone
7344,2024-08-03,2024-08-03 14:20:00+02:00,"[D2B9A15C77BCCD1FBA78D734A29E4940, D81C0F13FE7...",13.377901,52.516369,2866850222220,Berlin,fanzone,fanzone


In [48]:
data = pd.concat([data_1, data_2.drop(columns=['city'])])# data_0, 
data

Unnamed: 0,venue,day,stime,dids,lon,lat,tl7,venue_type
1,*amenity:driving_school,2024-04-29,2024-04-29 10:00:00+02:00,"[1CB8D6D37745A8A0079190B46F574FAB, 1CB8D6D3774...",11.176107,47.998614,245890323010,amenity
2,*amenity:doctors,2024-04-29,2024-04-29 09:20:00+02:00,"[597C9DDAE3780ED653C7C3A75C30384D, 9186E28A8C0...",8.592681,49.123227,776600122320,amenity
3,*amenity:cafe,2024-04-29,2024-04-29 10:10:00+02:00,"[9DE0240AEBFBD1C2BB670EF4C7B27246, E724DEC8A83...",8.598889,49.123530,776601211130,amenity
4,*amenity:atm,2024-04-29,2024-04-29 07:50:00+02:00,"[2153A287300E13AEE8036A7904EA9448, C346B7FAA54...",9.236848,49.212110,826830102000,amenity
7,*amenity:doctors,2024-04-29,2024-04-29 14:50:00+02:00,"[1B341CDF09299F4FF8781ADB05D85A2F, E3653AEEF27...",9.773081,49.493255,978123113130,amenity
...,...,...,...,...,...,...,...,...
7341,fanzone,2024-08-03,2024-08-03 17:20:00+02:00,"[A31243123D7AB931491871B8A1ED7085, C60C0C2C1C5...",13.377470,52.516666,2866843111120,fanzone
7342,fanzone,2024-08-03,2024-08-03 17:20:00+02:00,"[8961C7696B517E0E4D0476E57E414FAE, A31243123D7...",13.377484,52.516806,2866843111300,fanzone
7343,fanzone,2024-07-29,2024-07-29 13:30:00+02:00,"[4DD7BFEFF8C1EF9265F33F709E3B76B4, 4DD7BFEFF8C...",13.377282,52.517095,2866843113010,fanzone
7344,fanzone,2024-08-03,2024-08-03 14:20:00+02:00,"[D2B9A15C77BCCD1FBA78D734A29E4940, D81C0F13FE7...",13.377901,52.516369,2866850222220,fanzone


In [49]:
data = data.explode('dids').reset_index(drop=True)
data = data.drop_duplicates()
data = data.merge(data.drop(columns=['lon','lat']), on=['day','stime','tl7','venue','venue_type',])#,'tl7','inside_building','lon','lat'])
data = data[data.dids_x != data.dids_y]
pairs = []
for did1, did2 in zip(data.dids_x, data.dids_y):
    pair = f'{did1}_{did2}' if did1 < did2 else f'{did2}_{did1}'
    #print(did1, did2, pair)
    pairs.append(pair)
data.loc[:,'pair'] = pairs
data = data.drop(columns=['dids_x','dids_y','tl7'])
data = data.drop_duplicates()
dmin = data.day.min()
#data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24 + data.stime.dt.hour
data['tt'] = data.day.apply(lambda d: (d-dmin).days)*24*6 + data.stime.dt.hour*6 + (data.stime.dt.hour//10)
print(data.tt.max(), ((data.day.max()-dmin).days+1)*24, ((data.day.max()-dmin).days+1)*720)
data['hour'] = data.stime.dt.hour
data

14108 2352 70560


Unnamed: 0,venue,day,stime,lon,lat,venue_type,pair,tt,hour
1,*amenity:driving_school,2024-04-29,2024-04-29 10:00:00+02:00,11.176107,47.998614,amenity,1CB8D6D37745A8A0079190B46F574FAB_834AFE8E2B2DA...,61,10
5,*amenity:doctors,2024-04-29,2024-04-29 09:20:00+02:00,8.592681,49.123227,amenity,597C9DDAE3780ED653C7C3A75C30384D_9186E28A8C037...,54,9
9,*amenity:cafe,2024-04-29,2024-04-29 10:10:00+02:00,8.598889,49.123530,amenity,9DE0240AEBFBD1C2BB670EF4C7B27246_E724DEC8A8350...,61,10
13,*amenity:atm,2024-04-29,2024-04-29 07:50:00+02:00,9.236848,49.212110,amenity,2153A287300E13AEE8036A7904EA9448_C346B7FAA549C...,42,7
17,*amenity:doctors,2024-04-29,2024-04-29 14:50:00+02:00,9.773081,49.493255,amenity,1B341CDF09299F4FF8781ADB05D85A2F_E3653AEEF2710...,85,14
...,...,...,...,...,...,...,...,...,...
14892950,fanzone,2024-07-29,2024-07-29 13:30:00+02:00,13.377282,52.517095,fanzone,4DD7BFEFF8C1EF9265F33F709E3B76B4_90A05C501A5E6...,13183,13
14892951,fanzone,2024-07-29,2024-07-29 13:30:00+02:00,13.377282,52.517095,fanzone,4DD7BFEFF8C1EF9265F33F709E3B76B4_FD66AFCE625BE...,13183,13
14892954,fanzone,2024-07-29,2024-07-29 13:30:00+02:00,13.377282,52.517095,fanzone,90A05C501A5E614E8D6BBE367F9C8639_FD66AFCE625BE...,13183,13
14892959,fanzone,2024-08-03,2024-08-03 14:20:00+02:00,13.377901,52.516369,fanzone,D2B9A15C77BCCD1FBA78D734A29E4940_D81C0F13FE7FD...,13909,14


In [None]:
#data.to_csv('data/fig7/poi_contacts2.csv', index=False)
data.to_csv('data/fig7/poi_contacts3.csv', index=False)