In [1]:
import os
from time import sleep
import json
from tqdm import tqdm
tqdm.pandas(desc='pandas')

import pandas as pd
import numpy as np
from dask import dataframe as dd

from geopy.geocoders import Nominatim
geoEncoder = Nominatim(user_agent='spanish-cities')

def backup(dic, path):
    """Write ``dic`` to the file at ``path`` as JSON indented by 4 spaces.

    The file is opened in write mode, so any previous content is replaced.
    Nothing is returned.
    """
    with open(path, mode='w') as handle:
        json.dump(dic, handle, indent=4)

def restore(path):
    """Load the JSON document stored at ``path`` and return the decoded object."""
    with open(path) as handle:
        content = json.load(handle)
    return content
        
def distance(coords1: tuple, coords2: tuple):
    """Euclidean distance between two coordinate pairs.

    Only the first two entries of each argument are used. The result is in the
    same unit as the inputs; no great-circle correction is applied.
    """
    deltaFirst = coords1[0] - coords2[0]
    deltaSecond = coords1[1] - coords2[1]
    return np.sqrt(deltaFirst**2 + deltaSecond**2)



## city associations

### get glovo city coords

In [2]:
# Geocode every Glovo city once and cache the result in glovoCityCoords.json;
# when the cache file already exists it is read instead of querying again.
df = pd.read_excel('glovo_city_codes.xlsx', index_col=0)

if os.path.exists('glovoCityCoords.json'):
    glovoCityCoords = restore('glovoCityCoords.json')

else:
    glovoCityCoords = dict.fromkeys(list(df.city_name), np.nan)

    for city in tqdm(glovoCityCoords.keys()):
        sleep(1)    # pause between consecutive geocoder requests

        # [lat, lon] stored as a list so a fresh run has the same value type
        # as a run restored from JSON (JSON turns tuples into lists anyway).
        coords = [np.nan, np.nan]
        try:
            match = geoEncoder.geocode(f'{city}, Spain')
            if match is not None:    # None means the geocoder found nothing
                coords = [float(match.raw['lat']), float(match.raw['lon'])]
        except Exception:
            # Best effort: a failed lookup keeps the NaN pair so it can be
            # filled in manually later. `Exception` (not a bare except) lets
            # KeyboardInterrupt / SystemExit stop the loop.
            pass
        glovoCityCoords[city] = coords

    backup(glovoCityCoords, 'glovoCityCoords.json')

glovoCityCoords

{'Aguadulce': [37.2530195, -4.9906167],
 'Albacete': [38.9950921, -1.8559154],
 'Alicante': [38.3436365, -0.4881708],
 'Algeciras': [36.1311725, -5.4473991],
 'Almeria': [36.8414197, -2.4628135],
 'Alcantarilla ': [37.9680342, -1.214954],
 'Avila': [40.656478, -4.7002172],
 'Barcelona': [41.3828939, 2.1774322],
 'Bilbao': [43.2630018, -2.9350039],
 'Badajoz': [38.88964355, -6.980142450723942],
 'Barakaldo': [43.29548, -2.9900933],
 'Benidorm': [38.5406255, -0.1290929],
 'Brunete': [40.4050499, -3.9979912],
 'Cartagena': [37.6019353, -0.9841152],
 'Cadiz': [36.5315575993944, -6.280563331226297],
 'Colmenar Viejo': [40.6587726, -3.7659722],
 'Castelldefels': [41.2861022, 1.9824173],
 'Castellón de la Plana': [39.9860347, -0.0377354],
 'Sant Cugat del Valles': [41.4728432, 2.0817809],
 'Donosita': [43.39907934659774, -1.975314531094155],
 'Elche': [38.2653307, -0.6988391],
 'Fuenlabrada': [40.282476, -3.7923422],
 'San Fernando': [36.4643934, -6.198203],
 'Gava': [41.3050933, 2.0063126],


In [3]:
# check how many glovo cities have no coordinates
[city for city, coords in glovoCityCoords.items() if np.isnan(coords[0])]

[]

In [4]:
# # lookup missings manually
# glovoCityCoords['Cadiz'] = [36.5315575993944, -6.280563331226297]
# glovoCityCoords['Donosita'] = [43.39907934659774, -1.975314531094155]
# glovoCityCoords['Valencia'] = [39.47424750070927, -0.37628868297826856]

# # del glovoCityCoords['Foligno']


# backup(glovoCityCoords, 'glovoCityCoords.json')

### find closest glovo city for all census towns

In [5]:
municipalities = pd.read_excel('../aux_data/01_census/data/provincial and municipal codes.xlsx', usecols=['CODAUTO', 'NOMBRE'])
municipalities

Unnamed: 0,CODAUTO,NOMBRE
0,16,Alegría-Dulantzi
1,16,Amurrio
2,16,Aramaio
3,16,Artziniega
4,16,Armiñón
...,...,...
8126,2,"Zaida, La"
8127,2,Zaragoza
8128,2,Zuera
8129,18,Ceuta


In [6]:
autoCommCodes = pd.read_excel('autonomousCommunitiesCodes.xlsx', usecols=['CODAUTO', 'Comunidad Autónoma']).drop_duplicates().reset_index(drop=True)
autoCommCodes

Unnamed: 0,CODAUTO,Comunidad Autónoma
0,1,Andalucía
1,2,Aragón
2,3,"Asturias, Principado de"
3,4,"Balears, Illes"
4,5,Canarias
...,...,...
14,15,"Navarra, Comunidad Foral de"
15,16,País Vasco
16,17,"Rioja, La"
17,18,Ceuta


In [29]:
# Association table: one row per census municipality, paired with the name of
# its autonomous community (joined on the shared CODAUTO code).
ass = (
    municipalities
    .merge(autoCommCodes, on='CODAUTO', how='inner')
    .drop(columns=['CODAUTO'])
    .rename(columns={'NOMBRE': 'municipality', 'Comunidad Autónoma': 'autonomous_community'})
)
ass

Unnamed: 0,municipality,autonomous_community
0,Alegría-Dulantzi,País Vasco
1,Amurrio,País Vasco
2,Aramaio,País Vasco
3,Artziniega,País Vasco
4,Armiñón,País Vasco
...,...,...
8126,Villaescusa,Cantabria
8127,Villafufre,Cantabria
8128,Voto,Cantabria
8129,Ceuta,Ceuta


In [30]:
def getCoords(location: str):
    """Geocode ``'<location>, Spain'`` and return ``(lat, lon)`` as floats.

    Waits 0.8 s before every request. Returns ``(nan, nan)`` when the geocoder
    finds nothing or the lookup fails with an ordinary exception.
    KeyboardInterrupt / SystemExit are not swallowed, so a long batch of
    lookups can still be interrupted.
    """
    sleep(0.8)
    try:
        match = geoEncoder.geocode(f'{location}, Spain')
        if match is None:    # no result for this query
            return np.nan, np.nan
        geoInfo = match.raw
        return float(geoInfo['lat']), float(geoInfo['lon'])
    except Exception:
        # Best effort: any lookup/parsing error counts as "no coordinates".
        return np.nan, np.nan

def retrieveCoords(city: str, censusMunicipalityCoords: dict):
    """Return cached coordinates for ``city``, geocoding it when no usable entry exists.

    A cache entry is usable when it is a list/tuple of at least two values
    whose first element is not NaN. In every other case (city missing from the
    cache, scalar NaN placeholder, NaN pair from a failed earlier lookup)
    ``getCoords(city)`` is called and its result returned.
    """
    # .get avoids the KeyError the old non-short-circuiting `&` test raised for
    # cities missing from the cache.
    cached = censusMunicipalityCoords.get(city)

    # The isinstance check skips the scalar NaN placeholder, which cannot be indexed.
    hasPair = isinstance(cached, (list, tuple)) and len(cached) >= 2
    if hasPair and not np.isnan(cached[0]):
        return cached

    return getCoords(city)
        

def findClosestGlovoCity(testCoords: tuple, glovoCityCoords: dict, maxDistance: float = 0.30):
    '''
    Find the glovo-city closest to ``testCoords`` within a given radius.

    Parameters
    ----------
    testCoords : coordinate pair of the location to assign.
    glovoCityCoords : mapping of glovo-city name -> coordinate pair.
    maxDistance : search radius, in the unit returned by ``distance``
        (default 0.30). Only cities strictly closer than this are considered.

    Returns
    -------
    The name of the closest glovo-city inside the radius, or ``''`` when there
    is none (also when a distance is NaN, since NaN comparisons are False).
    On equal distances the city that comes first in ``glovoCityCoords`` wins.

    Trialled radii (author's notes):
    - 10km/0.15 deg: 95% of municipalities have no glovo-city -> 200 municipalities for 70 cities: 3 municipalities/city
    - 20km/0.30 deg: 75% of municipalities have no glovo-city -> 2000 municipalities for 70 cities: 30 municipalities/city
      (despite this, 1.7m of 2m census observations still have a glovo-city, because the areas surrounding
      glovo cities are the most populated and thus the most represented in the census)
    '''
    bestCity, bestDistance = '', np.inf

    for city, coords in glovoCityCoords.items():
        d = distance(testCoords, coords)

        # strict comparisons: must lie inside the radius and beat the current best
        if d < maxDistance and d < bestDistance:
            bestCity, bestDistance = city, d

    return bestCity

In [31]:
# Start from the cached municipality coordinates when the JSON file exists,
# otherwise from a placeholder dict holding NaN for every census municipality.
# NOTE(review): this cell does not write newly fetched coordinates back to the
# JSON file.
if os.path.exists('censusMunicipalityCoords.json'):
    censusMunicipalityCoords = restore('censusMunicipalityCoords.json')
else:
    censusMunicipalityCoords = dict.fromkeys(list(municipalities.NOMBRE), np.nan)

# Coordinates per municipality first, then the nearest glovo-city for each of them.
ass['municipality_coords'] = ass['municipality'].progress_apply(
    retrieveCoords, censusMunicipalityCoords=censusMunicipalityCoords
)
ass['closest_glovo_city'] = ass['municipality_coords'].progress_apply(
    findClosestGlovoCity, glovoCityCoords=glovoCityCoords
)
ass



pandas: 100%|██████████| 8131/8131 [05:14<00:00, 25.88it/s]  
pandas: 100%|██████████| 8131/8131 [00:01<00:00, 6263.50it/s]


Unnamed: 0,municipality,autonomous_community,municipality_coords,closest_glovo_city
0,Alegría-Dulantzi,País Vasco,"[42.8424145, -2.512674]",Vitoria
1,Amurrio,País Vasco,"[43.0525066, -3.000896]",Bilbao
2,Aramaio,País Vasco,"[43.035206099999996, -2.585761508234]",Vitoria
3,Artziniega,País Vasco,"[43.1210566, -3.1286742]",Barakaldo
4,Armiñón,País Vasco,"[42.722587, -2.8722115]",Vitoria
...,...,...,...,...
8126,Villaescusa,Cantabria,"[41.2055283, -5.4639875]",
8127,Villafufre,Cantabria,"[43.266473, -3.892813]",Santander
8128,Voto,Cantabria,"[43.3467008, -3.510960548422662]",
8129,Ceuta,Ceuta,"[35.89442195, -5.355817352394269]",Algeciras


In [32]:
ass.isnull().sum()

municipality            0
autonomous_community    0
municipality_coords     0
closest_glovo_city      0
dtype: int64

In [33]:
ass[ass.closest_glovo_city == ''].reset_index()

Unnamed: 0,index,municipality,autonomous_community,municipality_coords,closest_glovo_city
0,6,Asparrena,País Vasco,"[42.88625135, -2.2855075098609063]",
1,8,Baños de Ebro/Mañueta,País Vasco,"[42.5295829, -2.6789897]",
2,12,Campezo/Kanpezu,País Vasco,"[42.6691469, -2.3520513]",
3,17,Elciego,País Vasco,"[42.5153808, -2.6181739]",
4,18,Elvillar/Bilar,País Vasco,"[42.5690308, -2.5447226]",
...,...,...,...,...,...
5843,8123,Vega de Liébana,Cantabria,"[43.07968955, -4.6674983766980205]",
5844,8124,Vega de Pas,Cantabria,"[43.1585436, -3.7821043]",
5845,8126,Villaescusa,Cantabria,"[41.2055283, -5.4639875]",
5846,8128,Voto,Cantabria,"[43.3467008, -3.510960548422662]",


In [39]:
ass = ass[ass.closest_glovo_city != ''].reset_index(drop=True)
ass

Unnamed: 0,municipality,autonomous_community,municipality_coords,closest_glovo_city
0,Alegría-Dulantzi,País Vasco,"[42.8424145, -2.512674]",Vitoria
1,Amurrio,País Vasco,"[43.0525066, -3.000896]",Bilbao
2,Aramaio,País Vasco,"[43.035206099999996, -2.585761508234]",Vitoria
3,Artziniega,País Vasco,"[43.1210566, -3.1286742]",Barakaldo
4,Armiñón,País Vasco,"[42.722587, -2.8722115]",Vitoria
...,...,...,...,...
2278,Torrelavega,Cantabria,"[43.3485734, -4.0544203]",Santander
2279,Valle de Villaverde,Cantabria,"[43.232393900000005, -3.2824230157742376]",Getxo
2280,Villacarriedo,Cantabria,"[43.2306206, -3.8049489]",Santander
2281,Villafufre,Cantabria,"[43.266473, -3.892813]",Santander


In [40]:
ass.to_excel('association_table.xlsx')

## census 

### adding associated glovo-city

In [2]:
census = dd.read_parquet('../aux_data/01_census/data/locationSubset/').compute()
census.head()

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,core type,core size,Number of children,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location
2,1.0,59.0,65.0,1.0,12.585368,1.0,1915.0,96.0,1.0,108.0,...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,6.0,1059,Vitoria-Gasteiz
3,1.0,59.0,65.0,2.0,12.585368,6.0,1923.0,88.0,6.0,108.0,...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,6.0,1059,Vitoria-Gasteiz
4,1.0,59.0,138.0,1.0,14.037818,1.0,1917.0,94.0,1.0,108.0,...,,,,,,,,,1059,Vitoria-Gasteiz
5,1.0,59.0,202.0,1.0,41.444004,1.0,1918.0,93.0,1.0,108.0,...,,,,,,,,,1059,Vitoria-Gasteiz
6,1.0,59.0,202.0,2.0,41.444004,7.0,1964.0,47.0,1.0,341.0,...,,,,,,,,,1059,Vitoria-Gasteiz


In [3]:
len(census)

2085622

In [4]:
ass = pd.read_excel('association_table.xlsx', index_col=0).drop(columns='municipality_coords')
ass

Unnamed: 0,municipality,autonomous_community,closest_glovo_city
0,Alegría-Dulantzi,País Vasco,Vitoria
1,Amurrio,País Vasco,Bilbao
2,Aramaio,País Vasco,Vitoria
3,Artziniega,País Vasco,Barakaldo
4,Armiñón,País Vasco,Vitoria
...,...,...,...
2278,Torrelavega,Cantabria,Santander
2279,Valle de Villaverde,Cantabria,Getxo
2280,Villacarriedo,Cantabria,Santander
2281,Villafufre,Cantabria,Santander


In [5]:
ass[ass.municipality == 'Manresa']

Unnamed: 0,municipality,autonomous_community,closest_glovo_city
1365,Manresa,Cataluña,Terrassa


In [6]:
census = pd.merge(census, ass, left_on='location', right_on='municipality', how='inner')
census

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location,municipality,autonomous_community,closest_glovo_city
0,1.0,59.0,65.0,1.0,12.585368,1.0,1915.0,96.0,1.0,108.0,...,0.0,1.0,1.0,1.0,6.0,01059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
1,1.0,59.0,65.0,2.0,12.585368,6.0,1923.0,88.0,6.0,108.0,...,0.0,1.0,1.0,1.0,6.0,01059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
2,1.0,59.0,138.0,1.0,14.037818,1.0,1917.0,94.0,1.0,108.0,...,,,,,,01059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
3,1.0,59.0,202.0,1.0,41.444004,1.0,1918.0,93.0,1.0,108.0,...,,,,,,01059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
4,1.0,59.0,202.0,2.0,41.444004,7.0,1964.0,47.0,1.0,341.0,...,,,,,,01059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1772499,51.0,1.0,1620796.0,7.0,12.236693,1.0,2007.0,4.0,6.0,108.0,...,5.0,2.0,3.0,1.0,5.0,51001,Ceuta,Ceuta,Ceuta,Algeciras
1772500,51.0,1.0,1621269.0,1.0,11.767478,11.0,1963.0,47.0,1.0,108.0,...,0.0,1.0,4.0,4.0,8.0,51001,Ceuta,Ceuta,Ceuta,Algeciras
1772501,51.0,1.0,1621269.0,2.0,11.767478,12.0,2000.0,10.0,6.0,108.0,...,,,,,,51001,Ceuta,Ceuta,Ceuta,Algeciras
1772502,51.0,1.0,1621269.0,3.0,11.767478,10.0,1998.0,13.0,6.0,108.0,...,,,,,,51001,Ceuta,Ceuta,Ceuta,Algeciras


In [10]:
census[census.closest_glovo_city.isna()]

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location,municipality,autonomous_community,closest_glovo_city


### create each glovo-city-region's share of foreigners

In [11]:
colsOfInterest = ['closest_glovo_city', 'municipality', 'autonomous_community', 'Age', 'Sex', 'Country code of nationality', 'Country of birth code']
byCity = census.groupby('closest_glovo_city')[colsOfInterest]
byCity.get_group('Vitoria')

Unnamed: 0,closest_glovo_city,municipality,autonomous_community,Age,Sex,Country code of nationality,Country of birth code
0,Vitoria,Vitoria-Gasteiz,País Vasco,96.0,1.0,108.0,108.0
1,Vitoria,Vitoria-Gasteiz,País Vasco,88.0,6.0,108.0,108.0
2,Vitoria,Vitoria-Gasteiz,País Vasco,94.0,1.0,108.0,108.0
3,Vitoria,Vitoria-Gasteiz,País Vasco,93.0,1.0,108.0,108.0
4,Vitoria,Vitoria-Gasteiz,País Vasco,47.0,1.0,341.0,341.0
5,Vitoria,Vitoria-Gasteiz,País Vasco,59.0,6.0,108.0,108.0
6,Vitoria,Vitoria-Gasteiz,País Vasco,60.0,1.0,108.0,108.0
...,...,...,...,...,...,...,...
677118,Vitoria,Arrasate/Mondragón,País Vasco,25.0,6.0,108.0,108.0
677119,Vitoria,Arrasate/Mondragón,País Vasco,22.0,6.0,108.0,108.0


In [12]:
topCountryCodes = list(census['Country of birth code'].value_counts(dropna=False).index)[:20]
topCountryCodes

[108.0,
 345.0,
 228.0,
 343.0,
 128.0,
 340.0,
 348.0,
 110.0,
 126.0,
 341.0,
 125.0,
 351.0,
 315.0,
 326.0,
 342.0,
 407.0,
 350.0,
 115.0,
 344.0,
 104.0]

In [13]:
countryCodeKeyDF = pd.read_excel('country_codes_key.xlsx')
countryCodeKeyDF

Unnamed: 0,code,country
0,102,Austria
1,103,Bélgica
2,104,Bulgaria
3,106,Chipre
4,107,Dinamarca
5,108,España
6,109,Finlandia
...,...,...
194,512,Tuvalu
195,513,Islas Cook


In [14]:
# Collapse the census to one row per glovo-city: for each group of municipalities
# assigned to the same glovo-city, take all their observations and compute the
# group size, mean age, female share and the share of each top country code.
#
# NOTE(review): topCountryCodes is derived from 'Country of birth code', while
# the shares below are computed on 'Country code of nationality' — confirm this
# mix is intended.
nationalityColumn = 'Country code of nationality'

# Output column per country code, resolved once (the lookup does not depend on the group).
shareColumns = {
    code: 'share_' + countryCodeKeyDF[countryCodeKeyDF.code == int(code)].country.values[0].replace(' ', '_').lower()
    for code in topCountryCodes
}

# Collect plain records and build the frame once; DataFrame.append no longer
# exists in pandas >= 2.0 and re-copied the whole frame on every call.
records = []
for groupName, group in byCity:
    nObservations = len(group)

    record = {
        'closest_glovo_city': groupName,
        'total_observations': float(nObservations),    # float, as in the previous all-float rows
        'mean_age': group.Age.mean(),
        # Sex is recoded 1.0 -> 0 and 6.0 -> 1 before averaging
        # (presumably 6 = female, given the column name — TODO confirm against the codebook)
        'mean_female_share': group.Sex.replace({1.0: 0, 6.0: 1}).mean(),
    }

    for code, column in shareColumns.items():
        record[column] = (group[nationalityColumn] == code).sum() / nObservations

    records.append(record)

collapsedAux = pd.DataFrame(records)

collapsedAux



Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_ecuador,share_marruecos,share_colombia,share_rumanía,share_argentina,...,share_reino_unido,share_venezuela,share_cuba,share_república_dominicana,share_brasil,share_china,share_uruguay,share_italia,share_chile,share_bulgaria
0,A Coruña,31044.0,44.043197,0.528347,0.971009,0.000644,0.000902,0.002416,0.000483,0.002352,...,0.000773,0.002126,0.001707,0.001546,0.002674,0.000419,0.002545,0.001353,0.000419,0.000000
1,Aguadulce,1994.0,39.475928,0.519057,0.979438,0.000502,0.000000,0.004514,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,Albacete,11912.0,39.921592,0.519476,0.966924,0.001763,0.002015,0.007220,0.004701,0.001259,...,0.000168,0.000084,0.000336,0.000336,0.000420,0.000923,0.000084,0.000588,0.000588,0.001595
3,Alcala de Henares,18549.0,39.607364,0.507575,0.916869,0.003235,0.003127,0.006523,0.031754,0.001671,...,0.000701,0.000539,0.000701,0.001941,0.001402,0.001294,0.000323,0.001294,0.001186,0.004097
4,Alcantarilla,4274.0,37.714787,0.502340,0.912494,0.020356,0.021993,0.005615,0.006785,0.001404,...,0.001404,0.000702,0.000234,0.000000,0.000936,0.000702,0.000234,0.001170,0.000702,0.001404
5,Algeciras,15902.0,39.006289,0.515658,0.958370,0.000503,0.022639,0.000503,0.000818,0.001383,...,0.005282,0.000063,0.000063,0.000189,0.000629,0.000000,0.000063,0.001383,0.000503,0.000000
6,Alicante,31880.0,41.042535,0.519887,0.923369,0.006713,0.005395,0.008971,0.003607,0.005928,...,0.004235,0.001412,0.000910,0.001098,0.000847,0.000721,0.001004,0.005019,0.000753,0.000847
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,Valdemoro,13898.0,36.187365,0.507843,0.920348,0.006116,0.004821,0.009642,0.021946,0.002806,...,0.000863,0.001367,0.001943,0.001151,0.001511,0.000935,0.000648,0.002590,0.000791,0.000576
69,Valencia,66009.0,43.274523,0.527201,0.934418,0.008787,0.002227,0.007757,0.005772,0.002878,...,0.000985,0.001045,0.001151,0.000409,0.001091,0.001939,0.001257,0.003409,0.000757,0.001954


In [15]:
# Add an autonomous community to each glovo-city: the dominant community is the
# one contributing the most municipalities for which this glovo-city is the closest.

# Group the association table once; it is the same for every glovo-city.
municipalitiesByGlovoCity = ass[['closest_glovo_city', 'autonomous_community']].groupby('closest_glovo_city')

collapsedAux['autonomous_community'] = collapsedAux.closest_glovo_city.map(
    lambda glovoCity: (
        municipalitiesByGlovoCity
        .get_group(glovoCity)                       # municipalities assigned to this glovo-city
        .value_counts('autonomous_community')       # sorted by count, most frequent first
        .index[0]
    )
)
collapsedAux

Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_ecuador,share_marruecos,share_colombia,share_rumanía,share_argentina,...,share_venezuela,share_cuba,share_república_dominicana,share_brasil,share_china,share_uruguay,share_italia,share_chile,share_bulgaria,autonomous_community
0,A Coruña,31044.0,44.043197,0.528347,0.971009,0.000644,0.000902,0.002416,0.000483,0.002352,...,0.002126,0.001707,0.001546,0.002674,0.000419,0.002545,0.001353,0.000419,0.000000,Galicia
1,Aguadulce,1994.0,39.475928,0.519057,0.979438,0.000502,0.000000,0.004514,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Andalucía
2,Albacete,11912.0,39.921592,0.519476,0.966924,0.001763,0.002015,0.007220,0.004701,0.001259,...,0.000084,0.000336,0.000336,0.000420,0.000923,0.000084,0.000588,0.000588,0.001595,Castilla-La Mancha
3,Alcala de Henares,18549.0,39.607364,0.507575,0.916869,0.003235,0.003127,0.006523,0.031754,0.001671,...,0.000539,0.000701,0.001941,0.001402,0.001294,0.000323,0.001294,0.001186,0.004097,"Madrid, Comunidad de"
4,Alcantarilla,4274.0,37.714787,0.502340,0.912494,0.020356,0.021993,0.005615,0.006785,0.001404,...,0.000702,0.000234,0.000000,0.000936,0.000702,0.000234,0.001170,0.000702,0.001404,"Murcia, Región de"
5,Algeciras,15902.0,39.006289,0.515658,0.958370,0.000503,0.022639,0.000503,0.000818,0.001383,...,0.000063,0.000063,0.000189,0.000629,0.000000,0.000063,0.001383,0.000503,0.000000,Andalucía
6,Alicante,31880.0,41.042535,0.519887,0.923369,0.006713,0.005395,0.008971,0.003607,0.005928,...,0.001412,0.000910,0.001098,0.000847,0.000721,0.001004,0.005019,0.000753,0.000847,Comunitat Valenciana
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,Valdemoro,13898.0,36.187365,0.507843,0.920348,0.006116,0.004821,0.009642,0.021946,0.002806,...,0.001367,0.001943,0.001151,0.001511,0.000935,0.000648,0.002590,0.000791,0.000576,Castilla-La Mancha
69,Valencia,66009.0,43.274523,0.527201,0.934418,0.008787,0.002227,0.007757,0.005772,0.002878,...,0.001045,0.001151,0.000409,0.001091,0.001939,0.001257,0.003409,0.000757,0.001954,Comunitat Valenciana


In [16]:
collapsedAux.autonomous_community.value_counts(dropna=False)

Andalucía                      15
Cataluña                       12
Madrid, Comunidad de            9
Comunitat Valenciana            8
Galicia                         5
Castilla y León                 5
País Vasco                      5
Castilla-La Mancha              3
Murcia, Región de               3
Asturias, Principado de         2
Balears, Illes                  2
Canarias                        2
Extremadura                     1
Navarra, Comunidad Foral de     1
Cantabria                       1
Aragón                          1
Name: autonomous_community, dtype: int64

In [17]:
sorted(collapsedAux.autonomous_community.unique())

['Andalucía',
 'Aragón',
 'Asturias, Principado de',
 'Balears, Illes',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Comunitat Valenciana',
 'Extremadura',
 'Galicia',
 'Madrid, Comunidad de',
 'Murcia, Región de',
 'Navarra, Comunidad Foral de',
 'País Vasco']

In [18]:
sorted(ass.autonomous_community.unique())
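# -> compared with collapsedAux, this list additionally contains 'Ceuta' and 'Rioja, La'; Melilla appears in neither list (Ceuta and Melilla are autonomous cities anyway)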

['Andalucía',
 'Aragón',
 'Asturias, Principado de',
 'Balears, Illes',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Ceuta',
 'Comunitat Valenciana',
 'Extremadura',
 'Galicia',
 'Madrid, Comunidad de',
 'Murcia, Región de',
 'Navarra, Comunidad Foral de',
 'País Vasco',
 'Rioja, La']

In [19]:
collapsedAux.to_excel('collapsedAux.xlsx')

## ESS  

### select years and countries

In [23]:
# Keep Spanish respondents from 2010 onwards, excluding the two autonomous cities.
ESS = (
    pd.read_csv('../aux_data/03_xenophobia/ESS/data/columnSubset_normalised_indeces.csv', index_col=0, low_memory=False)
    .loc[
        lambda frame: frame['cntry'].eq('ES')
        & frame['year'].ge(2010)
        # only excludes 34 observations
        & ~frame['region_decoded'].isin(['Ciudad Autónoma de Ceuta', 'Ciudad Autónoma de Melilla'])
    ]
)
# Display only: the re-indexed copy is not assigned, so ESS keeps its original index.
ESS.reset_index(drop=True)

Unnamed: 0,imsmetn,imdfetn,impcntr,imbgeco,imueclt,imwbcnt,cntry,idno,year,anctry1,...,region_decoded,imsmetn_norm,imdfetn_norm,impcntr_norm,imbgeco_norm,imueclt_norm,imwbcnt_norm,index_antiimmigration,index_xenophobia,index_all
0,3.0,3.0,3.0,8.0,8.0,8.0,ES,1.0,2010,,...,Comunidad de Madrid,0.452481,0.210653,0.131665,0.058908,-0.985295,0.063442,0.213427,-0.460927,-0.011358
1,4.0,4.0,4.0,0.0,0.0,0.0,ES,2.0,2010,,...,País Vasco,1.176814,0.939987,0.839108,0.494751,2.108017,0.493011,0.862665,1.300514,1.008615
2,2.0,2.0,2.0,6.0,8.0,5.0,ES,3.0,2010,,...,Castilla y León,-0.271852,-0.518681,-0.575777,0.167869,-0.985295,0.224530,-0.299611,-0.380383,-0.326535
3,4.0,4.0,4.0,0.0,0.0,5.0,ES,6.0,2010,,...,Comunidad Valenciana,1.176814,0.939987,0.839108,0.494751,2.108017,0.224530,0.862665,1.166273,0.963868
4,2.0,2.0,2.0,6.0,6.0,6.0,ES,8.0,2010,,...,Galicia,-0.271852,-0.518681,-0.575777,0.167869,-0.211967,0.170834,-0.299611,-0.020567,-0.206596
5,1.0,2.0,2.0,5.0,8.0,5.0,ES,9.0,2010,,...,Illes Balears,-0.996185,-0.518681,-0.575777,0.222349,-0.985295,0.224530,-0.467074,-0.380383,-0.438177
6,3.0,3.0,3.0,5.0,6.0,5.0,ES,10.0,2010,,...,Comunidad de Madrid,0.452481,0.210653,0.131665,0.222349,-0.211967,0.224530,0.254287,0.006281,0.171618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9284,2.0,3.0,3.0,7.0,5.0,88.0,ES,69631.0,2018,13070.0,...,Comunidad de Madrid,-0.271852,0.210653,0.131665,0.113389,0.174697,-4.232249,0.045964,-2.028776,-0.645616
9285,2.0,2.0,2.0,6.0,7.0,6.0,ES,69685.0,2018,13078.0,...,Cantabria,-0.271852,-0.518681,-0.575777,0.167869,-0.598631,0.170834,-0.299611,-0.213899,-0.271040


In [24]:
ESS.year.value_counts()

2016    1951
2014    1916
2012    1883
2010    1880
2018    1661
Name: year, dtype: int64

In [25]:
ESS.region_decoded.value_counts()

Andalucía                     1862
Comunidad de Madrid           1312
Cataluña                      1268
Comunidad Valenciana           924
Galicia                        622
Castilla y León                544
Castilla-La Mancha             448
País Vasco                     427
Canarias                       354
Aragón                         308
Extremadura                    252
Región de Murcia               248
Principado de Asturias         228
Illes Balears                  180
Cantabria                      130
Comunidad Foral de Navarra     126
La Rioja                        58
Name: region_decoded, dtype: int64

### fuzzy matching

In [26]:
sorted(ESS.region_decoded.unique())

['Andalucía',
 'Aragón',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Comunidad Foral de Navarra',
 'Comunidad Valenciana',
 'Comunidad de Madrid',
 'Extremadura',
 'Galicia',
 'Illes Balears',
 'La Rioja',
 'País Vasco',
 'Principado de Asturias',
 'Región de Murcia']

In [27]:
sorted(collapsedAux.autonomous_community.unique())

['Andalucía',
 'Aragón',
 'Asturias, Principado de',
 'Balears, Illes',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Comunitat Valenciana',
 'Extremadura',
 'Galicia',
 'Madrid, Comunidad de',
 'Murcia, Región de',
 'Navarra, Comunidad Foral de',
 'País Vasco']

In [28]:
collapsedESS = ESS[['region_decoded', 'index_antiimmigration', 'index_xenophobia', 'index_all']].groupby('region_decoded').mean().reset_index()
collapsedESS

Unnamed: 0,region_decoded,index_antiimmigration,index_xenophobia,index_all
0,Andalucía,-0.024655,-0.132005,-0.048291
1,Aragón,0.049386,-0.133809,-0.005413
2,Canarias,-0.008962,-0.082150,-0.029668
3,Cantabria,0.023196,-0.185491,-0.038931
4,Castilla y León,0.091483,-0.113672,0.030717
5,Castilla-La Mancha,0.023379,-0.067050,0.004133
6,Cataluña,-0.040758,-0.129260,-0.065620
...,...,...,...,...
10,Extremadura,0.029390,-0.123065,-0.014289
11,Galicia,-0.103921,-0.140070,-0.100273


In [29]:
from fuzzywuzzy import process

# Demo: rank the aux community names by similarity to a single ESS region label,
# to see how well differently ordered spellings are matched.
process.extract(
    'Principado de Asturias',
    choices=collapsedAux['autonomous_community'].unique(),
)


[('Asturias, Principado de', 95),
 ('Murcia, Región de', 56),
 ('Canarias', 56),
 ('Galicia', 51),
 ('Cataluña', 51)]

In [30]:
# Same query, but keep only the single best candidate and require a score of at least 90.
process.extractOne(
    'Principado de Asturias',
    choices=collapsedAux['autonomous_community'].unique(),
    score_cutoff=90,
)

('Asturias, Principado de', 95)

In [31]:
# Exploratory pass: store the raw extractOne result for every ESS region, i.e. a
# (matched name, score) tuple, so the match quality can be inspected in the table.
collapsedESS['autonomous_community'] = collapsedESS['region_decoded'].apply(
    lambda regionName: process.extractOne(
        regionName,
        choices=collapsedAux['autonomous_community'].unique(),
        score_cutoff=90,
    )
)
# Show up to 50 rows before pandas truncates the display.
pd.set_option('display.min_rows', 50)
collapsedESS

Unnamed: 0,region_decoded,index_antiimmigration,index_xenophobia,index_all,autonomous_community
0,Andalucía,-0.024655,-0.132005,-0.048291,"(Andalucía, 100)"
1,Aragón,0.049386,-0.133809,-0.005413,"(Aragón, 100)"
2,Canarias,-0.008962,-0.08215,-0.029668,"(Canarias, 100)"
3,Cantabria,0.023196,-0.185491,-0.038931,"(Cantabria, 100)"
4,Castilla y León,0.091483,-0.113672,0.030717,"(Castilla y León, 100)"
5,Castilla-La Mancha,0.023379,-0.06705,0.004133,"(Castilla-La Mancha, 100)"
6,Cataluña,-0.040758,-0.12926,-0.06562,"(Cataluña, 100)"
7,Comunidad Foral de Navarra,-0.156991,-0.130415,-0.143776,"(Navarra, Comunidad Foral de, 95)"
8,Comunidad Valenciana,-0.060331,-0.146607,-0.080495,"(Comunitat Valenciana, 90)"
9,Comunidad de Madrid,-0.072231,-0.245129,-0.121979,"(Madrid, Comunidad de, 95)"


In [33]:
# use association table here, because La Rioja does not exist in collapsedAux data
# The candidate list does not change per row, so build it once instead of inside the lambda.
communityChoices = ass.autonomous_community.unique()

# extractOne returns None (not a tuple) when no candidate reaches score_cutoff, so
# keep the raw results first and only unpack the name once every region has a match.
communityMatches = collapsedESS.region_decoded.apply(
    lambda region: process.extractOne(region, choices=communityChoices, score_cutoff=90)
)

# Fail with a message that names the offending regions instead of an opaque
# "'NoneType' object is not subscriptable".
unmatchedRegions = collapsedESS.region_decoded[communityMatches.isna()].tolist()
if unmatchedRegions:
    raise ValueError(f'no autonomous_community match with score >= 90 for: {unmatchedRegions}')

# Keep only the matched name; the score is dropped.
collapsedESS['autonomous_community'] = communityMatches.map(lambda match: match[0])
collapsedESS

Unnamed: 0,region_decoded,index_antiimmigration,index_xenophobia,index_all,autonomous_community
0,Andalucía,-0.024655,-0.132005,-0.048291,Andalucía
1,Aragón,0.049386,-0.133809,-0.005413,Aragón
2,Canarias,-0.008962,-0.08215,-0.029668,Canarias
3,Cantabria,0.023196,-0.185491,-0.038931,Cantabria
4,Castilla y León,0.091483,-0.113672,0.030717,Castilla y León
5,Castilla-La Mancha,0.023379,-0.06705,0.004133,Castilla-La Mancha
6,Cataluña,-0.040758,-0.12926,-0.06562,Cataluña
7,Comunidad Foral de Navarra,-0.156991,-0.130415,-0.143776,"Navarra, Comunidad Foral de"
8,Comunidad Valenciana,-0.060331,-0.146607,-0.080495,Comunitat Valenciana
9,Comunidad de Madrid,-0.072231,-0.245129,-0.121979,"Madrid, Comunidad de"


### merging ESS to aux

In [34]:
# Attach the region-level ESS indices to every city row through its autonomous community.
# - how='inner' keeps only cities whose community has an ESS counterpart.
# - validate='many_to_one' makes the merge raise if autonomous_community is not unique
#   in collapsedESS (e.g. two ESS regions fuzzy-matched to the same community), which
#   would otherwise silently duplicate city rows.
# region_decoded is only the ESS-side spelling of the join key, so it is dropped afterwards.
collapsedAux = pd.merge(
    collapsedAux,
    collapsedESS,
    on='autonomous_community',
    how='inner',
    validate='many_to_one',
).drop(columns=['region_decoded'])
collapsedAux

Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_ecuador,share_marruecos,share_colombia,share_rumanía,share_argentina,...,share_brasil,share_china,share_uruguay,share_italia,share_chile,share_bulgaria,autonomous_community,index_antiimmigration,index_xenophobia,index_all
0,A Coruña,31044.0,44.043197,0.528347,0.971009,0.000644,0.000902,0.002416,0.000483,0.002352,...,0.002674,0.000419,0.002545,0.001353,0.000419,0.000000,Galicia,-0.103921,-0.140070,-0.100273
1,Ourense,6888.0,45.570412,0.539925,0.970383,0.000145,0.001597,0.004646,0.000726,0.001887,...,0.002613,0.000290,0.000145,0.000581,0.000000,0.000000,Galicia,-0.103921,-0.140070,-0.100273
2,Pontevedra,7642.0,41.679011,0.528134,0.968856,0.000393,0.002617,0.004580,0.000785,0.001701,...,0.003402,0.001570,0.001309,0.000523,0.000000,0.000131,Galicia,-0.103921,-0.140070,-0.100273
3,Santiago de Compostela,6293.0,44.316542,0.535516,0.969967,0.000477,0.000636,0.000477,0.000318,0.002225,...,0.001271,0.000000,0.000477,0.002066,0.001112,0.000000,Galicia,-0.103921,-0.140070,-0.100273
4,Vigo,26714.0,43.557498,0.526353,0.962492,0.000225,0.000187,0.002695,0.001984,0.003107,...,0.003968,0.000262,0.003144,0.001310,0.000337,0.000000,Galicia,-0.103921,-0.140070,-0.100273
5,Aguadulce,1994.0,39.475928,0.519057,0.979438,0.000502,0.000000,0.004514,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Andalucía,-0.024655,-0.132005,-0.048291
6,Algeciras,15902.0,39.006289,0.515658,0.958370,0.000503,0.022639,0.000503,0.000818,0.001383,...,0.000629,0.000000,0.000063,0.001383,0.000503,0.000000,Andalucía,-0.024655,-0.132005,-0.048291
7,Almeria,12780.0,40.054617,0.528013,0.946870,0.004930,0.017293,0.005008,0.005243,0.004695,...,0.000782,0.000548,0.000000,0.000861,0.000156,0.000782,Andalucía,-0.024655,-0.132005,-0.048291
8,Cadiz,7926.0,44.179157,0.535327,0.989780,0.000379,0.001514,0.000505,0.000126,0.000883,...,0.000505,0.000505,0.000000,0.000505,0.000000,0.000000,Andalucía,-0.024655,-0.132005,-0.048291
9,Cordoba,24363.0,41.508394,0.523868,0.983623,0.002216,0.000985,0.001888,0.000616,0.000903,...,0.000164,0.000246,0.000287,0.000287,0.000205,0.000041,Andalucía,-0.024655,-0.132005,-0.048291


In [35]:
# Number of missing values per column after the merge.
collapsedAux.isna().sum(axis='index')

closest_glovo_city            0
total_observations            0
mean_age                      0
mean_female_share             0
share_españa                  0
share_ecuador                 0
share_marruecos               0
share_colombia                0
share_rumanía                 0
share_argentina               0
share_perú                    0
share_francia                 0
share_alemania                0
share_bolivia                 0
share_reino_unido             0
share_venezuela               0
share_cuba                    0
share_república_dominicana    0
share_brasil                  0
share_china                   0
share_uruguay                 0
share_italia                  0
share_chile                   0
share_bulgaria                0
autonomous_community          0
index_antiimmigration         0
index_xenophobia              0
index_all                     0
dtype: int64

In [36]:
# Number of city rows per autonomous community; NaN would be listed as its own entry.
collapsedAux['autonomous_community'].value_counts(dropna=False)

Andalucía                      15
Cataluña                       12
Madrid, Comunidad de            9
Comunitat Valenciana            8
Galicia                         5
Castilla y León                 5
País Vasco                      5
Castilla-La Mancha              3
Murcia, Región de               3
Asturias, Principado de         2
Balears, Illes                  2
Canarias                        2
Extremadura                     1
Navarra, Comunidad Foral de     1
Cantabria                       1
Aragón                          1
Name: autonomous_community, dtype: int64

In [37]:
# Persist the merged aux table to disk.
collapsedAux.to_excel(excel_writer='collapsed_aux.xlsx')

## glovo  

### collapse glovo orders by city, week, culinary culture

In [38]:
# Load the restaurant aggregation (first CSV column is the index) and rename two
# columns to the names used in the rest of this notebook.
glovo = (
    pd.read_csv('../outputs/restaurant_aggregation_spain.csv', index_col=0)
    .rename(columns={'culinary_origin': 'culinary_culture', 'city': 'city_code'})
)
glovo

Unnamed: 0,year,week,store_name,store_address_id,city_code,culinary_culture,store_tag,orders_this_restaurant_this_week,orders_this_city_this_week,date
0,2016,6,Carrefour,105.0,BCN,other,Groceries,2,10,2016-02-12
1,2016,6,Chivuo's,35.0,BCN,US,Burger,1,10,2016-02-12
2,2016,6,Chok Barcelona,75.0,BCN,,,2,10,2016-02-12
3,2016,6,Comaxurros,84.0,BCN,,,1,10,2016-02-12
4,2016,6,Greenshots,72.0,BCN,,,1,10,2016-02-12
5,2016,6,Parking Pizza,71.0,BCN,Italy,Pizza & Italian,1,10,2016-02-12
6,2016,6,Pedidos Especiales Parafarmacia,3.0,BCN,,,1,10,2016-02-12
7,2016,6,Petit Bangkok,82.0,BCN,Thailand,Thai,1,10,2016-02-12
8,2016,7,Carrefour,108.0,BCN,other,Groceries,1,18,2016-02-19
9,2016,7,Carrefour,109.0,BCN,other,Groceries,1,18,2016-02-19


In [39]:
# Collapse restaurant-level rows to one row per (year, week, city, culinary culture).
# Note: groupby excludes NaN keys by default, so rows without a culinary_culture are
# left out of the result.
collapsedGlovo = (
    glovo
    .drop(columns=['store_address_id'])
    .groupby(['year', 'week', 'city_code', 'culinary_culture'])
    .agg(
        # restaurant counts are added up within each group
        orders_this_restaurant_this_week=('orders_this_restaurant_this_week', 'sum'),
        # the groups are finer than (year, week, city), so the city total is the same
        # on every row of a group and it's fine to take the first value
        orders_this_city_this_week=('orders_this_city_this_week', 'first'),
    )
    .reset_index()
)

collapsedGlovo

Unnamed: 0,year,week,city_code,culinary_culture,orders_this_restaurant_this_week,orders_this_city_this_week
0,2016,6,BCN,Italy,1,10
1,2016,6,BCN,Thailand,1,10
2,2016,6,BCN,US,1,10
3,2016,6,BCN,other,2,10
4,2016,7,BCN,Thailand,1,18
5,2016,7,BCN,US,1,18
6,2016,7,BCN,other,8,18
7,2016,7,MAD,Japan,3,8
8,2016,7,MAD,US,1,8
9,2016,7,MAD,other,2,8


### decode city_code

In [40]:
# Lookup table from city_code to city_name.
glovoCityCodes = pd.read_excel(io='glovo_city_codes.xlsx')
glovoCityCodes

Unnamed: 0,city_code,city_name
0,AGU,Aguadulce
1,ALB,Albacete
2,ALC,Alicante
3,ALG,Algeciras
4,ALM,Almeria
5,ALR,Alcantarilla
6,AVL,Avila
7,BCN,Barcelona
8,BIL,Bilbao
9,BJZ,Badajoz


In [41]:
# Add the readable city_name for each city_code; the left join keeps every order
# row even when a code is absent from the lookup table.
collapsedGlovo = collapsedGlovo.merge(
    glovoCityCodes,
    how='left',
    on='city_code',
)
collapsedGlovo

Unnamed: 0,year,week,city_code,culinary_culture,orders_this_restaurant_this_week,orders_this_city_this_week,city_name
0,2016,6,BCN,Italy,1,10,Barcelona
1,2016,6,BCN,Thailand,1,10,Barcelona
2,2016,6,BCN,US,1,10,Barcelona
3,2016,6,BCN,other,2,10,Barcelona
4,2016,7,BCN,Thailand,1,18,Barcelona
5,2016,7,BCN,US,1,18,Barcelona
6,2016,7,BCN,other,8,18,Barcelona
7,2016,7,MAD,Japan,3,8,Madrid
8,2016,7,MAD,US,1,8,Madrid
9,2016,7,MAD,other,2,8,Madrid


In [42]:
# Count rows whose city_code found no city_name in the lookup (0 means all decoded).
collapsedGlovo['city_name'].isna().sum()

0

In [43]:
# Persist the collapsed, decoded Glovo table to disk.
collapsedGlovo.to_excel(excel_writer='collapsed_glovo.xlsx')

### merge glovo to aux

In [45]:
# Read the aux table back from the Excel file; its first column holds the saved index.
collapsedAux = pd.read_excel(io='collapsed_aux.xlsx', index_col=0)
collapsedAux

Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_ecuador,share_marruecos,share_colombia,share_rumanía,share_argentina,...,share_brasil,share_china,share_uruguay,share_italia,share_chile,share_bulgaria,autonomous_community,index_antiimmigration,index_xenophobia,index_all
0,A Coruña,31044,44.043197,0.528347,0.971009,0.000644,0.000902,0.002416,0.000483,0.002352,...,0.002674,0.000419,0.002545,0.001353,0.000419,0.000000,Galicia,-0.103921,-0.140070,-0.100273
1,Ourense,6888,45.570412,0.539925,0.970383,0.000145,0.001597,0.004646,0.000726,0.001887,...,0.002613,0.000290,0.000145,0.000581,0.000000,0.000000,Galicia,-0.103921,-0.140070,-0.100273
2,Pontevedra,7642,41.679011,0.528134,0.968856,0.000393,0.002617,0.004580,0.000785,0.001701,...,0.003402,0.001570,0.001309,0.000523,0.000000,0.000131,Galicia,-0.103921,-0.140070,-0.100273
3,Santiago de Compostela,6293,44.316542,0.535516,0.969967,0.000477,0.000636,0.000477,0.000318,0.002225,...,0.001271,0.000000,0.000477,0.002066,0.001112,0.000000,Galicia,-0.103921,-0.140070,-0.100273
4,Vigo,26714,43.557498,0.526353,0.962492,0.000225,0.000187,0.002695,0.001984,0.003107,...,0.003968,0.000262,0.003144,0.001310,0.000337,0.000000,Galicia,-0.103921,-0.140070,-0.100273
5,Aguadulce,1994,39.475928,0.519057,0.979438,0.000502,0.000000,0.004514,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,Andalucía,-0.024655,-0.132005,-0.048291
6,Algeciras,15902,39.006289,0.515658,0.958370,0.000503,0.022639,0.000503,0.000818,0.001383,...,0.000629,0.000000,0.000063,0.001383,0.000503,0.000000,Andalucía,-0.024655,-0.132005,-0.048291
7,Almeria,12780,40.054617,0.528013,0.946870,0.004930,0.017293,0.005008,0.005243,0.004695,...,0.000782,0.000548,0.000000,0.000861,0.000156,0.000782,Andalucía,-0.024655,-0.132005,-0.048291
8,Cadiz,7926,44.179157,0.535327,0.989780,0.000379,0.001514,0.000505,0.000126,0.000883,...,0.000505,0.000505,0.000000,0.000505,0.000000,0.000000,Andalucía,-0.024655,-0.132005,-0.048291
9,Cordoba,24363,41.508394,0.523868,0.983623,0.002216,0.000985,0.001888,0.000616,0.000903,...,0.000164,0.000246,0.000287,0.000287,0.000205,0.000041,Andalucía,-0.024655,-0.132005,-0.048291


In [46]:
# Attach the city-level aux columns to every Glovo row. The left join keeps all
# order rows; cities without a row in collapsedAux get NaN in the aux columns.
# closest_glovo_city duplicates city_name after the join, so it is dropped.
collapsed = (
    collapsedGlovo
    .merge(collapsedAux, how='left', left_on='city_name', right_on='closest_glovo_city')
    .drop(columns='closest_glovo_city')
)
collapsed

Unnamed: 0,year,week,city_code,culinary_culture,orders_this_restaurant_this_week,orders_this_city_this_week,city_name,total_observations,mean_age,mean_female_share,...,share_brasil,share_china,share_uruguay,share_italia,share_chile,share_bulgaria,autonomous_community,index_antiimmigration,index_xenophobia,index_all
0,2016,6,BCN,Italy,1,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
1,2016,6,BCN,Thailand,1,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
2,2016,6,BCN,US,1,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
3,2016,6,BCN,other,2,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
4,2016,7,BCN,Thailand,1,18,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
5,2016,7,BCN,US,1,18,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
6,2016,7,BCN,other,8,18,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.001943,0.000982,0.005898,0.001781,0.000477,Cataluña,-0.040758,-0.129260,-0.065620
7,2016,7,MAD,Japan,3,8,Madrid,232122.0,44.400561,0.537597,...,0.001460,0.001874,0.000383,0.002921,0.000999,0.001620,"Madrid, Comunidad de",-0.072231,-0.245129,-0.121979
8,2016,7,MAD,US,1,8,Madrid,232122.0,44.400561,0.537597,...,0.001460,0.001874,0.000383,0.002921,0.000999,0.001620,"Madrid, Comunidad de",-0.072231,-0.245129,-0.121979
9,2016,7,MAD,other,2,8,Madrid,232122.0,44.400561,0.537597,...,0.001460,0.001874,0.000383,0.002921,0.000999,0.001620,"Madrid, Comunidad de",-0.072231,-0.245129,-0.121979


In [47]:
# After the collapse, this column holds orders summed per culinary culture, not per
# restaurant, so give it a matching name.
collapsed = collapsed.rename(
    columns={'orders_this_restaurant_this_week': 'orders_this_culture_this_week'}
)

In [48]:
# Sanity check: every city_code must map to exactly one city_name.
for cityCode in collapsed['city_code'].unique():
    isThisCity = collapsed['city_code'].eq(cityCode)
    # dropna=False counts NaN as a value too, like len(unique()) would
    assert collapsed.loc[isThisCity, 'city_name'].nunique(dropna=False) == 1
print('check passed')


check passed


In [49]:
# Write the final table twice: Excel for manual inspection, CSV for downstream use.
collapsed.to_excel(excel_writer='collapsed_all.xlsx')
collapsed.to_csv(path_or_buf='collapsed_all.csv')