In [1]:
import os
from time import sleep
import json
from tqdm import tqdm
tqdm.pandas(desc='pandas')

import pandas as pd
import numpy as np
from dask import dataframe as dd

from geopy.geocoders import Nominatim
geoEncoder = Nominatim(user_agent='spanish-cities')

def backup(dic, path):
    with open(path,'w') as f:
        json.dump(obj=dic, fp=f, indent=4)

def restore(path):
    with open(path) as f:
        return json.load(f)
        
def distance(coords1: tuple, coords2: tuple):
    return np.sqrt((coords1[0] - coords2[0])**2  +  (coords1[1] - coords2[1])**2)



## city associations

### get glovo city coords

In [3]:
df = pd.read_excel('glovo_city_codes.xlsx', index_col=0)

if not os.path.exists('glovoCityCoords.json'):
    glovoCityCoords = dict.fromkeys(list(df.city_name), np.nan)  

    for city in tqdm(glovoCityCoords.keys()):
        sleep(1)
        try:    
            geoInfo = geoEncoder.geocode(f'{city}, Spain').raw
            glovoCityCoords[city] = float(geoInfo['lat']), float(geoInfo['lon'])
        except:
            glovoCityCoords[city] = np.nan, np.nan

    backup(glovoCityCoords, 'glovoCityCoords.json')

else:
    glovoCityCoords = restore('glovoCityCoords.json')

glovoCityCoords

{'Aguadulce': [37.2530195, -4.9906167],
 'Albacete': [38.9950921, -1.8559154],
 'Alicante': [38.3436365, -0.4881708],
 'Algeciras': [36.1311725, -5.4473991],
 'Almeria': [36.8414197, -2.4628135],
 'Alcantarilla ': [37.9680342, -1.214954],
 'Avila': [40.656478, -4.7002172],
 'Barcelona': [41.3828939, 2.1774322],
 'Bilbao': [43.2630018, -2.9350039],
 'Badajoz': [38.88964355, -6.980142450723942],
 'Barakaldo': [43.29548, -2.9900933],
 'Benidorm': [38.5406255, -0.1290929],
 'Brunete': [40.4050499, -3.9979912],
 'Cartagena': [37.6019353, -0.9841152],
 'Cadiz': [36.5315575993944, -6.280563331226297],
 'Colmenar Viejo': [40.6587726, -3.7659722],
 'Castelldefels': [41.2861022, 1.9824173],
 'Castellón de la Plana': [39.9860347, -0.0377354],
 'Sant Cugat del Valles': [41.4728432, 2.0817809],
 'Donosita': [43.39907934659774, -1.975314531094155],
 'Elche': [38.2653307, -0.6988391],
 'Fuenlabrada': [40.282476, -3.7923422],
 'San Fernando': [36.4643934, -6.198203],
 'Gava': [41.3050933, 2.0063126],


In [4]:
# check how many glovo cities have no coordinates
[city for city, coords in glovoCityCoords.items() if np.isnan(coords[0])]

[]

In [45]:
# lookup missings manually
glovoCityCoords['Cadiz'] = [36.5315575993944, -6.280563331226297]
glovoCityCoords['Donosita'] = [43.39907934659774, -1.975314531094155]
glovoCityCoords['Valencia'] = [39.47424750070927, -0.37628868297826856]

# del glovoCityCoords['Foligno']


backup(glovoCityCoords, 'glovoCityCoords.json')

### find closest glovo city for all census towns

In [54]:
municipalities = pd.read_excel('../aux_data/01_census/data/provincial and municipal codes.xlsx', usecols=['CODAUTO', 'NOMBRE'])
municipalities

Unnamed: 0,CODAUTO,NOMBRE
0,16,Alegría-Dulantzi
1,16,Amurrio
2,16,Aramaio
3,16,Artziniega
4,16,Armiñón
...,...,...
8126,2,"Zaida, La"
8127,2,Zaragoza
8128,2,Zuera
8129,18,Ceuta


In [65]:
autoCommCodes = pd.read_excel('autonomousCommunitiesCodes.xlsx', usecols=['CODAUTO', 'Comunidad Autónoma']).drop_duplicates().reset_index(drop=True)
autoCommCodes

Unnamed: 0,CODAUTO,Comunidad Autónoma
0,1,Andalucía
1,2,Aragón
2,3,"Asturias, Principado de"
3,4,"Balears, Illes"
4,5,Canarias
...,...,...
14,15,"Navarra, Comunidad Foral de"
15,16,País Vasco
16,17,"Rioja, La"
17,18,Ceuta


In [85]:
# association table
ass = pd.merge(municipalities, autoCommCodes, on='CODAUTO', how='inner')
ass = (
    ass
    .drop(columns=['CODAUTO'])
    .rename(columns={
        'Comunidad Autónoma': 'autonomous_community', 
        'NOMBRE': 'municipality',
        })
)
ass

Unnamed: 0,municipality,autonomous_community
0,Alegría-Dulantzi,País Vasco
1,Amurrio,País Vasco
2,Aramaio,País Vasco
3,Artziniega,País Vasco
4,Armiñón,País Vasco
...,...,...
8126,Villaescusa,Cantabria
8127,Villafufre,Cantabria
8128,Voto,Cantabria
8129,Ceuta,Ceuta


In [103]:
def getCoords(location: str):
    sleep(0.8)
    try:    
        geoInfo = geoEncoder.geocode(f'{location}, Spain').raw
        return float(geoInfo['lat']), float(geoInfo['lon'])
    except:
        return np.nan, np.nan

def retrieveCoords(city: str, censusMunicipalityCoords: dict):
    if (city in censusMunicipalityCoords) & (not np.isnan(censusMunicipalityCoords[city][0])):
        return censusMunicipalityCoords[city]
    else:
        return getCoords(city)
        

def findClosestGlovoCity(testCoords: tuple, glovoCityCoords: dict):
    theFavourite = ('', np.inf)

    for city, coords in glovoCityCoords.items():
        d = distance(testCoords, coords)

        if d < theFavourite[1]:
            theFavourite = (city, d)

    return theFavourite[0]

In [105]:
censusMunicipalityCoords = dict.fromkeys(list(municipalities.NOMBRE), np.nan) if not os.path.exists('censusMunicipalityCoords.json') else restore('censusMunicipalityCoords.json')

ass['municipality_coords'] = ass.municipality.progress_apply(retrieveCoords, censusMunicipalityCoords=censusMunicipalityCoords)
ass['closest_glovo_city'] = ass.municipality_coords.progress_apply(findClosestGlovoCity, glovoCityCoords=glovoCityCoords)
ass



pandas: 100%|██████████| 8131/8131 [04:43<00:00, 28.70it/s]  
pandas: 100%|██████████| 8131/8131 [00:01<00:00, 7169.55it/s]


Unnamed: 0,municipality,autonomous_community,municipality_coords,closest_glovo_city
0,Alegría-Dulantzi,País Vasco,"[42.8424145, -2.512674]",Vitoria
1,Amurrio,País Vasco,"[43.0525066, -3.000896]",Bilbao
2,Aramaio,País Vasco,"[43.035206099999996, -2.585761508234]",Vitoria
3,Artziniega,País Vasco,"[43.1210566, -3.1286742]",Barakaldo
4,Armiñón,País Vasco,"[42.722587, -2.8722115]",Vitoria
...,...,...,...,...
8126,Villaescusa,Cantabria,"[41.2055283, -5.4639875]",Salamanca
8127,Villafufre,Cantabria,"[43.266473, -3.892813]",Santander
8128,Voto,Cantabria,"[43.3467008, -3.510960548422662]",Santander
8129,Ceuta,Ceuta,"[35.89442195, -5.355817352394269]",Algeciras


In [106]:
ass.isnull().sum()

municipality            0
autonomous_community    0
municipality_coords     0
closest_glovo_city      0
dtype: int64

In [107]:
ass[ass.closest_glovo_city == ''].reset_index()

Unnamed: 0,index,municipality,autonomous_community,municipality_coords,closest_glovo_city
0,1239,"Fondó de les Neus, el/Hondón de las Nieves",Comunitat Valenciana,"(nan, nan)",
1,2164,"Palma del Condado, La",Andalucía,"(nan, nan)",
2,4218,"Riba de Escalote, La",Castilla y León,"(nan, nan)",
3,5708,"Vall d'en Bas, La",Cataluña,"(nan, nan)",
4,6718,"Cañada de Verich, La",Aragón,"(nan, nan)",


In [109]:
ass.to_excel('association_table.xlsx')

## census 

### adding associated glovo-city

In [5]:
census = dd.read_parquet('../aux_data/01_census/data/locationSubset/').compute()
census.head()

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,core type,core size,Number of children,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location
2,1.0,59.0,65.0,1.0,12.585368,1.0,1915.0,96.0,1.0,108.0,...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,6.0,1059,Vitoria-Gasteiz
3,1.0,59.0,65.0,2.0,12.585368,6.0,1923.0,88.0,6.0,108.0,...,1.0,1.0,0.0,0.0,1.0,1.0,1.0,6.0,1059,Vitoria-Gasteiz
4,1.0,59.0,138.0,1.0,14.037818,1.0,1917.0,94.0,1.0,108.0,...,,,,,,,,,1059,Vitoria-Gasteiz
5,1.0,59.0,202.0,1.0,41.444004,1.0,1918.0,93.0,1.0,108.0,...,,,,,,,,,1059,Vitoria-Gasteiz
6,1.0,59.0,202.0,2.0,41.444004,7.0,1964.0,47.0,1.0,341.0,...,,,,,,,,,1059,Vitoria-Gasteiz


In [6]:
len(census)

2085622

In [8]:
ass = pd.read_excel('association_table.xlsx', index_col=0).drop(columns='municipality_coords')
ass

Unnamed: 0,municipality,autonomous_community,closest_glovo_city
0,Alegría-Dulantzi,País Vasco,Vitoria
1,Amurrio,País Vasco,Bilbao
2,Aramaio,País Vasco,Vitoria
3,Artziniega,País Vasco,Barakaldo
4,Armiñón,País Vasco,Vitoria
...,...,...,...
8126,Villaescusa,Cantabria,Salamanca
8127,Villafufre,Cantabria,Santander
8128,Voto,Cantabria,Santander
8129,Ceuta,Ceuta,Algeciras


In [9]:
ass[ass.municipality == 'Manresa']

Unnamed: 0,municipality,autonomous_community,closest_glovo_city
5318,Manresa,Cataluña,Terrassa


In [10]:
census = pd.merge(census, ass, left_on='location', right_on='municipality', how='left')
census.head()

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location,municipality,autonomous_community,closest_glovo_city
0,1.0,59.0,65.0,1.0,12.585368,1.0,1915.0,96.0,1.0,108.0,...,0.0,1.0,1.0,1.0,6.0,1059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
1,1.0,59.0,65.0,2.0,12.585368,6.0,1923.0,88.0,6.0,108.0,...,0.0,1.0,1.0,1.0,6.0,1059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
2,1.0,59.0,138.0,1.0,14.037818,1.0,1917.0,94.0,1.0,108.0,...,,,,,,1059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
3,1.0,59.0,202.0,1.0,41.444004,1.0,1918.0,93.0,1.0,108.0,...,,,,,,1059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria
4,1.0,59.0,202.0,2.0,41.444004,7.0,1964.0,47.0,1.0,341.0,...,,,,,,1059,Vitoria-Gasteiz,Vitoria-Gasteiz,País Vasco,Vitoria


In [12]:
census.iloc[-10:]

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location,municipality,autonomous_community,closest_glovo_city
2097308,52.0,1.0,1620162.0,2.0,8.003594,5.0,1952.0,59.0,6.0,228.0,...,1.0,1.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097309,52.0,1.0,1620162.0,3.0,8.003594,11.0,1990.0,20.0,1.0,108.0,...,1.0,1.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097310,52.0,1.0,1620162.0,4.0,8.003594,12.0,1988.0,22.0,6.0,108.0,...,0.0,1.0,4.0,4.0,8.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097311,52.0,1.0,1620162.0,5.0,8.003594,11.0,2009.0,1.0,1.0,108.0,...,0.0,1.0,4.0,4.0,8.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097312,52.0,1.0,1621306.0,1.0,8.550835,7.0,1963.0,48.0,1.0,228.0,...,1.0,2.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097313,52.0,1.0,1621306.0,2.0,8.550835,5.0,1974.0,37.0,6.0,228.0,...,1.0,2.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097314,52.0,1.0,1621306.0,3.0,8.550835,8.0,1996.0,15.0,6.0,108.0,...,1.0,2.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097315,52.0,1.0,1621306.0,4.0,8.550835,5.0,1998.0,13.0,6.0,108.0,...,1.0,2.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097316,52.0,1.0,1621306.0,5.0,8.550835,12.0,2001.0,9.0,6.0,108.0,...,,,,,,52001,Melilla,Melilla,Melilla,Roquetas de mar
2097317,52.0,1.0,1621306.0,6.0,8.550835,8.0,2007.0,4.0,6.0,108.0,...,1.0,2.0,1.0,1.0,7.0,52001,Melilla,Melilla,Melilla,Roquetas de mar


In [11]:
len(census)

2097318

In [14]:
census[census.closest_glovo_city.isna()]

Unnamed: 0,province code,Municipality code or size,Hole identifier,Final number of the person inside the hole,Person lift factor,Birth month,Year of birth,Age,Sex,Country code of nationality,...,Number of common children of the kernel,Large family indicator,Type of partner (de facto or de jure),"Type of couple (same sex, different sex)",Age difference between male and female core,locationCode,location,municipality,autonomous_community,closest_glovo_city


### create each glovo-city-region's share of foreigners

In [16]:
colsOfInterest = ['closest_glovo_city', 'municipality', 'autonomous_community', 'Age', 'Sex', 'Country code of nationality', 'Country of birth code']
byCity = census.groupby('closest_glovo_city')[colsOfInterest]
byCity.get_group('Vitoria')

Unnamed: 0,closest_glovo_city,municipality,autonomous_community,Age,Sex,Country code of nationality,Country of birth code
0,Vitoria,Vitoria-Gasteiz,País Vasco,96.0,1.0,108.0,108.0
1,Vitoria,Vitoria-Gasteiz,País Vasco,88.0,6.0,108.0,108.0
2,Vitoria,Vitoria-Gasteiz,País Vasco,94.0,1.0,108.0,108.0
3,Vitoria,Vitoria-Gasteiz,País Vasco,93.0,1.0,108.0,108.0
4,Vitoria,Vitoria-Gasteiz,País Vasco,47.0,1.0,341.0,341.0
...,...,...,...,...,...,...,...
871196,Vitoria,Logroño,"Rioja, La",25.0,6.0,108.0,108.0
871197,Vitoria,Logroño,"Rioja, La",30.0,1.0,108.0,108.0
871198,Vitoria,Logroño,"Rioja, La",0.0,1.0,108.0,108.0
871204,Vitoria,Logroño,"Rioja, La",24.0,6.0,108.0,108.0


In [17]:
topCountryCodes = list(census['Country of birth code'].value_counts(dropna=False).index)[:20]
topCountryCodes

[108.0,
 228.0,
 345.0,
 343.0,
 128.0,
 340.0,
 110.0,
 348.0,
 126.0,
 125.0,
 351.0,
 341.0,
 315.0,
 326.0,
 342.0,
 350.0,
 407.0,
 115.0,
 104.0,
 123.0]

In [18]:
countryCodeKeyDF = pd.read_excel('country_codes_key.xlsx')
countryCodeKeyDF

Unnamed: 0,code,country
0,102,Austria
1,103,Bélgica
2,104,Bulgaria
3,106,Chipre
4,107,Dinamarca
...,...,...
196,514,Kiribati
197,515,Nauru
198,516,Palaos
199,517,Timor Oriental


In [91]:
collapsedAux = pd.DataFrame()

for groupName, group in byCity.__iter__():      # for each group of municipalities assigned to one glovo-city, take all the observations of those (several) municipalities and calculate the share of different kinds of foreigners

    row = pd.Series(name=groupName, dtype=float) 
    
    row['total_observations'] = len(group)
    row['mean_age'] = group.Age.mean()
    row['mean_female_share'] = group.Sex.replace({1.0: 0, 6.0: 1}).mean()
    
    
    for code in topCountryCodes:
        share = len(group[group['Country code of nationality'] == code]) / row['total_observations']

        countryName = countryCodeKeyDF[countryCodeKeyDF.code == int(code)].country.values[0].replace(' ', '_').lower()
        row[f'share_{countryName}'] = share



    collapsedAux = collapsedAux.append(row)

collapsedAux = collapsedAux.reset_index().rename(columns={'index': 'closest_glovo_city'})

collapsedAux



Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_marruecos,share_ecuador,share_colombia,share_rumanía,share_argentina,...,share_venezuela,share_bolivia,share_cuba,share_república_dominicana,share_brasil,share_uruguay,share_china,share_italia,share_bulgaria,share_portugal
0,A Coruña,32880.0,43.931113,0.528072,0.971077,0.000852,0.000639,0.002342,0.000517,0.002311,...,0.002068,0.000639,0.001642,0.001460,0.002555,0.003345,0.000395,0.001277,0.000000,0.001156
1,Aguadulce,10832.0,38.757939,0.512371,0.979505,0.001569,0.001569,0.004524,0.002308,0.000923,...,0.000831,0.000462,0.000185,0.000000,0.000000,0.000369,0.000000,0.000092,0.000462,0.000092
2,Albacete,17612.0,39.946798,0.514933,0.959630,0.002612,0.003009,0.006359,0.008630,0.001476,...,0.000284,0.004599,0.000227,0.000284,0.000284,0.000057,0.000738,0.000454,0.001306,0.000057
3,Alcala de Henares,18549.0,39.607364,0.507575,0.916869,0.003127,0.003235,0.006523,0.031754,0.001671,...,0.000539,0.000647,0.000701,0.001941,0.001402,0.000323,0.001294,0.001294,0.004097,0.000809
4,Alcantarilla,12650.0,38.683004,0.505613,0.916522,0.015257,0.031146,0.003557,0.003636,0.000791,...,0.000474,0.008696,0.000237,0.000395,0.000711,0.000079,0.000237,0.000711,0.000791,0.001423
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,Valladolid,32628.0,44.315220,0.524979,0.968371,0.002421,0.002728,0.004352,0.002452,0.000613,...,0.000889,0.001318,0.000736,0.002145,0.001471,0.000215,0.000950,0.000613,0.003586,0.000919
72,Vigo,28464.0,43.428436,0.525084,0.962303,0.000562,0.000562,0.002530,0.001862,0.003056,...,0.003021,0.000351,0.001581,0.001089,0.003829,0.002986,0.000281,0.001300,0.000000,0.003794
73,Vitoria,32601.0,43.014079,0.517377,0.935523,0.006994,0.004172,0.007883,0.008957,0.001288,...,0.000798,0.004356,0.001012,0.001656,0.001748,0.000123,0.000399,0.000859,0.000705,0.003067
74,Xativa,18000.0,42.007278,0.517056,0.940389,0.007611,0.006833,0.003333,0.006389,0.002111,...,0.000611,0.001889,0.001222,0.000500,0.000333,0.001278,0.000722,0.001833,0.006833,0.000333


In [92]:
# collapsedAux = pd.merge(collapsedAux, ass[['closest_glovo_city', 'autonomous_community']].groupby('closest_glovo_city').first(), on='closest_glovo_city', how='left')

for glovoCity in collapsedAux.closest_glovo_city:
    # for each glovo city, the dominant autonomous community is those with the most municipalities for which this glovo city is the closest
    collapsedAux.loc[collapsedAux.closest_glovo_city == glovoCity, 'autonomous_community'] = ass[['closest_glovo_city', 'autonomous_community']].groupby('closest_glovo_city').get_group(glovoCity).value_counts('autonomous_community').index[0]
collapsedAux

Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_marruecos,share_ecuador,share_colombia,share_rumanía,share_argentina,...,share_bolivia,share_cuba,share_república_dominicana,share_brasil,share_uruguay,share_china,share_italia,share_bulgaria,share_portugal,autonomous_community
0,A Coruña,32880.0,43.931113,0.528072,0.971077,0.000852,0.000639,0.002342,0.000517,0.002311,...,0.000639,0.001642,0.001460,0.002555,0.003345,0.000395,0.001277,0.000000,0.001156,Galicia
1,Aguadulce,10832.0,38.757939,0.512371,0.979505,0.001569,0.001569,0.004524,0.002308,0.000923,...,0.000462,0.000185,0.000000,0.000000,0.000369,0.000000,0.000092,0.000462,0.000092,Andalucía
2,Albacete,17612.0,39.946798,0.514933,0.959630,0.002612,0.003009,0.006359,0.008630,0.001476,...,0.004599,0.000227,0.000284,0.000284,0.000057,0.000738,0.000454,0.001306,0.000057,Castilla-La Mancha
3,Alcala de Henares,18549.0,39.607364,0.507575,0.916869,0.003127,0.003235,0.006523,0.031754,0.001671,...,0.000647,0.000701,0.001941,0.001402,0.000323,0.001294,0.001294,0.004097,0.000809,Castilla-La Mancha
4,Alcantarilla,12650.0,38.683004,0.505613,0.916522,0.015257,0.031146,0.003557,0.003636,0.000791,...,0.008696,0.000237,0.000395,0.000711,0.000079,0.000237,0.000711,0.000791,0.001423,"Murcia, Región de"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,Valladolid,32628.0,44.315220,0.524979,0.968371,0.002421,0.002728,0.004352,0.002452,0.000613,...,0.001318,0.000736,0.002145,0.001471,0.000215,0.000950,0.000613,0.003586,0.000919,Castilla y León
72,Vigo,28464.0,43.428436,0.525084,0.962303,0.000562,0.000562,0.002530,0.001862,0.003056,...,0.000351,0.001581,0.001089,0.003829,0.002986,0.000281,0.001300,0.000000,0.003794,Galicia
73,Vitoria,32601.0,43.014079,0.517377,0.935523,0.006994,0.004172,0.007883,0.008957,0.001288,...,0.004356,0.001012,0.001656,0.001748,0.000123,0.000399,0.000859,0.000705,0.003067,"Rioja, La"
74,Xativa,18000.0,42.007278,0.517056,0.940389,0.007611,0.006833,0.003333,0.006389,0.002111,...,0.001889,0.001222,0.000500,0.000333,0.001278,0.000722,0.001833,0.006833,0.000333,Comunitat Valenciana


In [93]:
collapsedAux.autonomous_community.value_counts(dropna=False)

Andalucía                      15
Cataluña                       12
Comunitat Valenciana            8
Castilla-La Mancha              6
Madrid, Comunidad de            6
Castilla y León                 6
Galicia                         5
País Vasco                      4
Murcia, Región de               3
Asturias, Principado de         2
Balears, Illes                  2
Canarias                        2
Extremadura                     1
Navarra, Comunidad Foral de     1
Cantabria                       1
Rioja, La                       1
Aragón                          1
Name: autonomous_community, dtype: int64

In [94]:
sorted(collapsedAux.autonomous_community.unique())

['Andalucía',
 'Aragón',
 'Asturias, Principado de',
 'Balears, Illes',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Comunitat Valenciana',
 'Extremadura',
 'Galicia',
 'Madrid, Comunidad de',
 'Murcia, Región de',
 'Navarra, Comunidad Foral de',
 'País Vasco',
 'Rioja, La']

In [95]:
sorted(ass.autonomous_community.unique())

['Andalucía',
 'Aragón',
 'Asturias, Principado de',
 'Balears, Illes',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Ceuta',
 'Comunitat Valenciana',
 'Extremadura',
 'Galicia',
 'Madrid, Comunidad de',
 'Melilla',
 'Murcia, Región de',
 'Navarra, Comunidad Foral de',
 'País Vasco',
 'Rioja, La']

-> Ceuta & Melilla are missing (are independent cities anyway)

In [96]:
collapsedAux.to_excel('collapsedAux.xlsx')

## ESS  

### select years and countries

In [97]:
ESS = pd.read_csv('../aux_data/03_xenophobia/ESS/data/columnSubset_normalised_indeces.csv', index_col=0, low_memory=False)
ESS = ESS[
    (ESS.cntry == 'ES')  
    & (ESS.year >= 2010) 
    & ~(ESS.region_decoded.isin(['Ciudad Autónoma de Ceuta', 'Ciudad Autónoma de Melilla'])) # only excludes 34 observations
    ]
ESS.reset_index(drop=True)

Unnamed: 0,imsmetn,imdfetn,impcntr,imbgeco,imueclt,imwbcnt,cntry,idno,year,anctry1,...,region_decoded,imsmetn_norm,imdfetn_norm,impcntr_norm,imbgeco_norm,imueclt_norm,imwbcnt_norm,index_antiimmigration,index_xenophobia,index_all
0,3.0,3.0,3.0,8.0,8.0,8.0,ES,1.0,2010,,...,Comunidad de Madrid,0.452481,0.210653,0.131665,0.058908,-0.985295,0.063442,0.213427,-0.460927,-0.011358
1,4.0,4.0,4.0,0.0,0.0,0.0,ES,2.0,2010,,...,País Vasco,1.176814,0.939987,0.839108,0.494751,2.108017,0.493011,0.862665,1.300514,1.008615
2,2.0,2.0,2.0,6.0,8.0,5.0,ES,3.0,2010,,...,Castilla y León,-0.271852,-0.518681,-0.575777,0.167869,-0.985295,0.224530,-0.299611,-0.380383,-0.326535
3,4.0,4.0,4.0,0.0,0.0,5.0,ES,6.0,2010,,...,Comunidad Valenciana,1.176814,0.939987,0.839108,0.494751,2.108017,0.224530,0.862665,1.166273,0.963868
4,2.0,2.0,2.0,6.0,6.0,6.0,ES,8.0,2010,,...,Galicia,-0.271852,-0.518681,-0.575777,0.167869,-0.211967,0.170834,-0.299611,-0.020567,-0.206596
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9286,2.0,2.0,3.0,5.0,6.0,1.0,ES,69763.0,2018,13070.0,...,Cataluña,-0.271852,-0.518681,0.131665,0.222349,-0.211967,0.439315,-0.109130,0.113674,-0.034862
9287,3.0,3.0,3.0,6.0,7.0,8.0,ES,69803.0,2018,13070.0,...,Aragón,0.452481,0.210653,0.131665,0.167869,-0.598631,0.063442,0.240667,-0.267595,0.071246
9288,2.0,2.0,2.0,8.0,9.0,9.0,ES,69837.0,2018,61070.0,...,Comunidad Valenciana,-0.271852,-0.518681,-0.575777,0.058908,-1.371959,0.009745,-0.326851,-0.681107,-0.444936
9289,8.0,8.0,8.0,88.0,6.0,88.0,ES,69939.0,2018,13070.0,...,País Vasco,4.074146,3.857324,3.668878,-4.299521,-0.211967,-4.232249,1.825207,-2.222108,0.476102


In [98]:
ESS.year.value_counts()

2016    1951
2014    1916
2012    1883
2010    1880
2018    1661
Name: year, dtype: int64

In [99]:
ESS.region_decoded.value_counts()

Andalucía                     1862
Comunidad de Madrid           1312
Cataluña                      1268
Comunidad Valenciana           924
Galicia                        622
Castilla y León                544
Castilla-La Mancha             448
País Vasco                     427
Canarias                       354
Aragón                         308
Extremadura                    252
Región de Murcia               248
Principado de Asturias         228
Illes Balears                  180
Cantabria                      130
Comunidad Foral de Navarra     126
La Rioja                        58
Name: region_decoded, dtype: int64

### fuzzy matching

In [100]:
sorted(ESS.region_decoded.unique())

['Andalucía',
 'Aragón',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Comunidad Foral de Navarra',
 'Comunidad Valenciana',
 'Comunidad de Madrid',
 'Extremadura',
 'Galicia',
 'Illes Balears',
 'La Rioja',
 'País Vasco',
 'Principado de Asturias',
 'Región de Murcia']

In [101]:
sorted(collapsedAux.autonomous_community.unique())

['Andalucía',
 'Aragón',
 'Asturias, Principado de',
 'Balears, Illes',
 'Canarias',
 'Cantabria',
 'Castilla y León',
 'Castilla-La Mancha',
 'Cataluña',
 'Comunitat Valenciana',
 'Extremadura',
 'Galicia',
 'Madrid, Comunidad de',
 'Murcia, Región de',
 'Navarra, Comunidad Foral de',
 'País Vasco',
 'Rioja, La']

In [102]:
collapsedESS = ESS[['region_decoded', 'index_antiimmigration', 'index_xenophobia', 'index_all']].groupby('region_decoded').mean().reset_index()
collapsedESS

Unnamed: 0,region_decoded,index_antiimmigration,index_xenophobia,index_all
0,Andalucía,-0.024655,-0.132005,-0.048291
1,Aragón,0.049386,-0.133809,-0.005413
2,Canarias,-0.008962,-0.082150,-0.029668
3,Cantabria,0.023196,-0.185491,-0.038931
4,Castilla y León,0.091483,-0.113672,0.030717
...,...,...,...,...
12,Illes Balears,-0.075930,-0.134542,-0.092726
13,La Rioja,0.004147,-0.196317,-0.053123
14,País Vasco,-0.074517,-0.209440,-0.107988
15,Principado de Asturias,0.105272,-0.128852,0.051152


In [103]:
from fuzzywuzzy import process

process.extract('Principado de Asturias', choices=collapsedAux.autonomous_community.unique())


[('Asturias, Principado de', 95),
 ('Murcia, Región de', 56),
 ('Canarias', 56),
 ('Galicia', 51),
 ('Cataluña', 51)]

In [104]:
process.extractOne('Principado de Asturias', choices=collapsedAux.autonomous_community.unique(), score_cutoff=90)

('Asturias, Principado de', 95)

In [111]:
collapsedESS['autonomous_community'] = collapsedESS.region_decoded.apply(lambda region: process.extractOne(region, choices=collapsedAux.autonomous_community.unique(), score_cutoff=90))
pd.options.display.min_rows = 50
collapsedESS

Unnamed: 0,region_decoded,index_antiimmigration,index_xenophobia,index_all,autonomous_community
0,Andalucía,-0.024655,-0.132005,-0.048291,"(Andalucía, 100)"
1,Aragón,0.049386,-0.133809,-0.005413,"(Aragón, 100)"
2,Canarias,-0.008962,-0.08215,-0.029668,"(Canarias, 100)"
3,Cantabria,0.023196,-0.185491,-0.038931,"(Cantabria, 100)"
4,Castilla y León,0.091483,-0.113672,0.030717,"(Castilla y León, 100)"
5,Castilla-La Mancha,0.023379,-0.06705,0.004133,"(Castilla-La Mancha, 100)"
6,Cataluña,-0.040758,-0.12926,-0.06562,"(Cataluña, 100)"
7,Comunidad Foral de Navarra,-0.156991,-0.130415,-0.143776,"(Navarra, Comunidad Foral de, 95)"
8,Comunidad Valenciana,-0.060331,-0.146607,-0.080495,"(Comunitat Valenciana, 90)"
9,Comunidad de Madrid,-0.072231,-0.245129,-0.121979,"(Madrid, Comunidad de, 95)"


In [112]:
collapsedESS['autonomous_community'] = collapsedESS.region_decoded.apply(lambda region: process.extractOne(region, choices=collapsedAux.autonomous_community.unique(), score_cutoff=90)[0])
collapsedESS

Unnamed: 0,region_decoded,index_antiimmigration,index_xenophobia,index_all,autonomous_community
0,Andalucía,-0.024655,-0.132005,-0.048291,Andalucía
1,Aragón,0.049386,-0.133809,-0.005413,Aragón
2,Canarias,-0.008962,-0.08215,-0.029668,Canarias
3,Cantabria,0.023196,-0.185491,-0.038931,Cantabria
4,Castilla y León,0.091483,-0.113672,0.030717,Castilla y León
5,Castilla-La Mancha,0.023379,-0.06705,0.004133,Castilla-La Mancha
6,Cataluña,-0.040758,-0.12926,-0.06562,Cataluña
7,Comunidad Foral de Navarra,-0.156991,-0.130415,-0.143776,"Navarra, Comunidad Foral de"
8,Comunidad Valenciana,-0.060331,-0.146607,-0.080495,Comunitat Valenciana
9,Comunidad de Madrid,-0.072231,-0.245129,-0.121979,"Madrid, Comunidad de"


In [119]:
pd.options.display.min_rows = 10

### merging ESS to aux

In [106]:
collapsedAux = pd.merge(collapsedAux, collapsedESS, on='autonomous_community', how='inner').drop(columns=['region_decoded'])
collapsedAux

Unnamed: 0,closest_glovo_city,total_observations,mean_age,mean_female_share,share_españa,share_marruecos,share_ecuador,share_colombia,share_rumanía,share_argentina,...,share_brasil,share_uruguay,share_china,share_italia,share_bulgaria,share_portugal,autonomous_community,index_antiimmigration,index_xenophobia,index_all
0,A Coruña,32880.0,43.931113,0.528072,0.971077,0.000852,0.000639,0.002342,0.000517,0.002311,...,0.002555,0.003345,0.000395,0.001277,0.000000,0.001156,Galicia,-0.103921,-0.140070,-0.100273
1,Ourense,13489.0,44.740752,0.535992,0.966046,0.002891,0.000074,0.006153,0.001631,0.002150,...,0.003336,0.000890,0.000222,0.000593,0.000519,0.003781,Galicia,-0.103921,-0.140070,-0.100273
2,Pontevedra,8871.0,42.048360,0.528464,0.969676,0.002255,0.000338,0.004171,0.000789,0.001804,...,0.003156,0.001240,0.001353,0.000451,0.000113,0.003044,Galicia,-0.103921,-0.140070,-0.100273
3,Santiago de Compostela,6293.0,44.316542,0.535516,0.969967,0.000636,0.000477,0.000477,0.000318,0.002225,...,0.001271,0.000477,0.000000,0.002066,0.000000,0.000477,Galicia,-0.103921,-0.140070,-0.100273
4,Vigo,28464.0,43.428436,0.525084,0.962303,0.000562,0.000562,0.002530,0.001862,0.003056,...,0.003829,0.002986,0.000281,0.001300,0.000000,0.003794,Galicia,-0.103921,-0.140070,-0.100273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,Tenerife,54462.0,41.018049,0.521905,0.937314,0.003764,0.001983,0.003636,0.002662,0.002405,...,0.000606,0.001946,0.000900,0.005068,0.000386,0.001083,Canarias,-0.008962,-0.082150,-0.029668
72,Pamplona,18857.0,43.665641,0.522936,0.934348,0.003977,0.009121,0.007689,0.004667,0.002652,...,0.002758,0.000265,0.000318,0.001273,0.002917,0.003871,"Navarra, Comunidad Foral de",-0.156991,-0.130415,-0.143776
73,Santander,21180.0,43.469972,0.528093,0.950472,0.004721,0.004485,0.005099,0.004013,0.001086,...,0.001653,0.000142,0.000614,0.000708,0.000331,0.000331,Cantabria,0.023196,-0.185491,-0.038931
74,Vitoria,32601.0,43.014079,0.517377,0.935523,0.006994,0.004172,0.007883,0.008957,0.001288,...,0.001748,0.000123,0.000399,0.000859,0.000705,0.003067,"Rioja, La",0.004147,-0.196317,-0.053123


In [114]:
collapsedAux.isna().sum()

closest_glovo_city            0
total_observations            0
mean_age                      0
mean_female_share             0
share_españa                  0
share_marruecos               0
share_ecuador                 0
share_colombia                0
share_rumanía                 0
share_argentina               0
share_francia                 0
share_perú                    0
share_alemania                0
share_reino_unido             0
share_venezuela               0
share_bolivia                 0
share_cuba                    0
share_república_dominicana    0
share_brasil                  0
share_uruguay                 0
share_china                   0
share_italia                  0
share_bulgaria                0
share_portugal                0
autonomous_community          0
index_antiimmigration         0
index_xenophobia              0
index_all                     0
dtype: int64

In [115]:
collapsedAux.autonomous_community.value_counts(dropna=False)

Andalucía                      15
Cataluña                       12
Comunitat Valenciana            8
Castilla-La Mancha              6
Madrid, Comunidad de            6
Castilla y León                 6
Galicia                         5
País Vasco                      4
Murcia, Región de               3
Asturias, Principado de         2
Balears, Illes                  2
Canarias                        2
Extremadura                     1
Navarra, Comunidad Foral de     1
Cantabria                       1
Rioja, La                       1
Aragón                          1
Name: autonomous_community, dtype: int64

In [128]:
collapsedAux.to_excel('collapsed_aux.xlsx')

## glovo  

### collapse glovo orders by city, week, culinary culture

In [120]:
glovo = pd.read_csv('../outputs/restaurant_aggregation_spain.csv', index_col=0).rename(columns={'culinary_origin': 'culinary_culture', 'city': 'city_code'})
glovo

Unnamed: 0,year,week,store_name,store_address_id,city_code,culinary_culture,store_tag,orders_this_restaurant_this_week,orders_this_city_this_week,date
0,2016,6,Carrefour,105.0,BCN,other,Groceries,2,10,2016-02-12
1,2016,6,Chivuo's,35.0,BCN,US,Burger,1,10,2016-02-12
2,2016,6,Chok Barcelona,75.0,BCN,,,2,10,2016-02-12
3,2016,6,Comaxurros,84.0,BCN,,,1,10,2016-02-12
4,2016,6,Greenshots,72.0,BCN,,,1,10,2016-02-12
...,...,...,...,...,...,...,...,...,...,...
1435554,2021,39,Óptica ALAIN AFFLELOU,340808.0,MAD,,,2,61256,2021-10-01
1435555,2021,39,Óptica ALAIN AFFLELOU,340810.0,VAL,,,1,12192,2021-10-01
1435556,2021,39,Órale Compadre,2143.0,MAD,Mexico,Mexican,4,61256,2021-10-01
1435557,2021,39,Órale Compadre,181099.0,MAD,Mexico,Mexican,9,61256,2021-10-01


In [121]:
collapsedGlovo = (
    glovo
    .drop(columns=['store_address_id'])
    .groupby(['year', 'week', 'city_code', 'culinary_culture'])
    .agg({
        'orders_this_restaurant_this_week': 'sum', 
        'orders_this_city_this_week':       'first', # since this aggregation (city + culinary_culture) is MORE specific than just (city), its fine to take the first value
    })
    .reset_index()
)

collapsedGlovo

Unnamed: 0,year,week,city_code,culinary_culture,orders_this_restaurant_this_week,orders_this_city_this_week
0,2016,6,BCN,Italy,1,10
1,2016,6,BCN,Thailand,1,10
2,2016,6,BCN,US,1,10
3,2016,6,BCN,other,2,10
4,2016,7,BCN,Thailand,1,18
...,...,...,...,...,...,...
84975,2021,53,ZAR,Spain,43,6724
84976,2021,53,ZAR,Thailand,23,6724
84977,2021,53,ZAR,Turkey,83,6724
84978,2021,53,ZAR,US,963,6724


### decode city_code

In [122]:
glovoCityCodes = pd.read_excel('glovo_city_codes.xlsx')
glovoCityCodes

Unnamed: 0,city_code,city_name
0,AGU,Aguadulce
1,ALB,Albacete
2,ALC,Alicante
3,ALG,Algeciras
4,ALM,Almeria
...,...,...
74,VIT,Vitoria
75,VLL,Valladolid
76,XRY,Jerez de la Frontera
77,XTV,Xativa


In [123]:
collapsedGlovo = pd.merge(collapsedGlovo, glovoCityCodes, on='city_code', how='left')
collapsedGlovo

Unnamed: 0,year,week,city_code,culinary_culture,orders_this_restaurant_this_week,orders_this_city_this_week,city_name
0,2016,6,BCN,Italy,1,10,Barcelona
1,2016,6,BCN,Thailand,1,10,Barcelona
2,2016,6,BCN,US,1,10,Barcelona
3,2016,6,BCN,other,2,10,Barcelona
4,2016,7,BCN,Thailand,1,18,Barcelona
...,...,...,...,...,...,...,...
84975,2021,53,ZAR,Spain,43,6724,Zaragoza
84976,2021,53,ZAR,Thailand,23,6724,Zaragoza
84977,2021,53,ZAR,Turkey,83,6724,Zaragoza
84978,2021,53,ZAR,US,963,6724,Zaragoza


In [124]:
collapsedGlovo.city_name.isna().sum()

0

In [126]:
collapsedGlovo.to_excel('collapsed_glovo.xlsx')

### merge glovo to aux

In [31]:
collapsedAux = pd.read_csv('collapsed_aux.xlsx', index_col=0)
collapsedAux

Unnamed: 0,closestGlovoCity,total_observations,mean_age,mean_female_share,share_España,share_Marruecos,share_Ecuador,share_Colombia,share_Rumanía,share_Argentina,...,share_Uruguay,share_China,share_Italia,share_Bulgaria,share_Chile,glovoCityCoords,autonomousCommunity,index_antiimmigration,index_xenophobia,index_all
0,A Coruña,32880.0,43.931113,0.528072,0.971077,0.000852,0.000639,0.002342,0.000517,0.002311,...,0.003345,0.000395,0.001277,0.000000,0.000395,"[43.3709703, -8.3959425]",Galicia,-0.103921,-0.140070,-0.100273
1,Ourense,13489.0,44.740752,0.535992,0.966046,0.002891,0.000074,0.006153,0.001631,0.002150,...,0.000890,0.000222,0.000593,0.000519,0.000148,"[42.1968459, -7.6114105]",Galicia,-0.103921,-0.140070,-0.100273
2,Pontevedra,8871.0,42.048360,0.528464,0.969676,0.002255,0.000338,0.004171,0.000789,0.001804,...,0.001240,0.001353,0.000451,0.000113,0.000000,"[42.6075172, -8.4714942]",Galicia,-0.103921,-0.140070,-0.100273
3,Santiago de Compostela,6293.0,44.316542,0.535516,0.969967,0.000636,0.000477,0.000477,0.000318,0.002225,...,0.000477,0.000000,0.002066,0.000000,0.001112,"[42.8804219, -8.5458608]",Galicia,-0.103921,-0.140070,-0.100273
4,Vigo,28464.0,43.428436,0.525084,0.962303,0.000562,0.000562,0.002530,0.001862,0.003056,...,0.002986,0.000281,0.001300,0.000000,0.000703,"[42.2376602, -8.7247205]",Galicia,-0.103921,-0.140070,-0.100273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,Las Palmas de Gran Canaria,49818.0,40.012425,0.518788,0.930346,0.004717,0.002429,0.010338,0.000723,0.003493,...,0.001465,0.001064,0.004015,0.000482,0.001445,"[28.1288694, -15.4349015]",Canarias,-0.008962,-0.082150,-0.029668
63,Tenerife,54462.0,41.018049,0.521905,0.937314,0.003764,0.001983,0.003636,0.002662,0.002405,...,0.001946,0.000900,0.005068,0.000386,0.000588,"[28.2935785, -16.621447121144122]",Canarias,-0.008962,-0.082150,-0.029668
64,Pamplona,37828.0,44.012715,0.525642,0.945332,0.002670,0.005842,0.006635,0.003807,0.002009,...,0.000264,0.000423,0.001269,0.001692,0.000925,"[42.8184538, -1.6442556]",Comunidad Foral de Navarra,-0.156991,-0.130415,-0.143776
65,Santander,16464.0,44.825680,0.536018,0.945517,0.001579,0.003705,0.006317,0.005041,0.001397,...,0.000182,0.000668,0.000911,0.000425,0.001033,"[43.4620412, -3.8099719]",Cantabria,0.023196,-0.185491,-0.038931


In [131]:
collapsed = pd.merge(collapsedGlovo, collapsedAux, left_on='city_name', right_on='closest_glovo_city', how='left').drop(columns='closest_glovo_city')
collapsed

Unnamed: 0,year,week,city_code,culinary_culture,orders_this_restaurant_this_week,orders_this_city_this_week,city_name,total_observations,mean_age,mean_female_share,...,share_brasil,share_uruguay,share_china,share_italia,share_bulgaria,share_portugal,autonomous_community,index_antiimmigration,index_xenophobia,index_all
0,2016,6,BCN,Italy,1,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.000982,0.001943,0.005898,0.000477,0.000912,Cataluña,-0.040758,-0.129260,-0.065620
1,2016,6,BCN,Thailand,1,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.000982,0.001943,0.005898,0.000477,0.000912,Cataluña,-0.040758,-0.129260,-0.065620
2,2016,6,BCN,US,1,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.000982,0.001943,0.005898,0.000477,0.000912,Cataluña,-0.040758,-0.129260,-0.065620
3,2016,6,BCN,other,2,10,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.000982,0.001943,0.005898,0.000477,0.000912,Cataluña,-0.040758,-0.129260,-0.065620
4,2016,7,BCN,Thailand,1,18,Barcelona,142585.0,45.374100,0.532707,...,0.001957,0.000982,0.001943,0.005898,0.000477,0.000912,Cataluña,-0.040758,-0.129260,-0.065620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84975,2021,53,ZAR,Spain,43,6724,Zaragoza,50957.0,43.521459,0.525188,...,0.001334,0.000353,0.001629,0.000765,0.000510,0.000883,Aragón,0.049386,-0.133809,-0.005413
84976,2021,53,ZAR,Thailand,23,6724,Zaragoza,50957.0,43.521459,0.525188,...,0.001334,0.000353,0.001629,0.000765,0.000510,0.000883,Aragón,0.049386,-0.133809,-0.005413
84977,2021,53,ZAR,Turkey,83,6724,Zaragoza,50957.0,43.521459,0.525188,...,0.001334,0.000353,0.001629,0.000765,0.000510,0.000883,Aragón,0.049386,-0.133809,-0.005413
84978,2021,53,ZAR,US,963,6724,Zaragoza,50957.0,43.521459,0.525188,...,0.001334,0.000353,0.001629,0.000765,0.000510,0.000883,Aragón,0.049386,-0.133809,-0.005413


In [133]:
collapsed = collapsed.rename(columns={'orders_this_restaurant_this_week': 'orders_this_culture_this_week'})

In [140]:
# check that all city_codes only have one unique city_name
for cityCode in collapsed.city_code.unique():
    assert len(collapsed[collapsed.city_code == cityCode].city_name.unique()) == 1
print('check passed')


check passed


In [141]:
collapsed.to_excel('collapsed_all.xlsx') # to look at
collapsed.to_csv('collapsed_all.csv') # to use