In [3]:
import pandas as pd

In [4]:
# Both source files are semicolon-separated and encoded as windows-1252.
# Only the columns needed to describe each station are loaded.
meteo = pd.read_csv(
    "estaciones_control_datos_meteorologicos.csv",
    sep=";",
    encoding="windows-1252",
    usecols=["CÓDIGO_CORTO", "DIRECCION"],
    skipfooter=10,  # ignore the last 10 lines of the file
    engine="python",  # skipfooter is not supported by the C parser
)
aq = pd.read_csv(
    "informacion_estaciones_red_calidad_aire.csv",
    sep=";",
    encoding="windows-1252",
    usecols=["CODIGO_CORTO", "DIRECCION", "NOM_TIPO"],
)

In [5]:
# The meteorological file spells the code column with an accent; give it the
# same name as in the air-quality frame so both share the CODIGO_CORTO key.
meteo = meteo.rename(columns={"CÓDIGO_CORTO": "CODIGO_CORTO"})

In [6]:
# Preview the first rows of the air-quality station table.
aq.head()

Unnamed: 0,CODIGO_CORTO,DIRECCION,NOM_TIPO
0,4,Plaza de España,Urbana tráfico
1,8,Entre C/ Alcalá y C/ O’ Donell,Urbana tráfico
2,11,Avda. Ramón y Cajal esq. C/ Príncipe de Vergara,Urbana tráfico
3,16,C/ Arturo Soria esq. C/ Vizconde de los Asilos,Urbana fondo
4,17,C/. Juan Peñalver,Urbana fondo


In [7]:
# Outer join: keep every station that appears in either file.
df = aq.merge(meteo, how="outer", on=["CODIGO_CORTO", "DIRECCION"])
# Rows that exist only in `meteo` have no NOM_TIPO; they become "Meteorological".
# Note that fillna is applied to every column, not just NOM_TIPO.
df = df.fillna("Meteorological")

In [8]:
# Spanish station type -> English identifier. "Meteorological" is the filler
# value given to rows without a type and is mapped to itself.
station_type_names = {
    "Urbana tráfico": "UrbanTraffic",
    "Suburbana": "Suburban",
    "Urbana fondo": "UrbanBackground",
    'Meteorological': 'Meteorological',
}
# Plain indexing on purpose: an unexpected station type raises KeyError
# instead of being passed through silently.
df["NOM_TIPO"] = df["NOM_TIPO"].apply(lambda kind: station_type_names[kind])

In [9]:
def substitute_no_valid_chars(x: str) -> str:
    """Return ``x`` with accented Spanish letters replaced by plain ASCII ones.

    The acute-accented vowels, ``ñ`` and ``ü`` are substituted in both lower
    and upper case; every other character is returned unchanged.

    Parameters
    ----------
    x : str
        Text to clean (e.g. a street address).

    Returns
    -------
    str
        Copy of ``x`` with each mapped character replaced one-for-one.
    """
    # One-to-one character table. "Ü" is included so that upper-case text is
    # treated consistently with the lower-case "ü".
    accent_table = str.maketrans("áéíóúñüÁÉÍÓÚÑÜ", "aeiounuAEIOUNU")
    return x.translate(accent_table)


In [10]:
# Replace accented characters in every address with their plain equivalents.
df["DIRECCION"] = df["DIRECCION"].map(substitute_no_valid_chars)

### Add label column

Each station code has to be mapped to its station name (the label). The mapping can be found in [interpretacion_calidad_del_aire](https://bit.ly/3czGpCF) and [interpretacion_de_datos_meteorologicos](https://bit.ly/32eEtNM).
The full station code starts with 28079 and is followed by the ESTACION number padded to 3 digits (e.g. 4 -> 004). The same code also appears as the prefix of PUNTO_MUESTREO.



In [11]:
labels={1:"Paseo Recoletos", 2:"Glorieta Carlos V", 3:"Plaza del Carmen", 
            35:"Plaza del Carmen", 4:"Plaza de Espana", 5:"Barrio del Pilar", 
            39:"Barrio del Pilar", 6:"Plaza Doctor Maranon", 
            7:"Plaza Mayor de Salamanca", 8:"Escuelas Aguirre", 
            9:"Plaza Luca de Tena", 10: "Cuatro Caminos", 38:"Cuatro Caminos", 
            11:"Avenida Ramon y Cajal", 12:"Plaza Manuel Becerra", 13:"Vallecas", 
            40:"Vallecas", 14:"Plaza Fernandez Ladreda", 15:"Plaza Castilla", 
            50:"Plaza Castilla", 16:"Arturo Soria", 17:"Villaverde Alto", 
            18:"Farolillo", 19: "Huerta Castaneda", 20:"Moratalaz", 
            36:"Moratalaz", 102:"JMD Moratalaz", 21:"Plaza Cristo Rey", 
            22:"Paseo Pontones", 23:"Alcala", 24:"Casa de Campo", 
            25:"Santa Eugenia", 26:"Urbanizacion Embajada", 
            55:"Urbanizacion Embajada", 27:"Barajas", 47:"Mendez Alvaro", 
            48:"Paseo de la Castellana", 49:"Retiro", 54:"Ensanche de Vallecas", 
            56:"Plaza Eliptica", 57:"Sanchinarro", 58:"El Pardo", 
            59:"Parque Juan Carlos I", 86:"Tres Olivos", 60:"Tres Olivos", 
            103:"JMD Villaverde", 104:"EDAR La China", 
            106:"Centro Municipal de Acustica", 107:"JMD Hortaleza", 
            108:"Penagrande", 109:"JMD Chamberi", 110:"JMD Centro", 
            111:"JMD Chamartin", 112:"JMD Vallecas 1", 113:"JMD Vallecas 2", 
            114:"Matadero 1", 115:"Matadero 2"}

In [12]:
# Look up each station code in `labels`; codes that are not in the
# dictionary end up with a missing (NaN) label.
df = df.assign(LABEL=df["CODIGO_CORTO"].map(labels))

In [13]:
# Display the combined table to check the CODIGO_CORTO -> LABEL assignment.
df

Unnamed: 0,CODIGO_CORTO,DIRECCION,NOM_TIPO,LABEL
0,4,Plaza de Espana,UrbanTraffic,Plaza de Espana
1,8,Entre C/ Alcala y C/ O’ Donell,UrbanTraffic,Escuelas Aguirre
2,11,Avda. Ramon y Cajal esq. C/ Principe de Vergara,UrbanTraffic,Avenida Ramon y Cajal
3,16,C/ Arturo Soria esq. C/ Vizconde de los Asilos,UrbanBackground,Arturo Soria
4,17,C/. Juan Penalver,UrbanBackground,Villaverde Alto
5,18,Calle Farolillo - C/Ervigio,UrbanBackground,Farolillo
6,24,Casa de Campo (Terminal del Teleferico),Suburban,Casa de Campo
7,27,"C/. Jupiter, 21 (Barajas)",UrbanBackground,Barajas
8,35,Plaza del Carmen esq. Tres Cruces.,UrbanBackground,Plaza del Carmen
9,36,Avd. Moratalaz esq. Camino de los Vinateros,UrbanTraffic,Moratalaz


In [14]:
# Write the combined station table to disk; the positional index is not
# meaningful, so it is left out of the file.
df.to_csv(
    path_or_buf="control_stations_info.csv",
    index=False,
)