# Indice de calidad de aire por ageb


__Descripción:__

A partír de los modelos de contaminacion por AGEB de cada contaminante (O3, PM10,PM2.5) se obtiene el Indice de calidad de aire por AGEB.


__Input__

- archivos de contaminación por AGEB modelados en  **interpolacion*.ipynb**

Responsable: Daniel Bustillos



In [3]:
import pandas as pd

In [4]:
df_O3 = pd.read_csv('../data/contaminacion/contaminacion_por_ageb/contaminacion_O3_ageb.csv')
df_PM10 = pd.read_csv('../data/contaminacion/contaminacion_por_ageb/contaminacion_PM10_ageb.csv')
df_PM25 = pd.read_csv('../data/contaminacion/contaminacion_por_ageb/contaminacion_PM2.5_ageb.csv')

In [5]:
df_O3.fillna(df_O3.mean(), inplace=True)
df_PM10.fillna(df_PM10.mean(), inplace=True)
df_PM25.fillna(df_PM25.mean(), inplace=True)

In [6]:
df_O3.dropna().shape

(5084, 22)

In [7]:
df_PM10.dropna().shape

(5084, 22)

In [8]:
df_PM25.dropna().shape

(5084, 22)

In [9]:
df_O3.head()

Unnamed: 0,Longitud,Latitud,CVE_AGEB,O3_por_ageb_0,O3_por_ageb_1,O3_por_ageb_7,O3_por_ageb_8,O3_por_ageb_9,O3_por_ageb_10,O3_por_ageb_11,...,O3_por_ageb_14,O3_por_ageb_15,O3_por_ageb_16,O3_por_ageb_17,O3_por_ageb_18,O3_por_ageb_19,O3_por_ageb_20,O3_por_ageb_21,O3_por_ageb_22,O3_por_ageb_23
0,-99.267061,19.651464,1142,13.628122,13.209581,3.833883,5.703524,12.004048,22.006814,35.477689,...,67.041504,67.668759,63.324419,55.104068,43.38611,30.757318,23.045932,18.905021,16.015706,14.29778
1,-98.954127,19.254488,1012,15.511295,14.751738,6.903726,8.597841,15.454088,26.431057,39.413621,...,70.816001,71.515558,67.63414,60.53732,50.311341,38.551757,30.031322,24.231629,19.879235,16.915637
2,-98.941566,19.64908,0393,13.115604,12.076184,3.374328,5.427114,12.038293,23.086096,36.9239,...,65.035937,64.816628,60.679129,53.267998,42.936391,31.701032,24.494514,20.165159,16.789342,14.433675
3,-99.390848,19.505639,003A,17.253947,17.067075,8.17156,9.424596,15.094111,24.229211,36.287788,...,68.26674,69.977465,66.455069,58.558184,46.959615,34.653979,26.763062,22.115811,19.07802,17.668616
4,-98.870761,19.151455,0037,17.721331,17.080992,8.995739,10.402019,16.98853,27.74967,40.549154,...,71.890517,73.121805,69.666211,62.810922,52.641014,41.128308,32.567274,26.428712,21.838155,18.962812


## convertir a indice

In [10]:
import datetime

# Fecha adelantada 24 horas
def asignar_fecha(row, columna):
    nueva_fecha = row[columna] + datetime.timedelta(hours=24)
    return nueva_fecha


# Conversión de unidades de microgramo/metro_cúbico a ppb(partículas por billón)
def convertir_ppb(row, columna):
    ppb = row[columna] / 1.96
    return ppb


# Cálculo del índice de calidad del aire para el PM10 (usando datos en microgramo/metro_cúbico)
def indice_PM10(row):
    if row >= 0 and row <= 40:
        indice = 1.2500 * (row)
        return round(indice)
    if row >= 41 and row <= 75:
        indice = (1.4412 * (row - 41)) + 51
        return round(indice)
    if row >= 76 and row <= 214:
        indice = (0.3551 * (row - 76)) + 101
        return round(indice)
    if row >= 215 and row <= 354:
        indice = (0.3525 * (row - 215)) + 151
        return round(indice)
    if row >= 355 and row <= 424:
        indice = (1.4348 * (row - 355)) + 201
        return round(indice)
    if row >= 425 and row <= 504:
        indice = (1.2532 * (row - 425) + 301)
        return round(indice)
    if row >= 505 and row <= 604:
        indice = (1.0000 * (row - 505) + 401)
        return round(indice)


# Cálculo del índice de calidad del aire para el O3 (usando datos en ppb)
def indice_O3(row):
    if row >= 0 and row <= 70:
        indice = 0.7143 * (row)
        return round(indice)
    if row >= 71 and row <= 95:
        indice = (2.0417 * (row - 71)) + 51
        return round(indice)
    if row >= 96 and row <= 154:
        indice = (2.4138 * (row - 96)) + 101
        return round(indice)
    if row >= 155 and row <= 204:
        indice = (1.0000 * (row - 155)) + 151
        return round(indice)
    if row >= 205 and row <= 404:
        indice = (0.4975 * (row - 205)) + 201
        return round(indice)
    if row >= 405 and row <= 504:
        indice = (1.000 * (row - 405) + 301)
        return round(indice)
    if row >= 505 and row <= 604:
        indice = (1.0000 * (row - 505) + 401)
        return round(indice)


def convertir_unidades(df, columna_fecha):
    df = df.sort_values([columna_fecha], ascending=[0])
    df['PM10mean_max_ppb'] = df.apply(lambda row: convertir_ppb(row, 'PM10mean_y'), axis=1)
    df_concentraciones = df[['fecha', 'O3_y', 'PM10mean_y_ppb', 'TMP', 'TMP_x', 'TMP_y']]

    return df_concentraciones



In [11]:
columnas_a_iterar = df_O3.columns.tolist()[4:]
columnas_a_iterar

['O3_por_ageb_1',
 'O3_por_ageb_7',
 'O3_por_ageb_8',
 'O3_por_ageb_9',
 'O3_por_ageb_10',
 'O3_por_ageb_11',
 'O3_por_ageb_12',
 'O3_por_ageb_13',
 'O3_por_ageb_14',
 'O3_por_ageb_15',
 'O3_por_ageb_16',
 'O3_por_ageb_17',
 'O3_por_ageb_18',
 'O3_por_ageb_19',
 'O3_por_ageb_20',
 'O3_por_ageb_21',
 'O3_por_ageb_22',
 'O3_por_ageb_23']

In [12]:
for columna in columnas_a_iterar:
     df_O3.loc[:,columna] = df_O3.loc[:,columna].apply(lambda x: indice_O3(x))

In [13]:
for columna in columnas_a_iterar:
     df_PM10.loc[:,columna] = df_PM10.loc[:,columna].apply(lambda x: indice_PM10(x))

In [14]:
for columna in columnas_a_iterar:
     df_PM25.loc[:,columna] = df_PM25.loc[:,columna].apply(lambda x: indice_PM10(x))

Promediamos los valores:

In [15]:
df_contaminacion_mean =  df_O3[['CVE_AGEB', 'O3_por_ageb_0', 'O3_por_ageb_1', 'O3_por_ageb_7', 'O3_por_ageb_8',
                                'O3_por_ageb_9', 'O3_por_ageb_10','O3_por_ageb_11', 'O3_por_ageb_12','O3_por_ageb_13',
                                'O3_por_ageb_14', 'O3_por_ageb_15', 'O3_por_ageb_16', 'O3_por_ageb_17', 'O3_por_ageb_18', 
                                'O3_por_ageb_19', 'O3_por_ageb_20','O3_por_ageb_21', 'O3_por_ageb_22', 'O3_por_ageb_23']]
df_contaminacion_mean.head()

Unnamed: 0,CVE_AGEB,O3_por_ageb_0,O3_por_ageb_1,O3_por_ageb_7,O3_por_ageb_8,O3_por_ageb_9,O3_por_ageb_10,O3_por_ageb_11,O3_por_ageb_12,O3_por_ageb_13,O3_por_ageb_14,O3_por_ageb_15,O3_por_ageb_16,O3_por_ageb_17,O3_por_ageb_18,O3_por_ageb_19,O3_por_ageb_20,O3_por_ageb_21,O3_por_ageb_22,O3_por_ageb_23
0,1142,13.628122,9,3,4,9,16,25,35,43,48.0,48.0,45.0,39,31,22,16,14,11,10
1,1012,15.511295,11,5,6,11,19,28,38,46,,52.0,48.0,43,36,28,21,17,14,12
2,0393,13.115604,9,2,4,9,16,26,36,43,46.0,46.0,43.0,38,31,23,17,14,12,10
3,003A,17.253947,12,6,7,11,17,26,35,44,49.0,50.0,47.0,42,34,25,19,16,14,13
4,0037,17.721331,12,6,7,12,20,29,38,46,53.0,55.0,50.0,45,38,29,23,19,16,14


In [16]:
for column in columnas_a_iterar:
    df_contaminacion_mean[column] = (df_O3.loc[:,column] + df_PM10.loc[:,column] + df_PM25.loc[:,column])/3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [17]:
df_contaminacion_mean.fillna(df_contaminacion_mean.mean(), inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


In [18]:
df_contaminacion_mean.head()

Unnamed: 0,CVE_AGEB,O3_por_ageb_0,O3_por_ageb_1,O3_por_ageb_7,O3_por_ageb_8,O3_por_ageb_9,O3_por_ageb_10,O3_por_ageb_11,O3_por_ageb_12,O3_por_ageb_13,O3_por_ageb_14,O3_por_ageb_15,O3_por_ageb_16,O3_por_ageb_17,O3_por_ageb_18,O3_por_ageb_19,O3_por_ageb_20,O3_por_ageb_21,O3_por_ageb_22,O3_por_ageb_23
0,1142,13.628122,26.333333,35.333333,41.333333,45.0,48.0,50.0,50.0,48.333333,48.0,47.0,45.333333,43.333333,41.0,38.0,34.0,31.0,29.0,28.0
1,1012,15.511295,25.0,30.0,34.333333,38.666667,43.333333,47.333333,49.0,49.333333,48.99984,48.333333,46.0,43.333333,40.333333,36.666667,32.666667,29.333333,27.666667,26.333333
2,0393,13.115604,27.0,36.0,42.333333,45.666667,48.0,50.333333,50.0,48.0,46.666667,45.666667,44.333333,43.0,41.0,39.0,35.0,31.666667,29.666667,26.556027
3,003A,17.253947,24.0,28.0,32.333333,36.333333,41.0,45.0,46.666667,47.333333,47.333333,46.666667,44.333333,42.0,38.333333,34.333333,31.0,28.666667,27.0,25.666667
4,0037,17.721331,25.333333,30.333333,34.666667,39.0,43.666667,47.666667,49.0,49.333333,49.666667,49.333333,46.666667,44.0,41.0,37.0,33.333333,30.0,28.333333,27.0


In [19]:
df_contaminacion_mean.to_csv("../data/production_data/contaminacion/contaminacion_indice_ageb.csv",index=False)