# Procesamiento de datos de Sentinel-2

## Librerías

In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob, os


## Obtener los datos

In [62]:
filesNacionalesMean = glob.glob("data/sentinel/states/mean/*.csv")
filesNacionalesSum = glob.glob("data/sentinel/states/sum/*.csv")
filesNacionalesSum[:5]

['data/sentinel/states/sum\\CO_sum_2018-01-01.csv',
 'data/sentinel/states/sum\\CO_sum_2019-01-01.csv',
 'data/sentinel/states/sum\\CO_sum_2020-01-01.csv',
 'data/sentinel/states/sum\\CO_sum_2021-01-01.csv',
 'data/sentinel/states/sum\\CO_sum_2022-01-01.csv']

### Diccionarios complementarios

In [39]:
replace_emision = {
  'pm10': '$PM_{10}$',
  'pm2.5': '$PM_{2.5}$',
  'co': '$CO$',
  'nox':'$NO_x$',
  'cov':'$COV$',
  'nh3':'$NH_3$',
  'so2':"$SO_2$",
  'no2':"$NO_2$",
  'o3':"$O_3$",
  'hcho':"$HCHO$",
}


In [64]:
filesNacionalesSum[0].split("_")[2][:10]

'2018-01-01'

## Preparar función de lectura

In [77]:
def readFileSentinel(path, dfFull, kind):
  cols_to_keep = ['ADM1_NAME',kind]
  name_Quimico = path.split("\\")[1].split("_")[0].lower()
  date = path.split("_")[2][:10]

  df = pd.read_csv(path)
  df = df.drop(columns=["ADM1_CODE","ADM0_CODE","EXP1_YEAR","system:index","STR1_YEAR","STATUS","ADM0_NAME","DISP_AREA"])
  #df.rename(columns={kind: replace_emision[name_Quimico]}, inplace=True)

  df['date'] = date
  df['quimico'] = replace_emision[name_Quimico]
  # check if dfFull is empty
  if dfFull.empty:
    return df
  
  # merge dataframes by ADM1_NAME
  dfFull = pd.concat([dfFull, df], axis=0, ignore_index=True)
  
  return dfFull

## Leer datos 

### Datos de acumulado (anuales por estado)

In [78]:
dfFullSum = pd.DataFrame()

for path in filesNacionalesSum:
  
  dfFullSum = readFileSentinel(path, dfFullSum, 'sum')
  
dfFullSum.head()

Unnamed: 0,sum,Shape_Leng,Shape_Area,ADM1_NAME,date,quimico
0,3195.479256,2.805149,0.455086,Aguascalientes,2018-01-01,$CO$
1,75393.481781,31.487146,6.921294,Baja California,2018-01-01,$CO$
2,68894.335963,56.050239,6.40515,Baja California Sur,2018-01-01,$CO$
3,48385.760098,25.072991,4.888117,Campeche,2018-01-01,$CO$
4,59911.51182,15.500275,6.231878,Chiapas,2018-01-01,$CO$


## Datos de AGEBS (mensuales por ageb)

In [79]:
agebs_files = glob.glob("data/sentinel/AGEBS/month/*.csv")
agebs_files[:5]

['data/sentinel/AGEBS/month\\sum_O3_2018-07-01.csv',
 'data/sentinel/AGEBS/month\\sum_O3_2018-08-01.csv',
 'data/sentinel/AGEBS/month\\sum_O3_2018-09-01.csv',
 'data/sentinel/AGEBS/month\\sum_O3_2018-10-01.csv',
 'data/sentinel/AGEBS/month\\sum_O3_2018-11-01.csv']

In [90]:
def readAgebMonthly(path, dfFull):
  df = pd.read_csv(path)
  df.drop(columns=["system:index"], inplace=True)
  name_Quimico = path.split("_")[1].lower()
  date = path.split("_")[2][:10]
  
  df['quimico'] = replace_emision[name_Quimico]
  df['date'] = date
  
  return df

dfFullAgebs = pd.DataFrame()

for path in agebs_files:
  dfFullAgebs = readAgebMonthly(path, dfFullAgebs)
dfFullAgebs.head()

Unnamed: 0,sum,CVE_LOC,CVE_MUN,CVE_ENT,CVEGEO,CVE_AGEB,quimico,date
0,1.160822,1,1,6,0600100010081,0081,$O_3$,2022-04-01
1,0.520858,1,1,6,060010001019A,019A,$O_3$,2022-04-01
2,0.193654,1,1,6,0600100010359,0359,$O_3$,2022-04-01
3,1.726554,1,1,6,0600100010151,0151,$O_3$,2022-04-01
4,1.592069,1,1,6,0600100010166,0166,$O_3$,2022-04-01


In [87]:
replace_emision['co']

'$CO$'