In [1]:
# Import packages
import pandas as pd
import xlrd
import glob

In [2]:
# Global variables
PATH = '../datas/raw/mr-ssa/airquality'
AIRQ_OUT = '../datas/preprocessing/mr-ssa/airquality'

In [3]:
# Function to generate csv from xlsx
def gen_csv(files_in: [str], path_out: str):
    for file in files_in:
        print(f'Starting {file}')
        
        # Open excel file
        wb = xlrd.open_workbook(file)
        
        # Get station name
        station = file.split('/')[-1]
        station = station[:-5]
        
        # Get all sheets
        sheet_names = wb.sheet_names()
        
        for sheet_name in sheet_names:
            # Get sheet
            sheet = wb.sheet_by_name(sheet_name)

            # Rows number
            nrows = sheet.nrows

            # Get cols names
            cols = sheet.row_values(2)

            # Get units
            units = sheet.row_values(3)

            # Concatenate cols with units
            for i in range(1, len(cols)):
                cols[i] = f'{cols[i]} ({units[i]})'

            # Init dataframe
            df = pd.DataFrame(columns=cols)

            # Get only valid datas
            for r in range(4, nrows-8):
                aux = pd.DataFrame([sheet.row_values(r)], columns=cols)
                df = pd.concat([df, aux], ignore_index=True)

            # Generate output name
            f_out = f'{path_out}/{station}-{sheet_name}'
            
            # Generate gzip output file
            df.to_csv(f'{f_out}.csv.gz', index=False, compression='gzip')
            
            print(f'[DONE] {f_out}.zip')
            
        print(f'Ending {file}')
    print('Ending all.')

In [4]:
# Get air quality excel files
f_airq = [f for f in glob.glob(PATH + "/*.xlsx")]
f_airq

['../datas/raw/mr-ssa/airquality/av-acm-detran.xlsx',
 '../datas/raw/mr-ssa/airquality/barros-reis.xlsx',
 '../datas/raw/mr-ssa/airquality/campo-grande.xlsx',
 '../datas/raw/mr-ssa/airquality/itaigara.xlsx',
 '../datas/raw/mr-ssa/airquality/paralela.xlsx',
 '../datas/raw/mr-ssa/airquality/piraja.xlsx']

In [5]:
gen_csv(f_airq, AIRQ_OUT)

Starting ../datas/raw/mr-ssa/airquality/av-acm-detran.xlsx
[DONE] ../datas/preprocessing/mr-ssa/airquality/av-acm-detran-2013.zip
[DONE] ../datas/preprocessing/mr-ssa/airquality/av-acm-detran-2014.zip
[DONE] ../datas/preprocessing/mr-ssa/airquality/av-acm-detran-2015.zip
Ending ../datas/raw/mr-ssa/airquality/av-acm-detran.xlsx
Starting ../datas/raw/mr-ssa/airquality/barros-reis.xlsx
[DONE] ../datas/preprocessing/mr-ssa/airquality/barros-reis-2013.zip
[DONE] ../datas/preprocessing/mr-ssa/airquality/barros-reis-2014.zip
[DONE] ../datas/preprocessing/mr-ssa/airquality/barros-reis-2015.zip
Ending ../datas/raw/mr-ssa/airquality/barros-reis.xlsx
Starting ../datas/raw/mr-ssa/airquality/campo-grande.xlsx
[DONE] ../datas/preprocessing/mr-ssa/airquality/campo-grande-2013.zip
[DONE] ../datas/preprocessing/mr-ssa/airquality/campo-grande-2014.zip
[DONE] ../datas/preprocessing/mr-ssa/airquality/campo-grande-2015.zip
Ending ../datas/raw/mr-ssa/airquality/campo-grande.xlsx
Starting ../datas/raw/mr-ssa