# In [9]:
import logging
import os
import pandas as pd
from datetime import datetime

from config1.cfg import LOG_ETL, ROOT_CSV, ROOT_SQL, ROOT_TXT
from db.db_connection import create_engine_connection
from utils.utils import create_folder, create_txt, get_filename_path
from utils.transform import normalize_data

# Reuse the log created in the ETL: its name is the LOG_ETL prefix followed
# by today's date formatted as YYYY-MM-DD.
log_name = LOG_ETL + f"{datetime.today():%Y-%m-%d}"
logger = logging.getLogger(log_name)


def extract_data():
    """Run every SQL file found under ROOT_SQL and dump each result to a CSV.

    Steps:
      1. Ensure the ROOT_CSV folder exists (``create_folder``).
      2. Open a single connection from ``create_engine_connection()``.
      3. For each ``file name -> full path`` pair returned by
         ``get_filename_path(ROOT_SQL)``, read the query text, execute it and
         write the rows to ``ROOT_CSV/<file name without extension>.csv``
         (without the DataFrame index).

    Returns:
        None. The CSV files on disk are the output.
    """
    logger.info('*task_exctract')

    create_folder(ROOT_CSV)
    engine = create_engine_connection()
    sql_files = get_filename_path(ROOT_SQL)

    with engine.connect() as connection:
        for sql_file_name, sql_full_path in sql_files.items():
            # Keep the SQL file open only while reading it; the query run and
            # the CSV write below do not need the handle.
            with open(sql_full_path) as f:
                query = f.read()

            logger.info('Extracting data from %s', sql_file_name)
            # NOTE(review): presumably a SQLAlchemy connection. SQLAlchemy 2.x
            # rejects plain-string statements and expects sqlalchemy.text();
            # confirm the pinned version before upgrading.
            result = connection.execute(query)
            df = pd.DataFrame(result)

            logger.info('Writing information to csv.')
            # splitext drops whatever extension is present, instead of
            # blindly cutting the last four characters of the name.
            base_name, _ = os.path.splitext(sql_file_name)
            df.to_csv(os.path.join(ROOT_CSV, f'{base_name}.csv'), index=False)

    logger.info('Extracting data from database.')
    # CSV files have been produced at this point.

def transform_data():
    """Normalize every CSV under ROOT_CSV and hand it to ``create_txt``.

    Ensures the ROOT_TXT folder exists, then for each
    ``file name -> full path`` pair returned by
    ``get_filename_path(ROOT_CSV)`` loads the CSV into a DataFrame, passes it
    through ``normalize_data`` and calls ``create_txt`` with the result and
    the file name stripped of its extension.

    Returns:
        None.
    """
    logger.info('transform_task')

    create_folder(ROOT_TXT)

    csv_files = get_filename_path(ROOT_CSV)
    for csv_name, csv_path in csv_files.items():
        logger.info('Working on %s file.', csv_name)
        dataframe = pd.read_csv(csv_path)

        logger.info('Clearing data on %s file.', csv_name)
        dataframe = normalize_data(dataframe)

        logger.info('Creating txt for %s file.', csv_name)
        # splitext drops whatever extension is present, instead of blindly
        # cutting the last four characters of the name.
        base_name, _ = os.path.splitext(csv_name)
        create_txt(dataframe, base_name)

    logger.info('Transform data from dataframe/csv.')


def load_data():
    """Log the start of the load step.

    Currently a placeholder: it only emits two log records and performs no
    upload itself.
    """
    for message in ('data_load', 'Loading data to S3.'):
        logger.info(message)



# Notebook cell output: ModuleNotFoundError: ignored