# In [None]:
import glob
import pandas as pd
import logging 
from datetime import datetime
import xml.etree.ElementTree as xml

# Logging configuration: route INFO-and-above records to a text file, one
# "<timestamp> - <level> - <message>" entry per record.
_LOG_SETTINGS = {
    'filename': 'log_file.txt',
    'level': logging.INFO,
    'format': '%(asctime)s - %(levelname)s - %(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_LOG_SETTINGS)

# Module-level placeholder; the main program rebinds it to the extracted data.
final_df = pd.DataFrame()

def extract_csv(filename):
    """Read one CSV file into a DataFrame.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the file could not
        be read or parsed. Failures are logged, never raised.
    """
    logging.info(f"Extracting CSV file Started :{filename}")
    try:
        temp_df = pd.read_csv(filename)
    except Exception as e:
        # Deliberate best-effort boundary: one bad file must not abort the
        # whole run. Log at ERROR level (with traceback) so the failure is
        # distinguishable from ordinary progress messages.
        logging.exception(f"Error Occured While Extracting the data from csv file {filename} : {e}")
        return None

    logging.info(f"Extraction is Completed for the file {filename}")
    return temp_df
    

def extract_json(filename):
    """Read one JSON file into a DataFrame.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the file could not
        be read or parsed. Failures are logged, never raised.
    """
    logging.info(f"Extracting JSON file Started : {filename}")
    try:
        temp_df = pd.read_json(filename)
    except Exception as e:
        # Deliberate best-effort boundary: one bad file must not abort the
        # whole run. Log at ERROR level (with traceback) so the failure is
        # distinguishable from ordinary progress messages.
        logging.exception(f"Error Occured While Extracting the data from json file {filename} : {e}")
        return None

    logging.info(f"Extraction is Completed for the file {filename}")
    return temp_df
    
def extract_xml(filename):
    """Read one XML file of ``person`` records into a DataFrame.

    Each ``person`` element directly under the document root must contain
    ``name``, ``height`` and ``weight`` children; height and weight are
    parsed as floats.

    Args:
        filename: Path of the XML file to read.

    Returns:
        A ``pandas.DataFrame`` with columns ``name``, ``height``, ``weight``,
        or ``None`` when the file could not be read or any record is
        malformed. Failures are logged, never raised.
    """
    logging.info(f"Extracting XML file Started :{filename}")
    try:
        root = xml.parse(filename).getroot()
        rows = [
            [
                person.find('name').text,
                float(person.find('height').text),
                float(person.find('weight').text),
            ]
            for person in root.findall('person')
        ]
        temp_df = pd.DataFrame(rows, columns=["name", "height", "weight"])
    except Exception as e:
        # Deliberate best-effort boundary: a missing child element or a
        # non-numeric value rejects the whole file rather than the run.
        # Log at ERROR level (with traceback) so it stands out in the log.
        logging.exception(f"Error Occured While Extracting the data from xml file {filename} : {e}")
        return None

    logging.info(f"Extraction is Completed for the file {filename}")
    return temp_df


def extract_data(files):
    """Extract every supported file and combine the results.

    Files are dispatched on their (case-insensitive) extension: ``.csv``,
    ``.json`` and ``.xml`` are handled by ``extract_csv``, ``extract_json``
    and ``extract_xml`` respectively; any other file is ignored.

    Args:
        files: Iterable of file paths to extract.

    Returns:
        A single ``pandas.DataFrame`` holding the CSV rows first, then the
        JSON rows, then the XML rows, with a fresh 0..n-1 index. An empty
        DataFrame is returned when nothing could be extracted.
    """
    logging.info("Started Extracting data from the Files")
    logging.info(f"Printing the list of files to extract : {files}")

    # Frames are collected per type and concatenated once at the end;
    # concatenating inside the loop re-copies all earlier rows each time.
    csv_frames = []
    json_frames = []
    xml_frames = []

    for file_name in files:
        lowered = file_name.lower()
        if lowered.endswith(".csv"):
            frame, bucket = extract_csv(file_name), csv_frames
        elif lowered.endswith(".json"):
            frame, bucket = extract_json(file_name), json_frames
        elif lowered.endswith(".xml"):
            frame, bucket = extract_xml(file_name), xml_frames
        else:
            continue

        # The extractors return None for a file they failed to read.
        if frame is not None:
            bucket.append(frame)

    frames = csv_frames + json_frames + xml_frames
    if not frames:
        return pd.DataFrame()

    # ignore_index avoids duplicate row labels coming from the per-file frames.
    return pd.concat(frames, ignore_index=True)


def transformation(data_df):
    """Convert the ``height`` and ``weight`` columns to metric units.

    ``height`` is multiplied by 0.0254 (inches to meters) and ``weight`` by
    0.45359237 (pounds to kilograms); both are rounded to two decimals.

    Args:
        data_df: DataFrame with numeric ``height`` and ``weight`` columns.

    Returns:
        A new DataFrame with the converted columns; ``data_df`` itself is
        left untouched. An empty input is returned (as a copy) unchanged.

    Raises:
        KeyError: If a non-empty ``data_df`` lacks ``height`` or ``weight``.
    """
    inches_to_meters = 0.0254
    pounds_to_kilograms = 0.45359237

    logging.info("Transformation Started :")

    # Work on a copy so the caller's DataFrame is not modified as a side effect.
    result = data_df.copy()

    if result.empty:
        # Nothing to convert; an empty frame may not even have the columns.
        logging.warning("No rows to transform, skipping unit conversion")
    else:
        logging.info("Converting Height from Inches to Meters ")
        result["height"] = (result["height"] * inches_to_meters).round(2)

        logging.info("Converting Weight from Pounds to Kilograms ")
        result["weight"] = (result["weight"] * pounds_to_kilograms).round(2)

    logging.info("Transformation Ended :")

    return result

def loading_data(data_df, target_file="transformed_data.csv"):
    """Write the transformed data to a CSV file.

    Args:
        data_df: DataFrame to persist.
        target_file: Destination path of the CSV file. Defaults to
            ``transformed_data.csv`` in the current working directory.

    Returns:
        None. The row index is not written to the file.
    """
    logging.info("Loading the transformed data in to CSV file for future use ")

    data_df.to_csv(target_file, index=False)

    logging.info("Succesfully Loaded the data into csv file")

# Main Program
# Run the ETL pipeline over every file in the "source" directory.
# Forward slashes are understood by glob on Windows and POSIX alike; a
# backslash-separated pattern only works on Windows.
files = glob.glob("./source/*")

if files:
    logging.info("ETL Process Started !!!")
    final_df = extract_data(files)
    # extract_data yields an empty DataFrame (not None) when no file could be
    # extracted, so emptiness has to be checked before transforming.
    if final_df is None or final_df.empty:
        print("No Data to transform")
    else:
        transformed_df = transformation(final_df)
        loading_data(transformed_df)
        print("Pls Check the transformed data csv file")
else:
    print("No Files to Extract the data")

logging.info("ETL Process Ended !!!")
logging.shutdown()