In [2]:
# Complete the following practice exercises:


# 1) The data available has four headers: 'car_model', 'year_of_manufacture', 'price', 'fuel'. Implement the extraction process for the CSV, JSON, and XML files.
# 2) Transform the values under the 'price' header such that they are rounded to 2 decimal places.
# 3) Implement the loading function for the transformed data to a target file, transformed_data.csv.
# 4) Implement the logging function for the entire process and save it in log_file.txt.
# 5) Test the implemented functions and log the events as done in the lab.

In [None]:
import glob 
import pandas as pd 
import xml.etree.ElementTree as ET 
from datetime import datetime 

# Path of the text file that log_progress() appends its timestamped entries to.
log_file = "log_file.txt" 
# Path of the CSV produced by the load step; extract() skips this name when globbing *.csv.
target_file = "transformed_data.csv"


def extract():
    """Collect every CSV, JSON and XML file in the working directory into one DataFrame.

    Files are discovered with ``glob`` patterns relative to the current
    working directory. The CSV whose name equals ``target_file`` is skipped so
    the pipeline's own output is not read back in as input.

    Returns:
        pandas.DataFrame: rows from all CSV files, then all JSON files, then
        all XML files, with a fresh 0..n-1 index. When no input file is found
        the result is an empty frame with the four expected columns.
    """
    columns = ['car_model', 'year_of_manufacture', 'price', 'fuel']

    # The empty seed frame fixes the column order and guarantees a well-formed
    # result even when no input files exist.
    frames = [pd.DataFrame(columns=columns)]

    # process all csv files, except the pipeline's own output file
    for csvfile in glob.glob("*.csv"):
        if csvfile != target_file:
            frames.append(extract_from_csv(csvfile))

    # process all json files
    for jsonfile in glob.glob("*.json"):
        frames.append(extract_from_json(jsonfile))

    # process all xml files
    for xmlfile in glob.glob("*.xml"):
        frames.append(extract_from_xml(xmlfile))

    # Concatenate once at the end: calling pd.concat inside the loops re-copies
    # every previously collected row for each additional file.
    return pd.concat(frames, ignore_index=True)

# 1. Function that collects data from a CSV file
def extract_from_csv(file_to_process):
    """Read one CSV file and return its contents as a DataFrame.

    Args:
        file_to_process: path of the CSV file to read.

    Returns:
        pandas.DataFrame: the parsed file, using pandas' default CSV settings.
    """
    csv_frame = pd.read_csv(filepath_or_buffer=file_to_process)
    return csv_frame

# 2. Function that collects data from a JSON file
def extract_from_json(file_to_process):
    """Read one line-delimited JSON file and return it as a DataFrame.

    Args:
        file_to_process: path of the JSON file; it is read with ``lines=True``,
            i.e. one JSON object per line.

    Returns:
        pandas.DataFrame: one row per JSON line.
    """
    json_frame = pd.read_json(path_or_buf=file_to_process, lines=True)
    return json_frame

# 3. Function that collects data from an XML file
def extract_from_xml(file_to_process):
    """Parse one XML file into a DataFrame of car records.

    Every direct child of the document root is treated as one record, and the
    record's fields are read from sub-elements named after the four columns.
    NOTE(review): this layout is assumed from the column names used across the
    pipeline -- confirm against the actual XML input files.

    Args:
        file_to_process: path of the XML file to parse.

    Returns:
        pandas.DataFrame: one row per record with columns 'car_model',
        'year_of_manufacture', 'price', 'fuel'. 'year_of_manufacture' is
        converted to int and 'price' to float; a missing or blank field
        becomes None. A document with no records yields an empty frame that
        still has the four columns.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
        ValueError: if a year or price field holds non-numeric text.
    """
    columns = ['car_model', 'year_of_manufacture', 'price', 'fuel']
    # Fields that must be numeric; every other field is kept as text.
    converters = {'year_of_manufacture': int, 'price': float}

    root = ET.parse(file_to_process).getroot()

    # Build plain records first and create the frame once, rather than growing
    # a DataFrame row by row.
    records = []
    for element in root:
        record = {}
        for column in columns:
            text = element.findtext(column)
            text = text.strip() if text is not None else ''
            if text:
                record[column] = converters.get(column, str)(text)
            else:
                record[column] = None
        records.append(record)

    return pd.DataFrame(records, columns=columns)

def transform(data):
    """Round the 'price' column to two decimal places.

    The column is cast to float, rounded, and written back into ``data``
    itself, so the caller's frame is modified in place.

    Args:
        data: DataFrame containing a 'price' column convertible to float.

    Returns:
        pandas.DataFrame: the same ``data`` object, with 'price' rounded.
    """
    # Per the lab requirement, only the price values are rounded.
    price_as_float = data['price'].astype(float)
    data['price'] = price_as_float.round(decimals=2)
    return data

def log_progress(message):
    """Append one timestamped entry to the log file.

    The entry has the form ``<timestamp>,<message>`` followed by a newline and
    is appended to the path held in the module-level ``log_file``.

    Args:
        message: text to record; it is concatenated with the timestamp.
    """
    # Year-Monthname-Day-Hour:Minute:Second, taken from the current local time
    stamp = datetime.now().strftime('%Y-%h-%d-%H:%M:%S')
    with open(log_file, "a") as log_handle:
        log_handle.write(''.join((stamp, ',', message, '\n')))
        
def load_data(target_file, transformed_data):
    """Write the transformed DataFrame to a CSV file.

    Uses pandas' default CSV settings, so the frame's index is written as the
    first, unnamed column.

    Args:
        target_file: path of the CSV file to create or overwrite.
        transformed_data: DataFrame to persist.
    """
    transformed_data.to_csv(path_or_buf=target_file)

# Run the ETL pipeline end to end: extract -> transform -> load.
# Each phase is bracketed by a pair of log_progress() calls so the log file
# records when the phase started and when it ended.
log_progress("ETL Job Started") 

# Log the beginning of the Extraction process 
log_progress("Extract phase Started") 
extracted_data = extract() 

# Log the completion of the Extraction process 
log_progress("Extract phase Ended") 

# Log the beginning of the Transformation process 
log_progress("Transform phase Started") 
# transform() writes the rounded prices back into the frame it is given, so
# extracted_data and transformed_data refer to the same object afterwards.
transformed_data = transform(extracted_data) 
print("Transformed Data") 
print(transformed_data) 

# Log the completion of the Transformation process 
log_progress("Transform phase Ended") 

# Log the beginning of the Loading process 
log_progress("Load phase Started") 
load_data(target_file,transformed_data) 

# Log the completion of the Loading process 
log_progress("Load phase Ended") 

# Log the completion of the ETL process 
log_progress("ETL Job Ended") 



Transformed Data
        car_model year_of_manufacture     price    fuel
0            ritz                2014   5000.00  Petrol
1             sx4                2013   7089.55  Diesel
2            ciaz                2017  10820.90  Petrol
3         wagon r                2011   4253.73  Petrol
4           swift                2014   6865.67  Diesel
5   vitara brezza                2018  13805.97  Diesel
6            ciaz                2015  10074.63  Petrol
7         s cross                2015   9701.49  Diesel
8            ciaz                2016  13059.70  Diesel
9            ciaz                2015  11119.40  Diesel
10       alto 800                2017   4253.73  Petrol
11           ciaz                2015  10223.88  Diesel
12           ciaz                2015  11194.03  Petrol
13         ertiga                2015   9104.48  Petrol
14          dzire                2009   3358.21  Petrol
15         ertiga                2016  11567.16  Diesel
16         ertiga              