In [333]:
import glob 
import pandas as pd
import requests
from datetime import datetime

In [334]:
logfile = "log.txt"
targetfile = "transformed_data.csv"
columns=['Name','Market Cap (US$ Billion)']

## Extract

In [335]:
def extract_from_json(file_to_process):
    dataframe = pd.read_json(file_to_process)
    return dataframe

In [336]:
def extract():
    extracted_data = pd.DataFrame(columns=columns)
    
    for jsonfile in glob.glob("*json"):
        extracted_data = extracted_data.append(extract_from_json(jsonfile), ignore_index=True)
    
    return extracted_data

In [337]:
def extract_rates():
    for csvfile in glob.glob("*csv"):
        df = pd.read_csv(csvfile)
    return df

In [338]:
def get_rate(data, currency):
    rate = data[data["Currency"] == currency].values[0][1]
    return rate

## Transform

In [339]:
def transform(extracted_data, rate, currency):
    extracted_data.columns = ["Name", f"Market Cap({currency}$)Billion"]
    extracted_data.iloc[:, -1] = extracted_data.iloc[:, -1].apply(lambda x: round(x * rate, 3))
    return extracted_data

## Load

In [340]:
def load(targetfile, data_to_load):
    data_to_load.to_csv(targetfile, index=False)

## Log

In [341]:
def log(message):
    timestamp_format = '%Y-%h-%d-%H:%M:%S'
    now = datetime.now()
    timestamp = now.strftime(timestamp_format)
    with open(logfile, "a") as f:
        f.write(timestamp + ', ' + message + '\n')

## Running the ETL Process

### Extracting Phase

In [342]:
log("ETL Job Started")
log("Extract Phase Started")
extracted_data = extract()
rates_data = extract_rates()
rate = get_rate(rates_data, "GBP")
log("Extraction Phase Ended")

  extracted_data = extracted_data.append(extract_from_json(jsonfile), ignore_index=True)


In [343]:
extracted_data.head()

Unnamed: 0,Name,Market Cap (US$ Billion)
0,JPMorgan Chase,390.934
1,Industrial and Commercial Bank of China,345.214
2,Bank of America,325.331
3,Wells Fargo,308.013
4,China Construction Bank,257.399


### Transform Phase

In [344]:
log("Transform Phase Started")
transformed_data = transform(extracted_data, rate, "GBP")
log("Transform Phase Ended")

In [345]:
transformed_data.head()

Unnamed: 0,Name,Market Cap(GBP$)Billion
0,JPMorgan Chase,340.0
1,Industrial and Commercial Bank of China,300.236
2,Bank of America,282.944
3,Wells Fargo,267.882
4,China Construction Bank,223.863


### Load Phase 

In [346]:
log("Load Phase Started")
load(targetfile, transformed_data)
log("Load Phase Ended")

In [347]:
print(rate)

0.869711
