In [1]:
import glob
import pandas as pd
from datetime import datetime

In [2]:
import warnings
# Silence every FutureWarning for the rest of the session.
# NOTE(review): this is a global filter, so it also hides library deprecation
# notices (e.g. for APIs scheduled for removal) — confirm that is intended.
warnings.simplefilter(action="ignore", category=FutureWarning)

In [3]:
def extract_from_json(file_to_process):
    """Read a JSON source into a DataFrame.

    Parameters
    ----------
    file_to_process : path or file-like
        Passed straight through to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_json`` parses from the source.
    """
    return pd.read_json(file_to_process)


def extract_from_csv(file_to_process):
    """Read a CSV source into a DataFrame.

    Parameters
    ----------
    file_to_process : path or file-like
        Passed straight through to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_csv`` parses from the source.
    """
    return pd.read_csv(file_to_process)

In [4]:
# Column names used by extract() to initialise the combined DataFrame.
columns=["Name","Market Cap (US$ Billion)"]

In [5]:
def extract():
    """Combine every ``*.json`` file in the working directory into one frame.

    Each file is parsed with ``extract_from_json`` and the results are
    concatenated in a single ``pd.concat`` call. ``DataFrame.append`` is not
    used because it was removed in pandas 2.0, and concatenating once avoids
    re-copying the accumulated frame on every loop iteration.

    Returns
    -------
    pandas.DataFrame
        All rows from all JSON files with a fresh 0..n-1 index. The columns
        listed in the module-level ``columns`` come first (filled with NaN if
        a file lacks them), followed by any extra columns found in the files.
        If no JSON file is found, an empty frame with ``columns`` is returned.
    """
    # Sort the paths so the row order does not depend on filesystem ordering.
    json_paths = sorted(glob.glob("*.json"))
    frames = [extract_from_json(path) for path in json_paths]

    if not frames:
        # Nothing to combine: keep the expected schema for downstream code.
        return pd.DataFrame(columns=columns)

    combined = pd.concat(frames, ignore_index=True)

    # Keep the declared columns first, then any additional ones, which is the
    # layout produced by appending onto an initially empty, pre-columned frame.
    extra = [name for name in combined.columns if name not in columns]
    return combined.reindex(columns=list(columns) + extra)

In [6]:
# Load the exchange-rate table, using the first CSV column as the index.
df = pd.read_csv("exchange_rates_1.csv", index_col=0)
# Preview the first rows.
df.head()

Unnamed: 0_level_0,rates
Currency,Unnamed: 1_level_1
AED,3.984422
AFN,97.333243
ALL,115.821959
AMD,428.130276
ANG,1.948465


In [7]:
# Select the GBP row; the list selector keeps the result as a one-row DataFrame.
# NOTE(review): this value is not passed to transform() in the cells below,
# which relies on the literal 0.886906 instead — consider wiring it through.
exchange_rate = df.loc[["GBP"]]
exchange_rate

Unnamed: 0_level_0,rates
Currency,Unnamed: 1_level_1
GBP,0.886906


In [8]:
def transform(data, exchange_rate=0.886906):
    """Convert the market-cap column from USD to GBP.

    The input frame is left untouched: the conversion is applied to a copy, so
    the cell that calls this function can be re-run safely and previously
    displayed frames do not silently change.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``"Market Cap (US$ Billion)"`` column.
    exchange_rate : float, optional
        Multiplier applied to the USD values. Defaults to 0.886906, the GBP
        rate shown in the exchange-rate table earlier in the notebook.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the market-cap column holds the converted values
        rounded to 3 decimals and is named ``"Market Cap (GBP$ Billion)"``.

    Raises
    ------
    KeyError
        If ``data`` has no ``"Market Cap (US$ Billion)"`` column.
    """
    usd_column = "Market Cap (US$ Billion)"
    gbp_column = "Market Cap (GBP$ Billion)"

    converted = data.copy()  # never mutate the caller's frame
    converted[usd_column] = (exchange_rate * converted[usd_column]).round(3)
    return converted.rename(columns={usd_column: gbp_column})

In [9]:
def load(target_file, data_to_load):
    """Write a DataFrame out as CSV, omitting the index column.

    Parameters
    ----------
    target_file : path or file-like
        Destination handed to ``DataFrame.to_csv``.
    data_to_load : pandas.DataFrame
        Frame to serialise.
    """
    data_to_load.to_csv(path_or_buf=target_file, index=False)

In [10]:
def log(message, log_path="logfile.txt"):
    """Append a timestamped line to the log file.

    Each line has the form ``<timestamp>,<message>`` where the timestamp is
    ``Year-MonthAbbrev-Day-Hour:Minute:Second``.

    Parameters
    ----------
    message : str
        Text to record after the timestamp.
    log_path : str, optional
        File to append to. Defaults to ``"logfile.txt"``.
    """
    # "%b" (abbreviated month name) is the portable spelling; "%h" is a
    # platform-specific alias that is not accepted by every C library.
    timestamp_format = "%Y-%b-%d-%H:%M:%S"
    timestamp = datetime.now().strftime(timestamp_format)
    with open(log_path, "a") as f:
        f.write(timestamp + "," + message + "\n")

In [11]:
# Record the start of the whole job and of the extract phase in the log file.
log("ETL Job Started")
log("Extract phase Started")

In [12]:
# Gather the rows from every *.json file in the working directory.
extracted_data = extract()

# Preview the first ten rows.
extracted_data.head(10)

Unnamed: 0,Name,Market Cap (US$ Billion)
0,JPMorgan Chase,368.78
1,Industrial and Commercial Bank of China,295.65
2,Bank of America,279.73
3,Wells Fargo,214.34
4,China Construction Bank,207.98
5,Agricultural Bank of China,181.49
6,HSBC Holdings PLC,169.47
7,Citigroup Inc.,163.58
8,Bank of China,151.15
9,China Merchants Bank,133.37


In [13]:
# Record the end of the extract phase.
log("Extract phase Ended")

In [14]:
# Log the start of the transform phase so that every phase has a matching
# Started/Ended pair in the log file.
log("Transform phase Started")
transformed_data = transform(extracted_data)

# Preview the first ten converted rows.
transformed_data.head(10)

Unnamed: 0,Name,Market Cap (GBP$ Billion)
0,JPMorgan Chase,327.073
1,Industrial and Commercial Bank of China,262.214
2,Bank of America,248.094
3,Wells Fargo,190.099
4,China Construction Bank,184.459
5,Agricultural Bank of China,160.965
6,HSBC Holdings PLC,150.304
7,Citigroup Inc.,145.08
8,Bank of China,134.056
9,China Merchants Bank,118.287


In [15]:
# Record the end of the transform phase.
log("Transform phase Ended")

In [16]:
# Record the start of the load phase.
log("Load phase Started")

In [17]:
# Write the transformed frame to CSV (load() omits the index column).
load("bank_market_cap_gbp.csv", transformed_data)

In [18]:
# Record the end of the load phase, then close out the job so the
# "ETL Job Started" entry has a matching end marker.
log("Load phase Ended")
log("ETL Job Ended")