In a recent report, I encountered data involving multiple currencies, and to standardize the values and facilitate analysis, I converted all amounts into EUR. To achieve this, I implemented an API-based solution that retrieved real-time exchange rates, ensuring consistent and accurate currency conversion for all order values.

In [118]:
import pandas as pd
pd.set_option('display.float_format', '{:.0f}'.format)
import numpy as np
import matplotlib.pyplot as plt
import requests

import hashlib
import random
from datetime import datetime, timedelta

In [119]:
df = pd.read_excel(r'C:\Users\lily\Desktop\IT alapok privát\PORTFOLIO PROJECT\Currency conversion\GRIN04 - PO - HCx category filtered_Aggregate Data 4_Nov_2024_CURR_EMEA_Q1Q2_filtered.xlsx')

#### Data Anonymization

Replace Identifiers with Pseudonyms

In [122]:
df['Order Number'] = df['Order Number'].apply(lambda x: hashlib.md5(str(x).encode()).hexdigest())

Modify Numerical Values

In [124]:
df['Total Order Value'] = df['Total Order Value'] * np.random.uniform(0.8, 1.2)

Mask Text Columns

In [126]:
df['Region'] = df['Region'].apply(lambda x: f"Region-{random.randint(1, 100)}")

Replace Dates with Relative or Random Dates

In [128]:
start_date = datetime(2005, 1, 1)
end_date = datetime(2010, 1, 1)

df['Invoice Received Date'] = df['Invoice Received Date'].apply(
    lambda x: start_date + timedelta(days=random.randint(0, (end_date - start_date).days))
)

Drop Unnecessary Columns

In [130]:
df = df.drop(columns=['Country', 'Status', 'Invoice Type'])

In [131]:
df.head()

Unnamed: 0,Region,Order Number,Total Order Value,Invoice Received Date,Currency
0,Region-20,e9ceb346cf13e845ef576bd60af1fa47,40540986,2006-04-18,HUF
1,Region-85,e9ceb346cf13e845ef576bd60af1fa47,40540986,2009-09-10,HUF
2,Region-60,e9ceb346cf13e845ef576bd60af1fa47,40540986,2008-01-25,HUF
3,Region-95,e9ceb346cf13e845ef576bd60af1fa47,40540986,2005-12-02,HUF
4,Region-72,e9ceb346cf13e845ef576bd60af1fa47,40540986,2009-02-11,HUF


In [132]:
df = df.dropna(subset=["Currency"])

In [133]:
currencies = df["Currency"].unique()
currencies

array(['HUF', 'EUR', 'TRY', 'NOK', 'KES', 'JPY', 'USD', 'NGN', 'KRW',
       'GBP', 'CZK', 'CHF', 'RON', 'SEK', 'RSD', 'DKK', 'PLN', 'HKD',
       'MAD', 'AUD', 'ZAR', 'TWD', 'CAD', 'ILS', 'AED', 'MXN', 'KWD',
       'GHS', 'SGD', 'NZD', 'QAR', 'SAR'], dtype=object)

#### Currency conversion using API

In [136]:
API_KEY = "f92a63f246ce6cfee8218f42" 
BASE_URL = f"https://v6.exchangerate-api.com/v6/{API_KEY}/latest/EUR"

response = requests.get(BASE_URL)
if response.status_code == 200:
    rates_data = response.json()
    exchange_rates = rates_data.get("conversion_rates", {})
else:
    print(f"Error fetching exchange rates: {response.status_code} - {response.text}")
    exchange_rates = {}

In [137]:
def convert_to_eur(row):
    rate = exchange_rates.get(row['Currency'], None)
    if rate is not None and rate > 0: 
        return row['Total Order Value'] / rate
    else:
        return None  

In [138]:
df['Total Order Value in EUR'] = df.apply(convert_to_eur, axis=1)

In [139]:
df.head()

Unnamed: 0,Region,Order Number,Total Order Value,Invoice Received Date,Currency,Total Order Value in EUR
0,Region-20,e9ceb346cf13e845ef576bd60af1fa47,40540986,2006-04-18,HUF,98661
1,Region-85,e9ceb346cf13e845ef576bd60af1fa47,40540986,2009-09-10,HUF,98661
2,Region-60,e9ceb346cf13e845ef576bd60af1fa47,40540986,2008-01-25,HUF,98661
3,Region-95,e9ceb346cf13e845ef576bd60af1fa47,40540986,2005-12-02,HUF,98661
4,Region-72,e9ceb346cf13e845ef576bd60af1fa47,40540986,2009-02-11,HUF,98661


In [140]:
df.to_excel("converted_invoices.xlsx", index=False)