In [1]:
import apache_beam as beam

class ProcessTransactions(beam.PTransform):
    """Convert CSV transaction lines to USD and keep only the large ones.

    Input elements are text lines of the form
    ``account,name,amount,currency,city``. Output elements are tuples
    ``(account, name, amount_usd, "USD", city)`` whose ``amount_usd`` is
    strictly greater than ``min_amount_usd``.

    Args:
        conversion_rates: Mapping from currency code to the multiplier that
            turns an amount in that currency into USD. A currency that is
            missing from the mapping is converted with a rate of 1.
        min_amount_usd: Exclusive lower bound, in USD, a transaction must
            exceed to be kept. Defaults to 100.
    """

    def __init__(self, conversion_rates, min_amount_usd=100):
        # Run the base-class initialiser too, so PTransform can set up its
        # own state before ours is attached.
        super().__init__()
        self.conversion_rates = conversion_rates
        self.min_amount_usd = min_amount_usd

    def expand(self, pcoll):
        """Apply the conversion and the size filter to ``pcoll``.

        Args:
            pcoll: PCollection of comma-separated transaction lines.

        Returns:
            PCollection of ``(account, name, amount_usd, "USD", city)``
            tuples with ``amount_usd > min_amount_usd``.
        """
        # Bind plain locals so the callables handed to Beam close over just
        # these two values rather than over `self`.
        rates = self.conversion_rates
        threshold = self.min_amount_usd

        def convert_to_usd(transaction):
            """Parse one CSV line and return it with the amount in USD.

            Raises:
                ValueError: if the line does not have exactly five fields or
                    the amount is not a valid number.
            """
            # Strip every field: with ", " separators the currency would
            # otherwise be " EUR", miss the rate table and silently be
            # converted with the fallback rate of 1.
            fields = [field.strip() for field in transaction.split(',')]
            if len(fields) != 5:
                raise ValueError(
                    "expected 5 comma-separated fields "
                    "(account,name,amount,currency,city), got %d: %r"
                    % (len(fields), transaction)
                )
            account, name, amount, currency, city = fields
            amount_usd = float(amount) * rates.get(currency, 1)
            return account, name, amount_usd, "USD", city

        def filter_large_transactions(transaction):
            """Return True when the converted amount exceeds the threshold."""
            # Index 2 is amount_usd in the tuple built by convert_to_usd.
            return transaction[2] > threshold

        return (
            pcoll
            | "Convert to USD" >> beam.Map(convert_to_usd)
            | "Filter Large Transactions" >> beam.Filter(filter_large_transactions)
        )


In [3]:
# Multiplier that turns one unit of each listed currency into US dollars.
conversion_rates = dict(
    EUR=1.12,
    GBP=1.3,
    JPY=0.0094,
    AUD=0.7,
    CNY=0.15,
    INR=0.014,
    ZAR=0.07,
    RUB=0.014,
)

# Build the pipeline one named stage at a time: read the raw lines, convert
# and filter them, then write the surviving records out as text.
with beam.Pipeline() as p:
    raw_lines = p | "Read from data.txt" >> beam.io.ReadFromText('data.txt')
    large_usd_transactions = (
        raw_lines
        | "Process Transactions" >> ProcessTransactions(conversion_rates)
    )
    processed_transactions = (
        large_usd_transactions
        | "Write Results" >> beam.io.WriteToText('outputProcessTransactions.txt')
    )
