Import Libraries and setup logging

In [2]:
import pandas as pd
import requests
import logging
import sqlite3
#import keys \
import os
import json
# Configure the root logger: INFO level, "<time> - <level> - <message>" lines.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)



Load CSV and inspect columns

In [3]:
# Read both source tables (paths are relative to the current working
# directory) and print their column labels for a quick schema check.
transactions = pd.read_csv(filepath_or_buffer='Data/transactions.csv')
account_holders = pd.read_csv(filepath_or_buffer='Data/account_holders.csv')

print(
    "Transactions Columns:",
    transactions.columns,
)
print(
    "Account Holders Columns:",
    account_holders.columns,
)

FileNotFoundError: [Errno 2] No such file or directory: 'Data/transactions.csv'

Remove rows with missing values

In [None]:
# Discard every row that has at least one missing value; both frames are
# modified in place.
transactions.dropna(axis='index', how='any', inplace=True)
account_holders.dropna(axis='index', how='any', inplace=True)

Correct data types

In [None]:
# Parse the date columns into pandas datetimes.  A missing column only
# produces a warning.
if 'transaction_date' not in transactions.columns:
    logging.warning('Column transaction_date not found in transactions dataset.')
else:
    transactions['transaction_date'] = pd.to_datetime(
        transactions['transaction_date']
    )

if 'account_creation_date' not in account_holders.columns:
    logging.warning('Column account_creation_date not found in account_holders dataset.')
else:
    account_holders['account_creation_date'] = pd.to_datetime(
        account_holders['account_creation_date']
    )



Merge Data

In [None]:
# Join every transaction to its account holder on account_id.  The inner join
# keeps only rows whose account_id appears in both frames.
if 'account_id' not in transactions.columns or 'account_id' not in account_holders.columns:
    logging.error('account_id column not found in one of the datasets.')
    # The following cells use merged_data unconditionally, so stop here with
    # a clear error instead of letting them fail with a NameError.
    raise KeyError('account_id column not found in one of the datasets.')

merged_data = pd.merge(transactions, account_holders, on='account_id', how='inner')

Display merged data

In [None]:
# Show the first five rows of the merged frame as a sanity check.
print(merged_data.head(n=5))


Store merged_data in SQLite database

In [None]:
# Open the SQLite database file and overwrite the bank_transactions table
# with the merged frame (the DataFrame index is not written as a column).
# The connection stays open for the cells below.
conn = sqlite3.connect('Data/cleaned_data.db')
merged_data.to_sql(
    name='bank_transactions',
    con=conn,
    index=False,
    if_exists='replace',
)


Fetch API Data

In [None]:
def fetch_exchange_rates(api_key, timeout=10):
    """Fetch the latest exchange rates from the Open Exchange Rates API.

    Args:
        api_key: App id sent as the ``app_id`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or None when the request fails, the server
        answers with an HTTP error status, or the body is not valid JSON.
    """
    url = 'https://openexchangerates.org/api/latest.json'
    try:
        # params= URL-encodes the key; timeout= keeps a stalled connection
        # from blocking forever (requests applies no timeout by default).
        response = requests.get(url, params={'app_id': api_key}, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for 4xx/5xx responses
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # str(e) can embed the full request URL, including the API key, so
        # log only the exception type and the HTTP status (when there is one).
        status = getattr(getattr(e, 'response', None), 'status_code', None)
        logging.error(
            f'Error fetching exchange rates data: {type(e).__name__} (HTTP status: {status})'
        )
        return None

    # Only report success once the body has actually been decoded.
    logging.info('Successfully fetched exchange rates data.')
    return payload


def fetch_market_data(api_key, symbol, timeout=10):
    """Fetch the daily time series for *symbol* from the Alpha Vantage API.

    Args:
        api_key: Alpha Vantage key sent as the ``apikey`` query parameter.
        symbol: Ticker symbol sent as the ``symbol`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload containing a 'Time Series (Daily)' entry, or
        None when the request fails, the server answers with an HTTP error
        status, the body is not valid JSON, or that entry is absent.
    """
    url = 'https://www.alphavantage.co/query'
    query = {
        'function': 'TIME_SERIES_DAILY',
        'symbol': symbol,
        'apikey': api_key,
    }
    try:
        # params= URL-encodes the values; timeout= keeps a stalled connection
        # from blocking forever (requests applies no timeout by default).
        response = requests.get(url, params=query, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for 4xx/5xx responses
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # str(e) can embed the full request URL, including the API key, so
        # log only the exception type and the HTTP status (when there is one).
        status = getattr(getattr(e, 'response', None), 'status_code', None)
        logging.error(
            f'Error fetching market data: {type(e).__name__} (HTTP status: {status})'
        )
        return None

    # The code that stores this payload indexes 'Time Series (Daily)'
    # directly, so a 200 response without that entry is treated as a failure.
    # Only the key names are logged, never the values.
    if not isinstance(payload, dict) or 'Time Series (Daily)' not in payload:
        found = sorted(payload) if isinstance(payload, dict) else type(payload).__name__
        logging.error(
            f'Error fetching market data: response has no daily time series (got: {found})'
        )
        return None

    logging.info('Successfully fetched market data.')
    return payload


# `keys` is never imported in this file (the import at the top is commented
# out), so referencing it directly raises NameError.  Resolve each setting
# from the optional local `keys` module first, then from an environment
# variable of the same name.
try:
    import keys as _keys
except ImportError:
    _keys = None


def _get_setting(name):
    """Return setting *name* from `keys` or the environment, or None if unset."""
    value = getattr(_keys, name, None) or os.environ.get(name)
    if not value:
        logging.error(f'Missing required setting: {name}')
    return value


_exchange_rates_api_key = _get_setting('EXCHANGE_RATES_API_KEY')
_alpha_vantage_api_key = _get_setting('ALPHA_VANTAGE_API_KEY')
_market_symbol = _get_setting('MARKET_SYMBOL')

# Skip a request whose settings are missing; the cells that consume these
# results already handle None by logging a warning.
exchange_rates = (
    fetch_exchange_rates(_exchange_rates_api_key)
    if _exchange_rates_api_key
    else None
)
market_data = (
    fetch_market_data(_alpha_vantage_api_key, _market_symbol)
    if _alpha_vantage_api_key and _market_symbol
    else None
)


Store API data in SQLite

In [None]:
# Persist the exchange-rate payload as one row per currency, each stamped
# with the payload's timestamp (interpreted as seconds since the epoch).
if not exchange_rates:
    logging.warning('Exchange rates data not available.')
else:
    rates_df = pd.DataFrame(
        list(exchange_rates['rates'].items()),
        columns=['currency', 'rate'],
    )
    rates_df['timestamp'] = pd.to_datetime(exchange_rates['timestamp'], unit='s')
    rates_df.to_sql(name='exchange_rates', con=conn, index=False, if_exists='replace')

# Persist the daily time series as one row per date.
if not market_data:
    logging.warning('Market data not available.')
elif not isinstance(market_data, dict) or not market_data.get('Time Series (Daily)'):
    # A payload without the series (for example an API error message) would
    # otherwise raise KeyError below.
    logging.warning("Market data has no 'Time Series (Daily)' entry; nothing stored.")
else:
    # The payload maps date -> {field: value}; transpose so dates become rows.
    market_df = pd.DataFrame(market_data['Time Series (Daily)']).T
    # Store prices/volumes as numbers rather than text so they can be
    # aggregated and plotted; unparseable values become NaN.
    market_df = market_df.apply(pd.to_numeric, errors='coerce')
    market_df.index = pd.to_datetime(market_df.index)
    market_df.reset_index(inplace=True)
    market_df.rename(columns={'index': 'date'}, inplace=True)
    market_df.to_sql('market_data', conn, if_exists='replace', index=False)


Query the SQLite database and export the results to CSV

In [None]:
def _export_query(query, csv_path):
    """Run *query* on `conn`, write the result to *csv_path*, print a preview.

    Returns the query result as a DataFrame.
    """
    result = pd.read_sql_query(query, conn)
    result.to_csv(csv_path, index=False)
    print(result.head())
    return result


# Total transactions per account
query1 = """
SELECT account_id, COUNT(transaction_id) as total_transactions
FROM bank_transactions
GROUP BY account_id
"""

# Sum of transaction amounts per account
query2 = """
SELECT account_id, SUM(transaction_amount) as total_amount
FROM bank_transactions
GROUP BY account_id
"""

# Latest exchange rates
query3 = """
SELECT * FROM exchange_rates
"""

# Recent market data
query4 = """
SELECT * FROM market_data
"""

# to_csv does not create missing parent directories, so make sure the output
# directory exists before the first export.
os.makedirs('out', exist_ok=True)

# Close the connection even when a query or a CSV write fails.
try:
    total_transactions = _export_query(query1, 'out/total_transactions_per_account.csv')
    total_amounts = _export_query(query2, 'out/total_amounts_per_account.csv')
    exchange_rates_df = _export_query(query3, 'out/exchange_rates.csv')
    market_data_df = _export_query(query4, 'out/market_data.csv')
finally:
    conn.close()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd


# total_transactions, total_amounts, exchange_rates_df, market_data_df

# 1. Total Transactions per Account
# Bar chart with one bar per account_id; saved as a PNG and then displayed.
plt.figure(figsize=(10, 6))
plt.bar(
    x=total_transactions['account_id'],
    height=total_transactions['total_transactions'],
    color='skyblue',
)
plt.xlabel('Account ID')
plt.ylabel('Total Transactions')
plt.title('Total Transactions per Account')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('out/total_transactions_per_account.png')
plt.show()

# 2. Sum of Transaction Amounts per Account
# Bar chart with one bar per account_id; saved as a PNG and then displayed.
plt.figure(figsize=(10, 6))
plt.bar(
    x=total_amounts['account_id'],
    height=total_amounts['total_amount'],
    color='lightgreen',
)
plt.xlabel('Account ID')
plt.ylabel('Total Amount')
plt.title('Sum of Transaction Amounts per Account')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('out/total_amounts_per_account.png')
plt.show()

# 3. Latest Exchange Rates
# Bar chart with one bar per currency; saved as a PNG and then displayed.
plt.figure(figsize=(10, 6))
plt.bar(
    x=exchange_rates_df['currency'],
    height=exchange_rates_df['rate'],
    color='orange',
)
plt.xlabel('Currency')
plt.ylabel('Exchange Rate')
plt.title('Latest Exchange Rates')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('out/exchange_rates.png')
plt.show()

# 4. Recent Market Data (uses the 'date' and '4. close' columns)
# The values may have come back from the database as text, which matplotlib
# would plot on categorical axes.  Coerce them to datetimes and numbers first;
# this is a no-op when they already have those types.
market_dates = pd.to_datetime(market_data_df['date'])
closing_prices = pd.to_numeric(market_data_df['4. close'], errors='coerce')

plt.figure(figsize=(10, 6))
plt.plot(market_dates, closing_prices, marker='o', linestyle='-', color='purple')
plt.title('Recent Market Data')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('out/recent_market_data.png')
plt.show()
