# Bank Transactions

This notebook ingests the _Bank Transactions_ CSV data set into a database on Databricks Community Edition (CE).

In [0]:
from pyspark.sql.types import *

In [0]:
# CONFIGURATIONS

# Name of the database that receives every ingested table.
DB_NAME = 'bank_db'
# Directory containing the source CSV files. File names are appended directly
# to this path, so the trailing slash is required.
DATA_DIR = '/FileStore/bank-transactions/'

# Ingestion catalogue.
#   key   - name of the table to create
#   value - 2-tuple: (CSV file name inside DATA_DIR, schema used to read it)
# Every column is declared with the default nullability (nullable).
DATA_FILES = {
  'customers': (
    'customers.csv',
    StructType()
      .add('customer_id', IntegerType())
      .add('name', StringType())
      .add('house_number', IntegerType())
      .add('street', StringType())
      .add('city', StringType())
      .add('postcode', StringType())
  ),

  'account_types': (
    'account_types.csv',
    StructType()
      .add('type', IntegerType())
      .add('description', StringType())
  ),

  'transaction_types': (
    'transaction_types.csv',
    StructType()
      .add('type', IntegerType())
      .add('description', StringType())
  ),

  'branches': (
    'branches.csv',
    StructType()
      .add('branch_id', IntegerType())
      .add('house_number', IntegerType())
      .add('street', StringType())
      .add('city', StringType())
      .add('postcode', StringType())
  ),

  'accounts': (
    'accounts.csv',
    StructType()
      .add('account', IntegerType())
      .add('customer_id', IntegerType())
      .add('branch_id', IntegerType())
      .add('type', IntegerType())
      .add('date_opened', DateType())
      .add('date_closed', DateType())
  ),

  'transactions': (
    'transactions.csv',
    StructType()
      .add('account', IntegerType())
      # NOTE(review): DateType carries no time of day - confirm the CSV
      # column really holds dates only, despite the name.
      .add('time_recorded', DateType())
      .add('type', IntegerType())
      # NOTE(review): FloatType is single-precision - confirm that is
      # precise enough for the amounts in this data set.
      .add('amount', FloatType())
  ),
}

In [0]:
# Start from a clean slate: remove the database (CASCADE also drops the
# tables it contains) and then create it again, empty.
drop_stmt = f'DROP DATABASE IF EXISTS {DB_NAME} CASCADE'
create_stmt = f'CREATE DATABASE {DB_NAME}'

spark.sql(drop_stmt)
spark.sql(create_stmt)

In [0]:
# Load every CSV listed in DATA_FILES and persist it as a table in DB_NAME.
# Each DATA_FILES value is a (CSV file name, schema) pair.
for table_name, (csv_name, schema) in DATA_FILES.items():
  # The first line of each file is a header row; column names and types come
  # from the explicit schema rather than being inferred.
  df = spark.read.csv(DATA_DIR + csv_name, schema=schema, header=True)
  # Overwrite instead of the default error-if-exists save mode, so this cell
  # can be re-run on its own without first dropping the database.
  df.write.mode('overwrite').saveAsTable(f'{DB_NAME}.{table_name}')