In [2]:
import os
import sqlite3
import pandas as pd

def initialize_database_remote(csv_path='csv/04-remote_bank.csv', data_directory='databases'):
    """Load the remote-bank CSV into the ``bank_remote`` table of ``remote_bank.db``.

    The CSV is read first, so a missing or malformed file raises before any
    directory or database file is created. The table is replaced on every
    call, so re-running the cell is idempotent.

    Args:
        csv_path: Path to the source CSV. It must contain at least the columns
            ``id``, ``date``, ``type``, ``sender``, ``description`` and
            ``amount``; any other columns are ignored.
        data_directory: Directory that holds ``remote_bank.db``. It is created
            if missing. The original note says this path is the one listed
            under ``volumes`` in docker-compose.yaml.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        ValueError: If a required column is missing or a value cannot be
            converted to the declared dtype.
    """
    # Column dtypes for the bank export. 'date' is stored verbatim as text and
    # is NOT parsed or normalised here (the sample rows printed by this
    # notebook show DD/MM/YY values).
    dtype_remote_bank = {
        'id': int,
        'date': str,
        'type': str,
        'sender': str,
        'description': str,
        'amount': float
    }

    # Read before touching the filesystem: a bad CSV must not leave an empty
    # database file behind.
    df_remote_bank = pd.read_csv(
        csv_path,
        dtype=dtype_remote_bank,
        usecols=list(dtype_remote_bank.keys()),
    )

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(data_directory, exist_ok=True)
    db_path_remote_bank = os.path.join(data_directory, 'remote_bank.db')

    # Connecting creates the database file if it does not exist yet.
    conn = sqlite3.connect(db_path_remote_bank)
    try:
        # Replace the table wholesale so repeated runs do not append duplicates.
        df_remote_bank.to_sql('bank_remote', conn, if_exists='replace', index=False)
    finally:
        # Always release the file handle, even if the import fails.
        conn.close()

    print("Database initialized and data imported successfully.")

# Build the database when this cell/script is executed directly; the guard
# skips the call when the code is imported as a module instead.
if __name__ == '__main__':
    initialize_database_remote()

Database initialized and data imported successfully.


In [5]:
import sqlite3
import pandas as pd

def check_database(db_path_remote_bank='databases/remote_bank.db'):
    """Print every table in the SQLite database along with its first five rows.

    Args:
        db_path_remote_bank: Path to the SQLite database file. Note that
            ``sqlite3.connect`` creates an empty database if the file does not
            exist, in which case an empty table list is printed.
    """
    conn = sqlite3.connect(db_path_remote_bank)
    try:
        # sqlite_master is SQLite's catalogue; each fetched row is a 1-tuple (name,).
        tables = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table';"
        ).fetchall()
        print("Tables in the database:", tables)

        for (table_name,) in tables:
            # Identifiers cannot be bound as parameters, so quote the name
            # (doubling embedded quotes) to survive spaces and keywords.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            query = f"SELECT * FROM {quoted_name} LIMIT 5;"
            df = pd.read_sql_query(query, conn)
            print(f"First few rows from {table_name}:")
            print(df)
            print("\n")  # Blank lines to separate consecutive tables
    finally:
        # Release the connection even if a query above fails.
        conn.close()

# Inspect the database when this cell/script is executed directly; the guard
# skips the call when the code is imported as a module instead.
if __name__ == '__main__':
    check_database()

Tables in the database: [('bank_remote',)]
First few rows from bank_remote:
         id      date                   type       sender         description  \
0  10000002  13/01/22  direct debit received    Acme Inc.    Gaven Ariana 4 8   
1  10000007  22/02/22        Expense payment         None                None   
2  10000001  27/02/22  rejected direct debit  Witch Foods  Globex Corporation   
3  10000006  13/03/22      transfer received         None               33129   
4  10000005  25/04/22  direct debit received    Acme Inc.                 Bob   

   amount  
0  386.13  
1 -434.58  
2 -223.16  
3  856.76  
4  581.58  


