In [4]:
import os
import sqlite3
import pandas as pd

def initialize_database():
    """Create (or refresh) the SQLite database from the project's CSV files.

    Reads ``csv/02-client.csv``, ``csv/03-student.csv`` and ``csv/01-bank.csv``
    (paths relative to the current working directory) and writes them to the
    ``client``, ``student`` and ``bank`` tables of ``databases/streamlit.db``.
    Existing tables with those names are replaced, so the function can be
    re-run safely. Prints a confirmation message on success.

    Raises:
        FileNotFoundError: If one of the CSV files is missing. The CSVs are
            read before the database is touched, so in that case no
            directory or database file is created.
    """
    # Path to the SQLite database within the Docker container
    # (see databases in `volumes` in docker-compose.yaml)
    data_directory = 'databases'
    db_path = os.path.join(data_directory, 'streamlit.db')

    # Bank column data types. Only these columns are loaded from the bank CSV
    # (they double as the `usecols` list below).
    dtype_bank = {
        'id': int,
        'date': str,  # Kept as a string; assumes the CSV already uses 'YYYY-MM-DD' -- TODO confirm
        'type': str,
        'sender': str,
        'description': str,
        'amount': float,
        'bank_synced': str,
        'bank_sync_date': str,
        'erp_synced': str,
        'erp_sync_date': str
    }

    # Load every CSV *before* opening the database: a missing or malformed
    # input then fails fast without leaving an empty database file behind.
    # The paths are relative to the working directory (the original note says
    # this is /home/jovyan/notebooks/csv inside the Jupyter container).
    df_client = pd.read_csv('csv/02-client.csv')
    df_student = pd.read_csv('csv/03-student.csv')
    df_bank = pd.read_csv('csv/01-bank.csv', dtype=dtype_bank, usecols=list(dtype_bank))

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(data_directory, exist_ok=True)

    # Connecting creates the database file if it does not exist yet
    conn = sqlite3.connect(db_path)
    try:
        # One table per data set; 'replace' drops and recreates existing tables
        for table_name, frame in (
            ('client', df_client),
            ('student', df_student),
            ('bank', df_bank),
        ):
            frame.to_sql(table_name, conn, if_exists='replace', index=False)
        # Explicit commit so persistence does not depend on to_sql's own commit
        conn.commit()
    finally:
        # Always release the connection, even if an import step fails
        conn.close()

    print("Database initialized and data imported successfully.")

# Entry point: build the database only when this cell/module runs as the main program
if __name__ == "__main__":
    initialize_database()

Database initialized and data imported successfully.


In [2]:
import sqlite3
import pandas as pd

def check_database():
    """Print the tables in the SQLite database and a preview of each one.

    Connects to ``databases/streamlit.db`` (relative to the current working
    directory), prints the list of table names found in ``sqlite_master``,
    then prints up to five rows from every table.

    Note: ``sqlite3.connect`` creates an empty database file if none exists
    at that path, in which case an empty table list is printed.
    """
    # Path to the SQLite database
    db_path = 'databases/streamlit.db'

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Each row returned by fetchall() is a 1-tuple: (table_name,)
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        print("Tables in the database:", tables)

        # Fetch and print the first few rows from each table to check
        for (table_name,) in tables:
            # Identifiers cannot be bound as SQL parameters, so quote the name
            # (doubling embedded quotes). Without this, a table name with
            # spaces, keywords, or punctuation makes the query fail.
            quoted_name = '"' + table_name.replace('"', '""') + '"'
            query = f"SELECT * FROM {quoted_name} LIMIT 5;"
            df = pd.read_sql_query(query, conn)
            print(f"First few rows from {table_name}:")
            print(df)
            print("\n")  # Print a newline for better separation between tables
    finally:
        # Release the connection even if one of the queries raises
        conn.close()

# Entry point: inspect the database only when this cell/module runs as the main program
if __name__ == "__main__":
    check_database()

Tables in the database: [('client',), ('student',), ('bank',)]
First few rows from client:
   client id     name last name              email1                email2  \
0      33111   Karina     Weeks    KWeeks@gmail.com    KWeeks@hotmail.com   
1      33112   Anders     Klein    AKlein@gmail.com    AKlein@hotmail.com   
2      33113  Elianna     Evans    EEvans@gmail.com    EEvans@hotmail.com   
3      33114    Elias   Cabrera  ECabrera@gmail.com  ECabrera@hotmail.com   
4      33115  Daleyza   Richard  DRichard@gmail.com  DRichard@hotmail.com   

     handle account number  
0    KWeeks    ES408636776  
1    AKlein    ES137237055  
2    EEvans    ES719897276  
3  ECabrera    ES909374860  
4  DRichard    ES251352145  


First few rows from student:
   student id student name student last name  grade  associated client id
0      530222        Casey          Palacios     10                 33193
1      530223         Cory             Ahmed      9                 33196
2      530224      