In [None]:
# notebooks/03_database_storage.ipynb

# 1. Import necessary libraries
import pandas as pd
import os
import sys

# Make the project's helper code importable from this notebook.
#
# The import below uses the package-qualified name `scripts.db_operations`,
# which Python resolves against the directory that *contains* `scripts/`
# (the project root), not against `scripts/` itself. The project root is
# therefore added to sys.path. The `scripts/` directory is kept on the path
# as well, exactly as before, so anything that relied on it still works.
#
# NOTE(review): like the original, this assumes the working directory is the
# notebooks/ folder, one level below the project root — confirm.
_project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
for _candidate in (_project_root, os.path.join(_project_root, 'scripts')):
    # Skip entries already present so re-running the cell does not keep
    # growing sys.path with duplicates.
    if _candidate not in sys.path:
        sys.path.append(_candidate)
from scripts.db_operations import connect_to_oracle, create_tables, insert_review_data

# 2. Build the path to the analyzed-review CSV.
# The path is derived from the current working directory and is left
# un-normalized, so it still contains the parent-directory segment.
processed_data_dir = os.path.join(os.path.abspath(''), os.pardir, 'data', 'processed')
input_filepath = os.path.join(processed_data_dir, 'fintech_app_reviews_analyzed.csv')

# 3. Read the CSV if it is there; otherwise report the problem and fall back
# to an empty DataFrame so that later steps can test `df_analyzed.empty`.
if os.path.exists(input_filepath):
    df_analyzed = pd.read_csv(input_filepath)
    print(f"Loaded {len(df_analyzed)} analyzed reviews for database storage.")

    # Prepare column types for insertion: 'Date' is parsed to datetime and
    # 'reviewId' is converted to str.
    df_analyzed = df_analyzed.assign(
        Date=pd.to_datetime(df_analyzed['Date']),
        reviewId=df_analyzed['reviewId'].astype(str),
    )
else:
    print(f"Error: Analyzed data file not found at {input_filepath}. Please run 02_sentiment_thematic_analysis.ipynb first.")
    df_analyzed = pd.DataFrame()

# 4. Push the analyzed reviews into Oracle.
# Flow: open a connection, make sure the tables exist, insert the rows, and
# always close the connection afterwards. Nothing is attempted when no data
# was loaded. Each failure path prints a message instead of raising, so the
# notebook cell completes either way.
if df_analyzed.empty:
    print("No analyzed data loaded. Skipping database operations.")
else:
    # Pre-set to None so the `finally` clause can tell whether a connection
    # was ever obtained.
    connection = None
    try:
        print("\n--- Connecting to Oracle Database ---")
        connection = connect_to_oracle()
        if not connection:
            print("Failed to connect to Oracle Database. Check config and connection details.")
        else:
            print("\n--- Creating Tables (if not exist) ---")
            # Insertion is only attempted when create_tables() returns a
            # truthy value.
            if not create_tables(connection):
                print("Table creation failed. Cannot proceed with data insertion.")
            else:
                print("\n--- Inserting Review Data ---")
                insert_review_data(connection, df_analyzed)
    except Exception as e:
        # Top-level boundary of the cell: report the error rather than
        # propagate it.
        print(f"An unexpected error occurred during database operations: {e}")
    finally:
        if connection:
            connection.close()
            print("Database connection closed.")