In [1]:
import pandas as pd
import json
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv(r'C:\Users\Lane\Documents\Projects\trading_bot\programs\server_credentials.env')

# Database credentials from environment variables
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_name = os.getenv('DB_NAME')

# Create SQLAlchemy engine for PostgreSQL connection
engine = create_engine(f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

# Load the CSV file as a DataFrame
file_path = r'C:\Users\Lane\Documents\Projects\trading_bot\data\master_data12.csv'  # Update with the actual path
new_data = pd.read_csv(file_path)

# Convert 'sector' and 'industry' columns to JSON arrays with one element
new_data['sector'] = new_data['sector'].apply(lambda x: json.dumps([x]) if pd.notna(x) else json.dumps([]))
new_data['industry'] = new_data['industry'].apply(lambda x: json.dumps([x]) if pd.notna(x) else json.dumps([]))

# Load existing data from the tracked_companies table
with engine.connect() as connection:
    existing_data = pd.read_sql('SELECT symbol, asset_name, sector, industry, first_traded, index_inclusion FROM tracked_companies', connection)



In [2]:
# Step 1: Identify new records that need to be inserted
new_records = new_data[~new_data['symbol'].isin(existing_data['symbol'])]

In [3]:
# Step 2: Insert new records
try:
    with engine.connect() as connection:
        new_records.to_sql('tracked_companies', con=connection, if_exists='append', index=False)
    print("New records successfully inserted into tracked_companies.")
except Exception as e:
    print(f"An error occurred during new record insertion: {e}")

New records successfully inserted into tracked_companies.


In [4]:
# Step 3: Identify records in the database with NULL values in relevant fields
with engine.connect() as connection:
    incomplete_records = pd.read_sql(
        """
        SELECT symbol, asset_name, sector, industry, first_traded, index_inclusion 
        FROM tracked_companies 
        WHERE asset_name IS NULL OR sector IS NULL OR industry IS NULL OR first_traded IS NULL OR index_inclusion IS NULL
        """,
        connection
    )

In [6]:
# Step 4: Update existing records' blank fields
try:
    with engine.connect() as connection:
        for _, row in incomplete_records.iterrows():
            symbol = row['symbol']
            update_values = {}

            # Retrieve the updated information from master_data for the current symbol
            new_info = new_data[new_data['symbol'] == symbol]
            if not new_info.empty:
                # Check each field, and only add to update_values if the field in the server is null
                for col in ['asset_name', 'sector', 'industry', 'first_traded', 'index_inclusion']:
                    if pd.isna(row[col]) or row[col] == '' or row[col] == '[]':
                        if pd.notna(new_info.iloc[0][col]) and new_info.iloc[0][col] != '':
                            # Explicitly handle NaN and None values
                            update_values[col] = new_info.iloc[0][col] if pd.notna(new_info.iloc[0][col]) else None

                # If there are fields to update, execute the update statement
                if update_values:
                    update_values['symbol'] = symbol  # Include symbol for WHERE clause
                    update_stmt = f"""
                    UPDATE tracked_companies
                    SET {', '.join([f"{col} = :{col}" for col in update_values.keys() if col != 'symbol'])}
                    WHERE symbol = :symbol
                    """
                    
                    # Pass update_values as a single dictionary to avoid unexpected keyword issues
                    connection.execute(text(update_stmt), update_values)
                    print(f"Updated {symbol} with values: {update_values}")

    print("Existing records updated where needed.")
except Exception as e:
    print(f"An error occurred during record updates: {e}")

  if pd.isna(row[col]) or row[col] == '' or row[col] == '[]':


Updated CCJ with values: {'index_inclusion': '2016/12/01', 'symbol': 'CCJ'}
Existing records updated where needed.
