In [5]:
#Imports
import pandas as pd
import os

In [6]:
# Load the CSV data
file_path = r'C:\Users\Lane\Documents\Projects\trading_bot\data\old data\Accounts_History_2023.csv'  # Update this path as needed
data = pd.read_csv(file_path)

In [7]:
# Step 1: Remove any leading/trailing whitespace from all string columns
data = data.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

In [8]:
# Step 2: Remove rows where 'ticker' is NaN to clean extraneous text
data = data.dropna(subset=['Symbol'])  # 'Symbol' is renamed to 'ticker' later in Step 3

In [9]:
# Step 3: Rename columns
rename_columns = {
    'Run Date': 'transaction_date',
    'Account': 'portfolio_name',
    'Action': 'notes',
    'Symbol': 'ticker',
    'Description': 'asset_name',
    'Quantity': 'quantity',
    'Price': 'price',
    'Amount': 'transaction_amount',
    'Commission': 'commission',
    'Fees': 'fees'
}
data = data.rename(columns=rename_columns)


In [10]:
# Step 4: Fill missing values in 'commission' and 'fees' columns with 0
data['commission'] = data['commission'].fillna(0)
data['fees'] = data['fees'].fillna(0)

In [11]:
# Step 5: Delete unnecessary columns
columns_to_delete = [
    'Type', 'Exchange Quantity', 'Exchange Currency', 'Currency', 
    'Exchange Rate', 'Accrued Interest', 'Settlement Date'
]
data = data.drop(columns=columns_to_delete, errors='ignore')

In [12]:
# Step 6: Reorder columns to match the specified order
final_columns_order = [
    'ticker', 'asset_name', 'quantity', 'price', 'transaction_amount', 
    'commission', 'fees', 'portfolio_name', 'transaction_date', 'notes'
]
data = data[final_columns_order]

In [13]:
# Step 7: Convert 'transaction_date' to the correct format for SQL insertion (YYYY-MM-DD)
data['transaction_date'] = pd.to_datetime(data['transaction_date'], format='%m/%d/%Y').dt.strftime('%Y-%m-%d')

In [None]:
#Step 8: Remove the leading dash (-) from any ticker in the 'ticker' column

In [None]:
# Step 9: Save to a new CSV file with '_cleaned' suffix
cleaned_file_path = os.path.splitext(file_path)[0] + '_cleaned.csv'
data.to_csv(cleaned_file_path, index=False)

print(f"Cleaned file saved as: {cleaned_file_path}")

Cleaned file saved as: Accounts_History_2024_cleaned.csv


In [None]:
# Step 10: Insert Data into PostgreSQL
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv('../server_credentials.env')

# Set up database connection using environment variables
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
db_name = os.getenv('DB_NAME')

# Create SQLAlchemy engine for database connection
engine = create_engine(f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

# Insert data into the database
try:
    with engine.connect() as connection:
        data.to_sql('asset_ledger', con=connection, if_exists='append', index=False)
    # Step 9d: Confirm successful insertion
    print("Data successfully inserted into the database.")
except Exception as e:
    # Step 9d: Handle errors during insertion
    print(f"An error occurred during data insertion: {e}")


ValueError: invalid literal for int() with base 10: 'None'