In [13]:
import yfinance as yf
import pandas as pd
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Database connection setup
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
db_host = os.getenv("DB_HOST")
db_name = os.getenv("DB_NAME")

db_url = f"postgresql+psycopg2://{db_user}:{db_password}@{db_host}/{db_name}"
engine = create_engine(db_url)

# Fetch symbols from the Asset table in PostgreSQL
symbols = []
with engine.connect() as conn:
    result = conn.execute(text("SELECT AssetCode FROM Asset where AssetStatus = 'Active'"))
    for row in result:
        symbols.append(row[0])

# Fetch and process data for each symbol
all_data = []

for symbol in symbols:
    try:
        # Download stock data for each symbol
        df = yf.download(tickers=symbol, period="5d", interval="1d")
        
        # If no data is returned, skip this symbol
        if df.empty:
            print(f"No data for symbol: {symbol}")
            continue

        # Keep only relevant columns, rename, and add Symbol column
        df = df[['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']].copy()
        df.columns = ['Adj_Close', 'Close', 'High', 'Low', 'Open', 'Volume']
        df['Symbol'] = symbol
        df.reset_index(inplace=True)  # Flatten the Date index

        all_data.append(df)

    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")

# Combine all data into a single DataFrame with uniform columns
if all_data:
    combined_df = pd.concat(all_data, ignore_index=True)
    
    # Display a sample to verify data structure
    print("Sample data from combined DataFrame:")
    print(combined_df.head())

    # Define the table structure in PostgreSQL (create if not exists)
    table_name = 'pricestaging'
    combined_df.to_sql(table_name, con=engine, if_exists='replace', index=False)

    # Insert or update data in the staging table
    combined_df.to_sql(name=table_name, con=engine, if_exists='append', index=False, method='multi')

    print("Data fetch and insertion complete.")
else:
    print("No data fetched. Exiting.")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Sample data from combined DataFrame:
                       Date   Adj_Close       Close        High        Low  \
0 2024-11-01 00:00:00+00:00   97.580002   97.580002   98.300003  97.419998   
1 2024-11-04 00:00:00+00:00   98.400002   98.400002   99.879997  98.360001   
2 2024-11-05 00:00:00+00:00   99.209999   99.209999  101.089996  98.955002   
3 2024-11-06 00:00:00+00:00   96.730003   96.730003   97.320000  94.760002   
4 2024-11-07 00:00:00+00:00  100.809998  100.809998  100.830002  99.180000   

        Open    Volume Symbol  
0  98.220001   8501000   BABA  
1  98.820000   8681900   BABA  
2  99.970001  10804200   BABA  
3  95.800003  18140600   BABA  
4  99.260002  11267145   BABA  
Data fetch and insertion complete.
