In [1]:
from app import app, db, bcrypt
from models import User, Stock, Portfolio
from flask import jsonify, request
import yfinance as yf
import pandas as pd
import time
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os



# Nifty 50 company symbols (Yahoo Finance format)
companyNamesArray = [
    'ADANIPORTS.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS',
    'BPCL.NS', 'BHARTIARTL.NS', 'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS', 'DRREDDY.NS', 'EICHERMOT.NS',
    'GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS',
    'ICICIBANK.NS', 'ITC.NS', 'INDUSINDBK.NS', 'INFY.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS',
    'MARUTI.NS', 'NESTLEIND.NS', 'NTPC.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SBIN.NS',
    'SUNPHARMA.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TCS.NS', 'TECHM.NS', 'TITAN.NS', 'ULTRACEMCO.NS', 'UPL.NS', 'WIPRO.NS'
]

all_data = []
batch_size = 15

for i in range(0, len(companyNamesArray), batch_size):
    batch = companyNamesArray[i:i+batch_size]
    tickers = " ".join(batch)
    try:
        ms = yf.Tickers(tickers)
        data = ms.history(period="max")
        all_data.append(data)
        print(f"Downloaded: {batch}")
    except Exception as e:
        print(f"Failed batch {batch}: {e}")
    time.sleep(65)  # Wait 10 seconds between batches

# Combine all_data into a single DataFrame
stockData_22 = pd.concat(all_data)
stockData_22 = stockData_22.loc[:, (['Close','Volume','Dividends','Stock Splits'], companyNamesArray)]
stockData_22.columns = stockData_22.columns.map(lambda x: f"{x[0]}_{x[1]}")
metrics = ['Close','Volume', 'Dividends', 'Stock Splits']

# Prepare the value_vars dictionary dynamically
value_vars = {}
for company in companyNamesArray:
    for metric in metrics:
        column_name = f"{metric}_{company}"
        if column_name in stockData_22.columns:
            value_vars[column_name] = (company, metric)
            
stockData_22 = stockData_22.reset_index()  # Add this line before melt
df_long = stockData_22.melt(
    id_vars=["Date"], 
    var_name='variable',
    value_name='value',
    value_vars=value_vars
)
df_long[['metric','company']] = df_long['variable'].str.split('_', expand=True)
df_long = df_long.drop(columns='variable')
new_order = ['Date','company','metric','value']
df_long = df_long[new_order]

# Create Metrics Table
metrics_df = df_long[['metric']].drop_duplicates().reset_index(drop=True)
metrics_df['Metric_ID'] = metrics_df.index + 1
metrics_df = metrics_df[['Metric_ID', 'metric']]

# Create Companies Table
companies_df = df_long[['company']].drop_duplicates().reset_index(drop=True)
companies_df['Company_ID'] = companies_df.index + 1
companies_df = companies_df[['Company_ID', 'company']]

# Create Data Table
data_df = df_long.merge(companies_df, on='company').merge(metrics_df, on='metric')
data_df = data_df[['Date', 'Company_ID', 'Metric_ID', 'value']]

# --- Connect to PostgreSQL database (Docker) ---
# Example: postgresql://postgres:password@localhost:5432/your_db
load_dotenv()
#pg_url = f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/{os.getenv('POSTGRES_DB')}"
#pg_url = "postgresql://postgres:yourpassword@localhost:5432/your_db"
#engine = create_engine(pg_url)

# Connect to the default 'postgres' database
engine = create_engine(f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/postgres")
with engine.connect() as conn:
    conn.execution_options(isolation_level="AUTOCOMMIT").execute(text(f"CREATE DATABASE {os.getenv('POSTGRES_DB')}"))

pg_url = f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/{os.getenv('POSTGRES_DB')}"
engine = create_engine(pg_url)

# Write the dataframes to SQL tables
metrics_df.to_sql('Metrics', engine, if_exists='replace', index=False)
companies_df.to_sql('Companies', engine, if_exists='replace', index=False)
data_df.to_sql('Data', engine, if_exists='replace', index=False)

print("Data written to PostgreSQL database.")



[*********************100%***********************]  15 of 15 completed


Downloaded: ['ADANIPORTS.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BPCL.NS', 'BHARTIARTL.NS', 'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS', 'DRREDDY.NS', 'EICHERMOT.NS']


[*********************100%***********************]  15 of 15 completed


Downloaded: ['GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS', 'ICICIBANK.NS', 'ITC.NS', 'INDUSINDBK.NS', 'INFY.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS']


[*********************100%***********************]  15 of 15 completed


Downloaded: ['MARUTI.NS', 'NESTLEIND.NS', 'NTPC.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TCS.NS', 'TECHM.NS', 'TITAN.NS', 'ULTRACEMCO.NS']


[*********************100%***********************]  2 of 2 completed


Downloaded: ['UPL.NS', 'WIPRO.NS']


: 

: 

: 

In [2]:
all_data

NameError: name 'all_data' is not defined

In [1]:
stockData_22

NameError: name 'stockData_22' is not defined

In [None]:
            
stockData_22 = stockData_22.reset_index()  # Add this line before melt
df_long = stockData_22.melt(
    id_vars=["Date"], 
    var_name='variable',
    value_name='value',
    value_vars=value_vars
)
df_long[['metric','company']] = df_long['variable'].str.split('_', expand=True)
df_long = df_long.drop(columns='variable')
new_order = ['Date','company','metric','value']
df_long = df_long[new_order]

# Create Metrics Table
metrics_df = df_long[['metric']].drop_duplicates().reset_index(drop=True)
metrics_df['Metric_ID'] = metrics_df.index + 1
metrics_df = metrics_df[['Metric_ID', 'metric']]

# Create Companies Table
companies_df = df_long[['company']].drop_duplicates().reset_index(drop=True)
companies_df['Company_ID'] = companies_df.index + 1
companies_df = companies_df[['Company_ID', 'company']]

# Create Data Table
data_df = df_long.merge(companies_df, on='company').merge(metrics_df, on='metric')
data_df = data_df[['Date', 'Company_ID', 'Metric_ID', 'value']]

# --- Connect to PostgreSQL database (Docker) ---
# Example: postgresql://postgres:password@localhost:5432/your_db
load_dotenv()
#pg_url = f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/{os.getenv('POSTGRES_DB')}"
#pg_url = "postgresql://postgres:yourpassword@localhost:5432/your_db"
#engine = create_engine(pg_url)





ObjectNotExecutableError: Not an executable object: 'CREATE DATABASE investments_db'

In [1]:
from sqlalchemy import text
# Connect to the default 'postgres' database
engine = create_engine(f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/postgres")
with engine.connect() as conn:
    conn.execution_options(isolation_level="AUTOCOMMIT").execute(text(f"CREATE DATABASE {os.getenv('POSTGRES_DB')}"))

pg_url = f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/{os.getenv('POSTGRES_DB')}"
engine = create_engine(pg_url)

# Write the dataframes to SQL tables
metrics_df.to_sql('Metrics', engine, if_exists='replace', index=False)
companies_df.to_sql('Companies', engine, if_exists='replace', index=False)
data_df.to_sql('Data', engine, if_exists='replace', index=False)

print("Data written to PostgreSQL database."
)


NameError: name 'create_engine' is not defined

In [5]:
from app import app, db, bcrypt
from models import User, Stock, Portfolio
from flask import jsonify, request
import yfinance as yf
import pandas as pd
import time
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os

# Nifty 50 company symbols (Yahoo Finance format)
companyNamesArray = [
    'ADANIPORTS.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS',
    'BPCL.NS', 'BHARTIARTL.NS', 'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS', 'DRREDDY.NS', 'EICHERMOT.NS',
    'GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS',
    'ICICIBANK.NS', 'ITC.NS', 'INDUSINDBK.NS', 'INFY.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS',
    'MARUTI.NS', 'NESTLEIND.NS', 'NTPC.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SBIN.NS',
    'SUNPHARMA.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TCS.NS', 'TECHM.NS', 'TITAN.NS', 'ULTRACEMCO.NS', 'UPL.NS', 'WIPRO.NS'
]

# Download data in smaller batches with proper error handling
all_data = []
batch_size = 15  # Reduced batch size
max_retries = 3

for i in range(0, len(companyNamesArray), batch_size):
    batch = companyNamesArray[i:i+batch_size]
    tickers = " ".join(batch)
    
    for retry in range(max_retries):
        try:
            ms = yf.Tickers(tickers)
            data = ms.history(period="max")
            if not data.empty:
                all_data.append(data)
                print(f"Downloaded: {batch}")
                break
        except Exception as e:
            print(f"Attempt {retry + 1} failed for batch {batch}: {e}")
            if retry == max_retries - 1:
                print(f"Skipping batch {batch} after {max_retries} attempts")
            time.sleep(10 * (retry + 1))  # Exponential backoff

# Process data in chunks
chunk_size = 100000  # Adjust based on your system's memory

# Combine and process data
try:
    stockData_22 = pd.concat(all_data)
    stockData_22 = stockData_22.loc[:, (['Close','Volume','Dividends','Stock Splits'], companyNamesArray)]
    stockData_22.columns = stockData_22.columns.map(lambda x: f"{x[0]}_{x[1]}")
    stockData_22 = stockData_22.reset_index()
    
    # Create metrics and companies DataFrames (these are usually small)
    metrics = ['Close','Volume', 'Dividends', 'Stock Splits']
    metrics_df = pd.DataFrame({'metric': metrics})
    metrics_df['Metric_ID'] = metrics_df.index + 1
    
    companies_df = pd.DataFrame({'company': companyNamesArray})
    companies_df['Company_ID'] = companies_df.index + 1

    # Connect to PostgreSQL
    load_dotenv()
    
    # Try to create database (ignore if exists)
    engine = create_engine(f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/postgres")
    try:
        with engine.connect() as conn:
            conn.execution_options(isolation_level="AUTOCOMMIT").execute(
                text(f"CREATE DATABASE {os.getenv('POSTGRES_DB')}")
            )
    except Exception as e:
        print(f"Note: {e}")  # Database might already exist

    # Connect to the target database
    pg_url = f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}@localhost:5432/{os.getenv('POSTGRES_DB')}"
    engine = create_engine(pg_url)

    # Write small tables first
    metrics_df.to_sql('Metrics', engine, if_exists='replace', index=False)
    companies_df.to_sql('Companies', engine, if_exists='replace', index=False)

    # Process and write the large data table in chunks
    for metric in metrics:
        for company in companyNamesArray:
            column_name = f"{metric}_{company}"
            if column_name in stockData_22.columns:
                # Get company_id and metric_id
                company_id = companies_df[companies_df['company'] == company]['Company_ID'].iloc[0]
                metric_id = metrics_df[metrics_df['metric'] == metric]['Metric_ID'].iloc[0]
                
                # Create chunk of data for this company/metric
                temp_df = pd.DataFrame({
                    'Date': stockData_22['Date'],
                    'Company_ID': company_id,
                    'Metric_ID': metric_id,
                    'value': stockData_22[column_name]
                })
                
                # Write to database in chunks
                temp_df.to_sql('Data', engine, if_exists='append', index=False, 
                             method='multi', chunksize=10000)
                
                print(f"Processed {company} - {metric}")

    print("Data written to PostgreSQL database.")

except Exception as e:
    print(f"Error during processing: {e}")

[*********************100%***********************]  15 of 15 completed


Downloaded: ['ADANIPORTS.NS', 'APOLLOHOSP.NS', 'ASIANPAINT.NS', 'AXISBANK.NS', 'BAJAJ-AUTO.NS', 'BAJFINANCE.NS', 'BAJAJFINSV.NS', 'BPCL.NS', 'BHARTIARTL.NS', 'BRITANNIA.NS', 'CIPLA.NS', 'COALINDIA.NS', 'DIVISLAB.NS', 'DRREDDY.NS', 'EICHERMOT.NS']


[*********************100%***********************]  15 of 15 completed


Downloaded: ['GRASIM.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HDFCLIFE.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS', 'HINDUNILVR.NS', 'ICICIBANK.NS', 'ITC.NS', 'INDUSINDBK.NS', 'INFY.NS', 'JSWSTEEL.NS', 'KOTAKBANK.NS', 'LT.NS', 'M&M.NS']


[*********************100%***********************]  15 of 15 completed


Downloaded: ['MARUTI.NS', 'NESTLEIND.NS', 'NTPC.NS', 'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBILIFE.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', 'TCS.NS', 'TECHM.NS', 'TITAN.NS', 'ULTRACEMCO.NS']


[*********************100%***********************]  2 of 2 completed


Downloaded: ['UPL.NS', 'WIPRO.NS']
Note: (psycopg2.errors.DuplicateDatabase) database "investments_db_nifty50" already exists

[SQL: CREATE DATABASE investments_db_nifty50]
(Background on this error at: https://sqlalche.me/e/20/f405)
Processed ADANIPORTS.NS - Close
Processed APOLLOHOSP.NS - Close
Processed ASIANPAINT.NS - Close
Processed AXISBANK.NS - Close
Processed BAJAJ-AUTO.NS - Close
Processed BAJFINANCE.NS - Close
Processed BAJAJFINSV.NS - Close
Processed BPCL.NS - Close
Processed BHARTIARTL.NS - Close
Processed BRITANNIA.NS - Close
Processed CIPLA.NS - Close
Processed COALINDIA.NS - Close
Processed DIVISLAB.NS - Close
Processed DRREDDY.NS - Close
Processed EICHERMOT.NS - Close
Processed GRASIM.NS - Close
Processed HCLTECH.NS - Close
Processed HDFCBANK.NS - Close
Processed HDFCLIFE.NS - Close
Processed HEROMOTOCO.NS - Close
Processed HINDALCO.NS - Close
Processed HINDUNILVR.NS - Close
Processed ICICIBANK.NS - Close
Processed ITC.NS - Close
Processed INDUSINDBK.NS - Close
Processe