In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
import joblib



  from pandas.core import (


In [3]:
# Define base directory paths.
# The project root defaults to the original workstation location; set the
# STREAMLIT_PANEL_BASE_DIR environment variable to run the notebook from a
# different checkout without editing this cell.
base_dir = os.environ.get(
    'STREAMLIT_PANEL_BASE_DIR',
    r'd:\Users\Jonps\Documents\portfolio_triagem\Streamlit analizer panel',
)
# The trailing separators are kept so these values stay identical to the ones
# this cell has always produced (other code may rely on their exact form).
data_dir = os.path.join(base_dir, 'data/')
model_dir = os.path.join(base_dir, 'models', 'linear_regression/')
# Make sure the output folder exists before any model is written to it.
os.makedirs(model_dir, exist_ok=True)

# Load the SP500 stock list
csv_file_path = os.path.join(base_dir, 'sp500_stocks.csv')
sp500_stocks = pd.read_csv(csv_file_path)




In [4]:
def _build_windows(scaled, look_back):
    """Slice a scaled ``(n, 1)`` array into overlapping training windows.

    Each sample is ``look_back`` consecutive values taken from column 0 and
    its target is the value that immediately follows the window.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays. Both are empty when ``scaled`` has
        ``look_back`` rows or fewer.
    """
    X, Y = [], []
    for start in range(len(scaled) - look_back):
        end = start + look_back
        X.append(scaled[start:end, 0])
        Y.append(scaled[end, 0])
    return np.array(X), np.array(Y)


def train_linear_regression(symbol, look_back=100):
    """Train and save a Linear Regression model on one symbol's closing prices.

    Reads ``<data_dir>/<symbol>.csv``, min-max scales the ``Close`` column to
    [0, 1], builds sliding windows of ``look_back`` consecutive scaled values
    (target: the next value) and fits a ``LinearRegression`` on them. The
    fitted model is written to ``<model_dir>/<symbol>_lr.pkl`` with joblib.

    Every recoverable data problem (missing file, empty file, missing
    columns, no usable prices, too few rows) is reported with ``print`` and
    the function returns without saving anything, so a loop over many
    symbols keeps going.

    Args:
        symbol: Ticker used to build both the input CSV name and the output
            model file name.
        look_back: Number of consecutive closing prices fed to the model as
            features. Defaults to 100, the value previously hard-coded.
            The saved file name does not encode this value.

    Returns:
        None.

    Raises:
        ValueError: If ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError(f'look_back must be >= 1, got {look_back}')

    # Load data
    data_path = os.path.join(data_dir, f'{symbol}.csv')
    if not os.path.exists(data_path):
        print(f'Data for {symbol} not found.')
        return

    try:
        stock_data = pd.read_csv(data_path)
    except pd.errors.EmptyDataError:
        # A zero-byte CSV raises here, before the `.empty` check below can run.
        print(f'No valid data for {symbol}.')
        return

    required_columns = {'Date', 'Close'}
    if stock_data.empty or not required_columns.issubset(stock_data.columns):
        print(f'No valid data for {symbol}.')
        return

    stock_data = stock_data.assign(
        Date=pd.to_datetime(stock_data['Date'])
    ).set_index('Date')

    # Drop missing prices up front: MinMaxScaler passes NaNs through unchanged
    # and LinearRegression.fit rejects any NaN in its input, which previously
    # surfaced as an uncaught error for series with only some prices missing.
    close_prices = stock_data['Close'].dropna()
    if close_prices.empty:
        print(f'No closing prices for {symbol}.')
        return

    # Prepare data
    scaler = MinMaxScaler(feature_range=(0, 1))
    try:
        scaled_data = scaler.fit_transform(close_prices.values.reshape(-1, 1))
    except ValueError as e:
        print(f'Error scaling data for {symbol}: {e}')
        return

    x_train, y_train = _build_windows(scaled_data, look_back)

    if len(x_train) == 0 or len(y_train) == 0:
        print(f'Not enough data to create training set for {symbol}.')
        return

    # Build and train model
    model = LinearRegression()
    model.fit(x_train, y_train)

    # Save model
    model_path = os.path.join(model_dir, f'{symbol}_lr.pkl')
    joblib.dump(model, model_path)
    print(f'Model for {symbol} saved as {symbol}_lr.pkl')




In [5]:
# Walk the ticker column of the S&P 500 table, fitting and saving one
# Linear Regression model per entry.
sp500_symbols = sp500_stocks['Symbol']
for symbol in sp500_symbols:
    train_linear_regression(symbol)

Model for A saved as A_lr.pkl
Model for AAL saved as AAL_lr.pkl
Model for AAPL saved as AAPL_lr.pkl
Model for ABBV saved as ABBV_lr.pkl
Model for ABNB saved as ABNB_lr.pkl
Model for ABT saved as ABT_lr.pkl
Model for ACGL saved as ACGL_lr.pkl
Model for ACN saved as ACN_lr.pkl
Model for ADBE saved as ADBE_lr.pkl
Model for ADI saved as ADI_lr.pkl
Model for ADM saved as ADM_lr.pkl
Model for ADP saved as ADP_lr.pkl
Model for ADSK saved as ADSK_lr.pkl
Model for AEE saved as AEE_lr.pkl
Model for AEP saved as AEP_lr.pkl
Model for AES saved as AES_lr.pkl
Model for AFL saved as AFL_lr.pkl
Model for AIG saved as AIG_lr.pkl
Model for AIZ saved as AIZ_lr.pkl
Model for AJG saved as AJG_lr.pkl
Model for AKAM saved as AKAM_lr.pkl
Model for ALB saved as ALB_lr.pkl
Model for ALGN saved as ALGN_lr.pkl
Model for ALL saved as ALL_lr.pkl
Model for ALLE saved as ALLE_lr.pkl
Model for AMAT saved as AMAT_lr.pkl
Model for AMCR saved as AMCR_lr.pkl
Model for AMD saved as AMD_lr.pkl
Model for AME saved as AME_lr.