In [2]:
import pandas as pd

In [52]:
mf = pd.read_csv("datasets/transformed_data_mf.csv")
i_f = pd.read_csv("datasets/transformed_data_if.csv")
etf = pd.read_csv("datasets/transformed_data_etf.csv")

In [4]:
mf.head()

Unnamed: 0,Stock,Date,Adj Close,Close,High,Low,Open,Volume
0,AEPGX,2023-01-03,46.336636,49.150002,49.150002,49.150002,49.150002,0.0
1,AEPGX,2023-01-04,46.883434,49.73,49.73,49.73,49.73,0.0
2,AEPGX,2023-01-05,46.638317,49.470001,49.470001,49.470001,49.470001,0.0
3,AEPGX,2023-01-06,47.722488,50.619999,50.619999,50.619999,50.619999,0.0
4,AEPGX,2023-01-09,48.335285,51.27,51.27,51.27,51.27,0.0


In [5]:
mf.tail()

Unnamed: 0,Stock,Date,Adj Close,Close,High,Low,Open,Volume
11542,VYM,2024-11-22,133.619995,133.619995,133.740005,132.690002,132.690002,1572300.0
11543,VYM,2024-11-25,134.419998,134.419998,134.979996,134.080002,134.380005,849900.0
11544,VYM,2024-11-26,134.460007,134.460007,134.550003,133.720001,134.339996,708400.0
11545,VYM,2024-11-27,134.279999,134.279999,135.050003,134.190002,134.679993,658200.0
11546,VYM,2024-11-29,134.740005,134.740005,135.070007,134.440002,134.520004,458600.0


In [12]:
import pycaret
from pycaret.regression import *
from pyfinance.ols import PandasRollingOLS

In [56]:

etf['Date'] = pd.to_datetime(etf['Date'])
etf = etf.sort_values(by=['Stock', 'Date'])

etf = etf[['Stock', 'Date', 'Adj Close']]

rolling_avg = (
    etf.groupby('Stock')['Adj Close']
    .rolling(window=30)
    .mean()
    .reset_index(level=0)  
)

etf['MA_30'] = rolling_avg['Adj Close']

etf[etf['Stock'] == 'AEPGX'].head(35)
etf.dropna()
 

Unnamed: 0,Stock,Date,Adj Close,MA_30
29,ACWX,2023-02-14,46.405899,45.993340
30,ACWX,2023-02-15,46.170238,46.093259
31,ACWX,2023-02-16,45.972286,46.158301
32,ACWX,2023-02-17,45.896877,46.233397
33,ACWX,2023-02-21,45.359570,46.256335
...,...,...,...,...
12020,XLY,2024-11-22,218.100006,206.061333
12021,XLY,2024-11-25,220.330002,206.824333
12022,XLY,2024-11-26,221.130005,207.599666
12023,XLY,2024-11-27,220.039993,208.303999


In [57]:
def get_model(df):
    df['Date'] = pd.to_datetime(df['Date'])

    # Extract Year, Month, and Day from Date
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    df['Day'] = df['Date'].dt.day

    df = df.drop(columns=['Date'])

    df['Stock'] = df['Stock'].astype('category')

    target = 'Adj Close'
    selected_features = ['Stock', 'Year', 'Month', 'Day','MA_30'] 

    df = df[selected_features + [target]]

    reg = setup(data=df, target=target, categorical_features=['Stock'])

    best_model = compare_models()
    return best_model

best_model = get_model(i_f)

Unnamed: 0,Description,Value
0,Session id,8212
1,Target,Adj Close
2,Target type,Regression
3,Original data shape,"(11544, 6)"
4,Transformed data shape,"(11544, 29)"
5,Transformed train set shape,"(8080, 29)"
6,Transformed test set shape,"(3464, 29)"
7,Numeric features,4
8,Categorical features,1
9,Rows with missing values,6.0%


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,1.1341,4.3529,2.0782,0.9997,0.0139,0.0084,0.396
rf,Random Forest Regressor,1.5653,9.2097,3.0134,0.9995,0.0207,0.0117,0.539
xgboost,Extreme Gradient Boosting,1.7577,9.0701,2.9995,0.9995,0.0216,0.0144,0.291
dt,Decision Tree Regressor,1.7431,15.6508,3.9116,0.9991,0.0252,0.0127,0.036
lightgbm,Light Gradient Boosting Machine,3.5597,48.6669,6.9286,0.9971,0.0872,0.039,0.088
gbr,Gradient Boosting Regressor,4.9665,102.1926,10.0863,0.9939,0.1185,0.0567,0.214
lar,Least Angle Regression,9.5063,241.2864,15.515,0.9857,0.238,0.1281,0.028
br,Bayesian Ridge,9.4991,241.297,15.5153,0.9857,0.2378,0.1281,0.03
ridge,Ridge Regression,9.3941,241.8991,15.5326,0.9856,0.235,0.1277,0.031
lr,Linear Regression,9.8526,248.3986,15.735,0.9852,0.2467,0.1333,0.036


In [35]:
from pycaret.regression import predict_model

def predict_adj_close(stock, future_date, historical_data, trained_model):
    future_date = pd.to_datetime(future_date)
    year, month, day = future_date.year, future_date.month, future_date.day

    past_data = historical_data[historical_data['Stock'] == stock].tail(30)

    # Calculate the 30-day moving average
    ma_30 = past_data['Adj Close'].mean()

    # Prepare the input data
    input_data = pd.DataFrame({
        'Stock': [stock],
        'Year': [year],
        'Month': [month],
        'Day': [day],
        'MA_30': [ma_30]
    })

    # Get the model's prediction
    prediction_result = predict_model(trained_model, data=input_data)

    # Access the correct column ('prediction_label')
    prediction = prediction_result.iloc[0]['prediction_label']

    return prediction

# Predict for a future date
future_prediction = predict_adj_close('AEPGX', '2028-08-28', mf, best_model)
print(f"The predicted Adj Close for AEPGX on 2024-12-15 is {future_prediction:.2f}")


The predicted Adj Close for AEPGX on 2024-12-15 is 58.08


In [38]:
import yfinance as yf

def get_today_price(stock):
    # Fetch the latest data for the stock using yfinance
    stock_data = yf.download(stock, period='1mo', interval='1d') 
    today_price = stock_data['Adj Close'].iloc[-1]  # Get the last adjusted close price
    return today_price

def calculate_investment_value(initial_investment, stock, future_date, historical_data, trained_model):
    # Get today's adjusted close price using yfinance
    today_price = get_today_price(stock)

    # Calculate how many shares you can buy with the initial investment
    shares_bought = initial_investment / today_price

    # Predict the adjusted close price for the future date
    future_price = predict_adj_close(stock, future_date, historical_data, trained_model)

    # Calculate the final amount (number of shares * predicted future price)
    final_amount = shares_bought * future_price

    return final_amount

# Example usage:
initial_investment = 10000  # Amount you're investing today
future_date = '2024-12-15'  # The future date you're predicting for
stock = 'AEPGX'  # Stock symbol

final_value = calculate_investment_value(initial_investment, stock, future_date, mf, best_model)
print(f"Your investment of {initial_investment} in {stock} today will be worth {final_value:.2f} on {future_date}.")


[*********************100%***********************]  1 of 1 completed


Your investment of 10000 in AEPGX today will be worth 10680.88 on 2024-12-15.


In [58]:
save_model(best_model,"models/etf")

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['Year', 'Month', 'Day', 'MA_30'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['Stock'],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('onehot_encoding',
                  TransformerWrapper(include=['Stock'],
                                     transformer=OneHotEncoder(cols=['Stock'],
                                                               handle_missing='return_nan',
                                                               use_cat_names=True))),
                 ('clean_column_names',
                  TransformerWrapper(transformer=CleanColumnNames())),
                 ('trained_model',
                  ExtraTreesRegressor(n_jobs=-1, random_state=8212))]),
 'models/etf.pkl