# Capstone: Portfolio Optimization

# 1. Multivariate forecast of 500 stocks using Darts

In [1]:
# Base
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import LinearRegressionModel

# Load the full table and parse the 'date' column as datetimes
df = pd.read_csv('database(alldays)2.csv')
df['date'] = pd.to_datetime(df['date'])

# Keep only the modelling window: after 2015-02-07, up to and including 2023-02-07
df = df[(df['date']>'2015-02-07')&(df['date']<='2023-02-07')]

# Drop every "_volume" column except 'dj_volume', which the forecasting cell
# still reads as a model input. (The list used to be built but never applied,
# so the volume columns were silently carried along.)
volume_columns = [col for col in df.columns if col.endswith('_volume') and col != 'dj_volume']
df = df.drop(columns=volume_columns)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Stock columns are the "_close" columns that are not market-wide indicators
exclude_columns = ['sp5t_close', 'inf_close','spr_close','int_close','dj_close','vix_close']
stock_names = [col for col in df.columns if col.endswith('_close') and col not in exclude_columns]

# Indicator columns modelled jointly with every stock (same list for all stocks,
# so it is defined once instead of being repeated inside the loop)
indicator_columns = ['timing','sp5avg','inf_close','spr_close','int_close','dj_close','dj_volume','vix_close']

# Number of trailing time steps held out as the validation/forecast horizon
validation_steps = 365

# A single target lag of -365 (the "365 days lag" the model is meant to use).
# The lag used to be listed three times; repeating the same lag adds no new
# information to the regression.
lags = [-365]

# Per-stock forecast Series are collected here and joined once after the loop;
# growing a DataFrame with pd.concat on every iteration re-copies all previous
# columns each time and leaves the frame fragmented.
stock_forecasts = []

for stock_name in stock_names:
    component_columns = [stock_name] + indicator_columns

    # Multivariate series: the stock's close plus the shared indicators
    stock_df = df[['date'] + component_columns]
    series = TimeSeries.from_dataframe(stock_df, 'date', component_columns)

    # Hold out the trailing window; the model is fitted on everything before it
    train, val = series[:-validation_steps], series[-validation_steps:]

    model = LinearRegressionModel(lags=lags)
    model.fit(train)

    # Forecast the held-out window and keep only the stock's own component
    forecast = model.predict(len(val), num_samples=1)
    stock_forecasts.append(forecast.pd_dataframe()[stock_name])

# One column per stock, aligned on the forecast time index.
# pd.concat raises on an empty list, so fall back to an empty frame when no
# stock columns were found.
forecast_df = pd.concat(stock_forecasts, axis=1) if stock_forecasts else pd.DataFrame()

In [3]:
# Turn the forecast's time index into a regular 'date' column.
# The guard keeps the cell safe to re-run: without it, a second run would add
# another index column and leave two columns named 'date'.
# Reassigning (instead of inplace=True) also makes the result a fresh frame.
if 'date' not in forecast_df.columns:
    forecast_df = forecast_df.rename_axis('date').reset_index()

  forecast_df.reset_index(inplace=True)


In [4]:
# Preview the forecast table (the notebook renders a truncated view of large frames)
forecast_df

Unnamed: 0,date,AAL_close,AAPL_close,AAP_close,ABC_close,ABT_close,ACN_close,ADBE_close,ADI_close,ADM_close,...,WU_close,WYNN_close,WY_close,XEL_close,XOM_close,XRAY_close,XRX_close,YUM_close,ZBH_close,ZION_close
0,2022-02-07,13.413737,172.749356,236.9866,128.950569,134.990566,380.031455,626.671381,180.228468,67.433348,...,18.054526,99.172723,39.591320,68.768757,61.982065,54.490493,18.289883,134.682208,132.61752,62.87855
1,2022-02-08,12.632487,172.165545,239.2366,128.711059,135.380518,379.828800,627.453138,177.532760,67.245848,...,18.115059,94.016473,39.099132,69.466559,60.950815,53.306316,18.020351,136.588458,132.99252,62.37855
2,2022-02-09,12.351237,172.850284,239.7366,128.730331,135.549458,380.340156,627.620322,177.865234,67.245848,...,18.229699,90.578973,38.950695,69.752583,61.075815,53.292393,18.118008,136.807208,132.86752,62.62855
3,2022-02-10,12.069987,172.707377,239.9866,128.605596,135.792657,378.515474,626.792207,177.314519,67.152098,...,18.235559,89.297723,38.544445,69.811372,60.513315,52.455813,18.352383,135.307208,133.36752,62.44105
4,2022-02-11,11.976237,173.979523,241.4866,128.750396,135.621460,380.963867,628.099443,175.048116,67.245848,...,18.260050,88.860223,38.794445,70.371081,60.732065,53.384652,18.215664,135.119708,133.24252,62.12855
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2023-02-02,-48.992513,184.641510,183.4866,124.690785,167.656430,405.435459,765.407264,167.719250,40.495848,...,14.648993,-40.046027,19.419445,89.536712,5.075815,33.207364,9.289883,156.182208,139.99252,-2.62145
361,2023-02-03,-47.555013,183.117147,182.7366,124.490467,167.265900,400.894612,752.235774,171.030807,40.620848,...,14.943646,-32.171027,20.036632,88.929121,6.888315,34.130341,9.785976,155.119708,141.11752,-0.74645
362,2023-02-04,-47.023763,181.311186,180.7366,123.430497,166.349918,400.808537,748.806798,171.022703,39.995848,...,14.961584,-27.921027,19.903820,89.191209,7.107065,34.328326,10.188320,155.619708,141.24252,0.44105
363,2023-02-05,-47.180013,181.268741,180.7366,123.438262,166.356637,400.664105,749.465716,170.961791,39.933348,...,15.010935,-27.921027,19.919445,89.291420,6.857065,34.329779,10.164883,155.557208,141.49252,0.31605


In [5]:
# Actual closing prices for the history window: 2020-02-07 (inclusive) up to,
# but not including, 2022-02-08. Saved to 'historical2.csv' without the index.
# NOTE(review): the window's last day, 2022-02-07, is also the first forecast
# date shown in the notebook output — confirm that overlap is intended.
window_start, window_end = '2020-02-07', '2022-02-08'
in_window = df['date'].ge(window_start) & df['date'].lt(window_end)
historical = df.loc[in_window, ['date'] + stock_names]
historical.to_csv('historical2.csv', index=False)

In [6]:
# Prepend the actual history to the forecast rows.
# Only history strictly before the first forecast date is used: the history
# window runs through 2022-02-07, which is also the first forecast date, so
# taking the whole window would produce two rows for that day.
forecast_start = forecast_df['date'].min()
history_before_forecast = historical[
    (historical['date'] >= '2020-02-07') & (historical['date'] < forecast_start)
]

# If the cell is re-run, forecast_df already starts at the beginning of the
# history window, so nothing precedes it and the frame is left unchanged
# instead of having the history stacked on a second time.
if not history_before_forecast.empty:
    forecast_df = pd.concat([history_before_forecast, forecast_df], ignore_index=True)

In [7]:
# Write forecast_df to 'forecast2.csv', leaving out the DataFrame index
forecast_csv_path = 'forecast2.csv'
forecast_df.to_csv(forecast_csv_path, index=False)