# Capstone: Portfolio Optimization

# 1. 500 stocks forecast using Darts: Multivariate

In [1]:
# Base
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import LinearRegressionModel

# Load the raw table and convert its 'date' column to datetimes
df = pd.read_csv('database(alldays)2.csv')
df['date'] = pd.to_datetime(df['date'])

# Keep rows dated after 2013-02-07 and up to (and including) 2021-02-07
after_start = df['date'].gt('2013-02-07')
until_end = df['date'].le('2021-02-07')
df = df.loc[after_start & until_end]

# Names of every "_volume" column other than 'dj_volume'.
# NOTE(review): the list is only collected here; nothing in this cell
# actually drops these columns from df.
volume_columns = [
    name for name in df.columns
    if name != 'dj_volume' and name.endswith('_volume')
]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Every "_close" column is treated as a stock, except the columns listed in
# exclude_columns (several of which are reused below as shared inputs)
exclude_columns = ['sp5t_close', 'inf_close','spr_close','int_close','dj_close','vix_close']
stock_names = [col for col in df.columns if col.endswith('_close') and col not in exclude_columns]

# Columns that accompany every stock as extra components of its series
shared_columns = ['timing','sp5avg','inf_close','spr_close','int_close','dj_close','dj_volume','vix_close']

# Number of trailing rows held out for validation (and forecast length)
validation_length = 365

# A single 365-step lag. The lag was previously listed three times, which
# only repeats the same regressor and adds no information to the model.
lags = [-365]

# Collect one forecast Series per stock and combine them once after the loop;
# concatenating onto a growing DataFrame inside the loop copies the whole
# frame on every iteration and leaves it heavily fragmented.
stock_forecasts = []

# Iterate through each stock
for stock_name in stock_names:
    component_columns = [stock_name] + shared_columns

    # Create a DataFrame for the specific stock with relevant columns
    stock_df = df[['date'] + component_columns]

    # Build one multivariate TimeSeries: the stock plus the shared columns
    series = TimeSeries.from_dataframe(stock_df, 'date', component_columns)

    # Set aside the trailing rows as a validation series
    train, val = series[:-validation_length], series[-validation_length:]

    model = LinearRegressionModel(lags=lags)
    model.fit(train)

    # Make predictions for the validation period
    forecast = model.predict(len(val), num_samples=1)

    # Keep only the stock's own component from the multivariate forecast
    stock_forecast_df = forecast.pd_dataframe()[stock_name]
    stock_forecasts.append(stock_forecast_df)

# One column per stock; fall back to an empty frame when there are no stocks
# (pd.concat raises on an empty list)
forecast_df = pd.concat(stock_forecasts, axis=1) if stock_forecasts else pd.DataFrame()

# Leave the index unnamed so that a later reset_index() yields a column
# called 'index'
forecast_df.index.name = None

In [3]:
# Turn the index into an ordinary column and label that column 'date'
forecast_df = forecast_df.reset_index().rename(columns={'index': 'date'})

  forecast_df.reset_index(inplace=True)


In [4]:
# Show the forecast table: a 'date' column followed by one column per stock
forecast_df

Unnamed: 0,date,AAL_close,AAPL_close,AAP_close,ABC_close,ABT_close,ACN_close,ADBE_close,ADI_close,ADM_close,...,WU_close,WYNN_close,WY_close,XEL_close,XOM_close,XRAY_close,XRX_close,YUM_close,ZBH_close,ZION_close
0,2020-02-09,33.155495,65.908718,135.709025,82.102641,84.937613,199.523330,330.420885,116.722840,45.758757,...,22.986121,151.927645,30.397228,61.734155,74.697791,52.898220,33.366076,107.223088,137.319516,53.239291
1,2020-02-10,33.170210,65.764963,136.118205,82.323639,84.825058,199.513681,329.858385,116.636902,45.749279,...,23.005652,150.740700,30.397723,61.718169,74.627131,53.051920,33.241076,107.189973,137.149407,53.161166
2,2020-02-11,32.874024,65.560171,137.123088,82.442438,84.648486,200.119372,327.920885,116.574402,45.405921,...,23.243933,147.426556,30.335789,61.869019,74.306010,53.595492,33.366076,107.597476,137.563540,53.083041
3,2020-02-12,32.737586,66.123761,135.395549,80.565819,85.794121,200.048322,335.420885,116.308777,45.251965,...,22.732214,156.896643,30.249134,61.670719,74.175221,51.251682,33.303576,107.601898,137.094083,53.254916
4,2020-02-13,32.198069,66.365677,137.269572,80.370948,86.216805,199.400022,334.045885,116.027527,45.163200,...,22.798621,153.662102,29.888213,62.028074,73.468129,50.808810,33.491076,107.719256,137.388706,52.254916
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2021-02-03,36.606709,55.625759,171.802775,125.107555,85.863982,161.440507,384.045885,104.136902,54.546435,...,14.909949,59.063899,29.248554,54.650627,71.918702,37.296844,20.084826,105.123942,130.132802,42.645541
361,2021-02-04,36.311619,56.565315,169.382854,122.768791,87.457993,162.477834,391.295885,103.558777,54.337610,...,14.241980,70.620767,28.889533,54.551800,71.165960,34.369645,20.209826,105.336123,129.762431,43.004916
362,2021-02-05,35.528645,56.684545,172.253947,123.787106,87.380090,160.523640,386.045885,102.621277,54.567809,...,14.529089,57.466724,28.390476,55.437253,70.944341,35.491756,20.366076,105.538980,131.180349,40.598666
363,2021-02-06,35.633666,56.076777,173.402385,124.929447,86.274562,161.280659,380.858385,102.965027,54.478103,...,14.847449,52.176770,28.441511,55.668538,70.636867,37.221244,19.959826,105.655366,130.510158,40.426791


In [5]:
# Date plus the closing-price column of every stock
close_prices = df[['date'] + stock_names]

# Restrict to the window from 2018-02-07 (inclusive) to 2020-02-08 (exclusive)
in_window = close_prices['date'].ge('2018-02-07') & close_prices['date'].lt('2020-02-08')
historical = close_prices.loc[in_window]

# Persist the historical window without the row index
historical.to_csv('historical2.csv', index=False)

In [6]:
# Stack the historical rows on top of the forecast rows.
# `historical` is already limited to 2018-02-07 <= date < 2020-02-08 where it
# is built, so the identical filter that used to be repeated here was a no-op
# and has been removed. ignore_index gives the combined frame one clean
# 0..n-1 index instead of two unrelated index ranges.
# NOTE(review): this rebinds forecast_df, so re-running the cell prepends the
# historical rows a second time -- run it once per kernel session.
# NOTE(review): the historical window excludes 2020-02-08 while the displayed
# forecast starts at 2020-02-09; confirm that day is meant to be absent.
forecast_df = pd.concat([historical, forecast_df], ignore_index=True)

In [7]:
# Write forecast_df to 'forecast2.csv', omitting the row index
forecast_df.to_csv('forecast2.csv',index=False)