# Capstone: Portfolio Optimization

# 1. Forecasting 500 stocks with Darts (multivariate)

In [1]:
# Base
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import LinearRegressionModel

# Load the price table from CSV and parse the 'date' column as datetimes.
df = pd.read_csv('database(alldays)2.csv')
df['date'] = pd.to_datetime(df['date'])

# Keep only the study window: strictly after 2014-02-07, up to and
# including 2022-02-07.
in_window = (df['date'] > '2014-02-07') & (df['date'] <= '2022-02-07')
df = df[in_window]

# Drop every column with the suffix "_volume" except 'dj_volume', which is
# used as a model input further down. The list used to be built without ever
# being applied, so the volume columns silently stayed in `df`.
volume_columns = [col for col in df.columns if col.endswith('_volume') and col != 'dj_volume']
df = df.drop(columns=volume_columns)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# "_close" columns that are NOT forecast as individual stocks (several of
# them are fed to every per-stock model as extra components instead).
exclude_columns = ['sp5t_close', 'inf_close','spr_close','int_close','dj_close','vix_close']
stock_names = [col for col in df.columns if col.endswith('_close') and col not in exclude_columns]

# Extra components that accompany every stock in its multivariate series.
context_columns = ['timing','sp5avg','inf_close','spr_close','int_close','dj_close','dj_volume','vix_close']

# Length (in rows) of the held-out validation tail; also used as the lag.
HOLDOUT_DAYS = 365

# A single lag of -HOLDOUT_DAYS. This list previously repeated the same lag
# three times, which presumably only duplicated identical regressors.
# NOTE(review): confirm against the installed darts version.
lags = [-HOLDOUT_DAYS]

# Collect one forecast Series per stock and concatenate once after the loop.
# Growing a DataFrame with pd.concat inside the loop is quadratic and leaves
# a heavily fragmented frame.
stock_forecasts = []

for stock_name in stock_names:
    # The stock's own close price comes first, followed by the shared columns.
    component_columns = [stock_name] + context_columns
    stock_df = df[['date'] + component_columns]

    # Multivariate TimeSeries indexed by 'date'.
    series = TimeSeries.from_dataframe(stock_df, 'date', component_columns)

    # Hold out the last HOLDOUT_DAYS rows as the validation series.
    train, val = series[:-HOLDOUT_DAYS], series[-HOLDOUT_DAYS:]

    model = LinearRegressionModel(lags=lags)
    model.fit(train)

    # Forecast as many steps as the validation series contains.
    forecast = model.predict(len(val), num_samples=1)

    # Keep only the forecast of the stock itself, not the other components.
    stock_forecast_df = forecast.pd_dataframe()[stock_name]
    stock_forecasts.append(stock_forecast_df)

# One column per stock, indexed by forecast date. pd.concat raises on an
# empty list, so fall back to an empty frame when no stock column matched.
forecast_df = pd.concat(stock_forecasts, axis=1) if stock_forecasts else pd.DataFrame()

In [3]:
# Move the forecast dates out of the index into a regular 'date' column.
# Naming the index first makes the resulting column 'date' whether or not the
# index already carried a name. The guard keeps the cell idempotent: without
# it, a second run would insert another index column and end up with two
# 'date' columns.
if 'date' not in forecast_df.columns:
    forecast_df = forecast_df.rename_axis('date').reset_index()

  forecast_df.reset_index(inplace=True)


In [4]:
# Display the forecast table: one row per forecast date, one column per stock.
forecast_df

Unnamed: 0,date,AAL_close,AAPL_close,AAP_close,ABC_close,ABT_close,ACN_close,ADBE_close,ADI_close,ADM_close,...,WU_close,WYNN_close,WY_close,XEL_close,XOM_close,XRAY_close,XRX_close,YUM_close,ZBH_close,ZION_close
0,2021-02-08,6.278266,133.925678,165.752058,102.585744,113.16073,248.970809,522.824658,140.409576,46.163684,...,20.594859,71.749304,27.853558,67.172519,35.552863,46.13292,14.301156,105.275291,138.687295,30.462632
1,2021-02-09,5.799398,135.884250,166.905878,103.046681,113.72323,249.472762,529.137158,140.456451,46.311993,...,20.532359,68.278575,27.791058,67.222017,34.858920,45.44542,13.901035,104.337791,139.029688,29.368882
2,2021-02-10,6.362132,133.886594,166.179746,101.991994,113.34823,248.560652,525.887158,137.347076,46.060675,...,20.422984,71.802717,27.697308,66.910490,35.753972,44.75792,14.236727,103.994041,138.811957,30.306382
3,2021-02-11,7.260610,134.219463,163.426218,101.445119,113.28573,247.265242,527.574658,136.222076,46.402865,...,19.969859,73.008102,27.556683,66.453447,36.419909,43.57042,14.336047,103.697166,136.821686,30.087632
4,2021-02-12,7.008289,133.292265,163.678396,101.695119,113.16073,247.662703,525.824658,136.034576,46.413923,...,20.079234,73.060238,27.744183,66.299170,36.569065,44.13292,14.168461,103.915916,137.198968,30.462632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2022-02-03,-2.000924,224.661755,172.335769,120.070119,138.28573,324.900496,735.012158,164.097076,56.474801,...,20.063609,126.856384,42.087933,71.864541,18.702058,57.63292,8.218858,111.353416,157.705169,32.056382
361,2022-02-04,-1.617000,227.638314,170.525305,118.335744,138.59823,323.058211,740.762158,162.347076,56.141891,...,19.797984,122.620883,41.587933,71.481173,18.321002,55.44542,8.554918,112.665916,156.756392,33.150132
362,2022-02-05,-1.595359,228.003414,170.200821,119.507619,138.97323,325.383895,744.574658,165.253326,56.323497,...,19.891734,129.595414,42.275433,71.502695,17.756731,56.75792,8.296078,111.103416,157.426440,32.431382
363,2022-02-06,-1.565266,227.810541,170.182376,119.382619,139.03573,325.550887,744.387158,165.315826,56.333604,...,19.844859,130.086948,42.306683,71.473563,17.850547,56.88292,8.264924,111.165916,157.351639,32.525132


In [5]:
# Take the date column together with every stock's closing-price column.
history_columns = ['date'] + stock_names
historical = df[history_columns]

# Restrict to 2019-02-07 (inclusive) through 2021-02-08 (exclusive).
on_or_after_start = historical['date'] >= '2019-02-07'
before_cutoff = historical['date'] < '2021-02-08'
historical = historical[on_or_after_start & before_cutoff]

# Save the historical slice without writing the row index.
historical.to_csv('historical2.csv',index=False)

In [6]:
# Prepend the historical closes to the forecast rows.
# `historical` is already limited to [2019-02-07, 2021-02-08) where it is
# built, so that filter is not repeated here.
# Rows whose date already appears in `historical` are dropped from
# `forecast_df` first, so re-running this cell does not stack a second copy
# of the history on top of the table.
already_historical = forecast_df['date'].isin(historical['date'])
forecast_df = pd.concat(
    [historical, forecast_df[~already_historical]],
    ignore_index=True,  # avoid duplicate row labels from the two source frames
)

In [7]:
# Write the combined history + forecast table to disk, omitting the row index.
forecast_path = 'forecast2.csv'
forecast_df.to_csv(forecast_path, index=False)