# Capstone: Portfolio Optimization

# 1. Forecasting 500 stocks with Darts (multivariate)

In [8]:
# Base
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from darts import TimeSeries
from darts.models import LinearRegressionModel

# Read the raw table from CSV and parse the date column
df = pd.read_csv('database(alldays)2.csv')
df['date'] = pd.to_datetime(df['date'])
# Keep rows dated after 2012-02-07, up to and including 2020-02-07
df = df[(df['date']>'2012-02-07')&(df['date']<='2020-02-07')]
# Drop columns with the suffix "_volume". 'dj_volume' is kept because the
# forecasting loop uses it as one of the series components.
# (Previously the list was built but the drop itself was never performed.)
volume_columns = [col for col in df.columns if col.endswith('_volume') and col != 'dj_volume']
df = df.drop(columns=volume_columns)

In [9]:
# Stock columns are those with the suffix "_close", minus the "_close"
# columns listed here, which are not forecast as stocks.
exclude_columns = ['sp5t_close', 'inf_close','spr_close','int_close','dj_close','vix_close']
stock_names = [col for col in df.columns if col.endswith('_close') and col not in exclude_columns]

# Components that accompany every stock in its multivariate series
# (defined once instead of being spelled out twice inside the loop).
shared_columns = ['timing','sp5avg','inf_close','spr_close','int_close','dj_close','dj_volume','vix_close']

# Per-stock forecast Series are collected here and combined once after the
# loop; concatenating onto a growing DataFrame on every iteration is quadratic
# and leaves the frame highly fragmented.
stock_forecasts = []

# Iterate through each stock
for stock_name in stock_names:
    value_columns = [stock_name] + shared_columns

    # Create a DataFrame for the specific stock with relevant columns
    stock_df = df[['date'] + value_columns]

    # Create a multivariate TimeSeries for the specific stock
    series = TimeSeries.from_dataframe(stock_df, 'date', value_columns)

    # Set aside the last 365 entries as a validation series
    train, val = series[:-365], series[-365:]

    # Single 365-step lag. The lag used to be listed three times.
    # NOTE(review): repeating a lag presumably only duplicates the same
    # feature column -- confirm against the installed Darts version.
    lags = [-365]
    model = LinearRegressionModel(lags=lags)
    model.fit(train)

    # Make predictions for the validation period
    forecast = model.predict(len(val), num_samples=1)

    # Keep only this stock's component of the multivariate forecast
    stock_forecast_df = forecast.pd_dataframe()[stock_name]
    stock_forecasts.append(stock_forecast_df)

# One column per stock; fall back to an empty frame when no stock columns
# were found (pd.concat raises on an empty list).
forecast_df = pd.concat(stock_forecasts, axis=1) if stock_forecasts else pd.DataFrame()

In [10]:
# Move the date index into a regular 'date' column.
# Guarded so the cell is safe to re-run: without the check, a second run would
# insert the fresh integer index as 'index' and rename it to a duplicate 'date'.
if 'date' not in forecast_df.columns:
    forecast_df = forecast_df.reset_index().rename(columns={'index': 'date'})

  forecast_df.reset_index(inplace=True)


In [11]:
# Show the forecast frame as the cell output (pandas truncates the rendering of large frames)
forecast_df

Unnamed: 0,date,AAL_close,AAPL_close,AAP_close,ABC_close,ABT_close,ACN_close,ADBE_close,ADI_close,ADM_close,...,WU_close,WYNN_close,WY_close,XEL_close,XOM_close,XRAY_close,XRX_close,YUM_close,ZBH_close,ZION_close
0,2019-02-08,26.095033,41.971706,196.608790,90.995116,71.837122,160.809368,250.77703,89.932497,43.139738,...,20.033271,26.13125,23.545717,49.816409,63.489447,33.756266,18.442498,91.087747,103.049632,44.686359
1,2019-02-09,30.099814,44.185346,188.268010,93.141213,72.744741,161.892981,264.52703,91.484765,44.657056,...,19.575837,66.13125,25.621917,49.349706,67.924096,33.969084,21.349835,91.862812,106.548824,46.686359
2,2019-02-10,30.282522,44.354850,187.314629,92.657443,72.809786,162.007267,266.02703,91.725203,44.620637,...,19.520814,66.13125,25.665605,49.407715,68.185050,33.811050,21.469833,92.047362,106.673154,46.686359
3,2019-02-11,30.482599,44.543501,186.241484,92.111142,72.883807,162.129541,266.52703,91.991884,44.578834,...,19.458662,70.13125,25.713110,49.471147,68.477555,33.630070,21.603227,92.253989,106.807101,46.811359
4,2019-02-12,29.240641,44.311253,186.540731,88.719582,73.191480,158.013421,256.65203,91.011142,45.195192,...,19.110065,56.13125,24.251223,50.371470,68.958698,32.663505,20.728936,91.642289,104.579678,45.092609
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2020-02-03,28.969632,57.992860,98.719618,58.487037,71.907837,197.297586,356.96453,119.292177,37.066706,...,18.841888,161.13125,29.865512,54.928797,72.446008,18.136782,19.757003,115.252506,102.203631,66.076984
361,2020-02-04,28.847834,57.730023,101.336671,58.171290,72.129965,194.427343,356.83953,118.557094,37.817457,...,18.717348,157.13125,29.458970,55.212085,73.226655,18.060567,19.605733,114.869229,101.678442,65.084797
362,2020-02-05,28.789417,57.781070,100.861992,57.391559,72.446162,194.222171,358.15203,118.188459,39.535250,...,18.482817,158.13125,29.192020,55.308835,73.137487,17.564447,19.712823,114.766718,101.656250,64.623859
363,2020-02-06,28.498552,57.298763,105.168514,58.124145,71.747631,192.615958,352.52703,116.992013,39.871287,...,18.615699,148.13125,28.859703,55.513570,72.788875,18.283901,19.179186,114.164705,101.730038,63.748859


In [12]:
# Observed values for every forecast stock, restricted to the window
# from 2017-02-07 (inclusive) to 2019-02-08 (exclusive), saved alongside the forecasts.
window_mask = (df['date'] >= '2017-02-07') & (df['date'] < '2019-02-08')
historical = df.loc[window_mask, ['date'] + stock_names]
historical.to_csv('historical2.csv', index=False)

In [13]:
# Stack the observed history on top of the forecasts so the result reads as one
# continuous table. `historical` is already limited to the 2017-02-07 ..
# 2019-02-08 window where it is built, so it is not filtered a second time.
# ignore_index gives the combined frame a clean 0..n-1 index instead of the
# mismatched row labels inherited from the two inputs.
# NOTE: this rebinds forecast_df; re-running only this cell stacks the history again.
forecast_df = pd.concat([historical, forecast_df], ignore_index=True)

In [14]:
# Write the forecast frame to disk without the row index
forecast_df.to_csv(path_or_buf='forecast2.csv', index=False)