# FinRL

## Part 1

### Install and Import Packages

In [None]:
# Install the third-party packages used by this notebook. `%pip` (rather than
# `!pip`) installs into the environment of the running kernel.
# NOTE(review): none of these are version-pinned, so a fresh run may pull
# different releases than the ones the saved outputs were produced with.
%pip install swig
%pip install wrds
%pip install pyportfolioopt
%pip install yfinance
%pip install box2d-py

In [None]:
# Install FinRL straight from its GitHub repository. No branch, tag, or commit
# is given in the URL, so whatever the repository's default branch currently
# holds is what gets installed.
%pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

In [14]:
import os
import pandas as pd

import numpy as np
import datetime
import yfinance as yf

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl import config_tickers
from finrl.config import INDICATORS

import itertools

In [23]:
# Date windows (ISO "YYYY-MM-DD" strings) for the training and trading splits.
# The training window ends on the same date the trading window starts.
# NOTE(review): whether that boundary day lands in one split or both depends
# on how `data_split` treats its end bound — confirm against FinRL.
TRAIN_START_DATE, TRAIN_END_DATE = "2009-01-01", "2020-07-01"
TRADE_START_DATE, TRADE_END_DATE = "2020-07-01", "2023-05-01"

In [28]:
# Ticker symbols handed to the downloader, in the order they are requested.
# NOTE(review): 'btc' may not be the symbol intended for Bitcoin (Yahoo Finance
# lists the spot pair as 'BTC-USD') — confirm which instrument is wanted.
symbols = [
    "tsla", "msft", "nvda", "crsp",
    "googl", "mstr", "btc",
]

In [29]:
# Download price data for every symbol over the full span, from the start of
# the training window to the end of the trading window, in a single request.
df_raw = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
    ticker_list=symbols,
).fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (19941, 8)


In [30]:
# Preview the first five rows of the raw download.
df_raw.head(n=5)

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2009-01-02,7.722723,8.053554,7.645145,8.041041,144275580,googl,4
1,2009-01-02,19.530001,20.4,19.370001,15.097111,50084000,msft,4
2,2009-01-02,37.200001,37.82,36.990002,37.279999,56000,mstr,4
3,2009-01-02,2.0175,2.1925,2.01,1.997859,49712400,nvda,4
4,2009-01-05,8.033033,8.289289,7.882883,8.209459,195364440,googl,0


### Preprocess Data

In [31]:
# Configure the feature pipeline: technical indicators taken from
# finrl.config.INDICATORS, plus the VIX and turbulence features; no
# user-defined features are requested.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)

# Run the configured pipeline over the raw download.
processed = fe.preprocess_data(df_raw)

Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3604, 8)
Successfully added vix
Successfully added turbulence index


In [32]:
# Build the full (date, ticker) grid: every calendar day between the first and
# last processed date, crossed with every ticker present in `processed`.
list_ticker = processed['tic'].unique().tolist()
list_date = list(
    pd.date_range(processed['date'].min(), processed['date'].max()).astype(str)
)
combination = list(itertools.product(list_date, list_ticker))

# Left-join the processed rows onto the grid, keep only dates that actually
# occur in `processed`, order by date then ticker, and replace any remaining
# missing values with 0.
processed_full = (
    pd.DataFrame(combination, columns=['date', 'tic'])
    .merge(processed, on=['date', 'tic'], how='left')
    .loc[lambda grid: grid['date'].isin(processed['date'])]
    .sort_values(['date', 'tic'])
    .fillna(0)
)

### Split and save the data

In [33]:
# Carve the processed frame into the training and trading windows.
train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)

# Report the row count of each split, one per line.
print(len(train), len(trade), sep="\n")

11572
2844


In [34]:
# Persist the train/trade splits as CSV files under `log_dir`.
log_dir = "data/"

# Create the output directory up front; without this a fresh checkout fails
# with FileNotFoundError on the first write.
os.makedirs(log_dir, exist_ok=True)

train_path = os.path.join(log_dir, 'train_data.csv')
trade_path = os.path.join(log_dir, 'trade_data.csv')

# Pass the path (not an already-open text handle) so pandas opens the file
# itself and controls newline handling; a handle opened without newline=''
# can yield doubled line endings on Windows. 'utf-8-sig' writes a leading BOM,
# and the DataFrame index is still written as the first column, as before.
train.to_csv(train_path, encoding='utf-8-sig')
trade.to_csv(trade_path, encoding='utf-8-sig')