## Import packages

In [1]:
import yfinance as yf

## Configure parameters

In [2]:
# Ticker(s) to download: Vanguard Total Stock Market ETF (VTI)
ticker_list = ["VTI"]

# First date of the requested window (inclusive)
start_date = "2013-01-02"

# Date at which the requested window stops (exclusive)
end_date = "2019-01-01"

## Download data

In [3]:
# Download price/volume bars for the configured ticker(s) between start_date
# (inclusive) and end_date (exclusive).
# auto_adjust is pinned to False so the result keeps a separate 'Adj Close'
# column next to the raw 'Close', independent of the default used by the
# installed yfinance version; the transformation cell below expects both.
data = yf.download(ticker_list, start=start_date, end=end_date, auto_adjust=False)

[*********************100%***********************]  1 of 1 completed


## Data check and preliminary data transformation

In [4]:
# Preview the first rows to check the columns and the first downloaded date
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-01-02 00:00:00-05:00,74.529999,75.150002,74.5,75.139999,62.719402,5037200
2013-01-03 00:00:00-05:00,75.120003,75.370003,74.839996,75.029999,62.627575,2634600
2013-01-04 00:00:00-05:00,75.139999,75.519997,74.989998,75.410004,62.944786,2512900
2013-01-07 00:00:00-05:00,75.18,75.279999,74.949997,75.209999,62.77784,2511200
2013-01-08 00:00:00-05:00,75.110001,75.18,74.699997,75.010002,62.61092,1407900


In [5]:
# Preview the last rows to check where the downloaded range ends
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-24 00:00:00-05:00,121.900002,122.540001,119.669998,119.699997,112.055878,14488500
2018-12-26 00:00:00-05:00,120.339996,125.519997,119.349998,125.459999,117.448036,12512300
2018-12-27 00:00:00-05:00,123.760002,126.589996,121.910004,126.589996,118.505882,16128500
2018-12-28 00:00:00-05:00,127.5,128.309998,125.730003,126.43,118.35611,11668700
2018-12-31 00:00:00-05:00,127.379997,127.730003,126.279999,127.629997,119.479462,9414800


In [6]:
# Move the 'Date' index into a regular column
data = data.reset_index()

# Keep only the calendar date (drops the time-of-day and timezone parts)
data['Date'] = data['Date'].dt.date

# Rename by label instead of overwriting every header positionally: a positional
# assignment would silently mislabel the price series if the download ever
# returned its columns in a different order.
data = data.rename(columns={'Adj Close': 'AdjClose'})

# Selecting the expected columns explicitly fixes their order and raises a
# KeyError naming any column that is missing from the download.
expected_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'AdjClose', 'Volume']
data = data[expected_columns]

data.head()

Unnamed: 0,Date,Open,High,Low,Close,AdjClose,Volume
0,2013-01-02,74.529999,75.150002,74.5,75.139999,62.719402,5037200
1,2013-01-03,75.120003,75.370003,74.839996,75.029999,62.627575,2634600
2,2013-01-04,75.139999,75.519997,74.989998,75.410004,62.944786,2512900
3,2013-01-07,75.18,75.279999,74.949997,75.209999,62.77784,2511200
4,2013-01-08,75.110001,75.18,74.699997,75.010002,62.61092,1407900


In [7]:
# Summarise column dtypes and non-null counts to spot missing values
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1510 entries, 0 to 1509
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      1510 non-null   object 
 1   Open      1510 non-null   float64
 2   High      1510 non-null   float64
 3   Low       1510 non-null   float64
 4   Close     1510 non-null   float64
 5   AdjClose  1510 non-null   float64
 6   Volume    1510 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 82.7+ KB


In [8]:
# Report the first and last dates actually present in the downloaded data,
# formatted as YYYY/MM/DD.
date_format = "%Y/%m/%d"

dates = data['Date']
min_date = dates.min().strftime(date_format)
max_date = dates.max().strftime(date_format)

print(f'The data start date: {min_date}')
print(f'The data end date: {max_date}')

The data start date: 2013/01/02
The data end date: 2018/12/31


## Write to CSV

In [9]:
import os

In [10]:
# Create an output folder specific to the requested date range.

# Folder name encodes the ticker and the requested start/end dates
directory = f"VTI_{start_date}_{end_date}"

# Parent directory for the output folder
parent_dir = "./data/"

# Full path of the output folder
path = os.path.join(parent_dir, directory)

# Create the folder together with any missing parent directories.
# exist_ok=True keeps this cell re-runnable: without it, a second run
# raises FileExistsError because the folder already exists.
os.makedirs(path, exist_ok=True)

In [11]:
# Display the output folder path to confirm where the file will be written
path

'./data/VTI_2013-01-02_2019-01-01'

In [12]:
# Name of the CSV file written inside the output folder
file_name = "VTI.csv"

In [13]:
# Destination of the export: <output folder>/<file name>
csv_path = f"{path}/{file_name}"

# Write the table to disk; index=False leaves the row index out of the file
data.to_csv(csv_path, index=False)