## Import packages

In [3]:
import yfinance as yf

## Configure parameters

In [4]:
# Stock of interest: Vanguard Total Stock Market ETF (VTI)
ticker_list = ['VTI']

# Download window: the start date is inclusive, the end date is exclusive
start_date, end_date = '2015-11-25', '2018-11-24'

## Download data

In [5]:
# Fetch the data for the configured tickers and date range
data = yf.download(
    ticker_list,
    start=start_date,
    end=end_date,
)

[*********************100%***********************]  1 of 1 completed


## Data check and preliminary data transformation

In [6]:
# Preview the first rows of the downloaded frame
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-11-25 00:00:00-05:00,107.510002,107.660004,107.25,107.470001,94.460785,1820300
2015-11-27 00:00:00-05:00,107.589996,107.760002,107.220001,107.629997,94.601395,552400
2015-11-30 00:00:00-05:00,107.779999,107.849998,107.110001,107.169998,94.197067,3618100
2015-12-01 00:00:00-05:00,107.589996,108.209999,107.370003,108.18,95.084846,2443600
2015-12-02 00:00:00-05:00,108.099998,108.269997,106.879997,107.050003,94.091621,2937200


In [7]:
# Move the 'Date' index into a regular column
data = data.reset_index()

# Keep only the calendar date (drops the time-of-day and timezone offset)
data['Date'] = data['Date'].dt.date

# Rename by label rather than by position, so a different column order
# coming from the downloader cannot silently mislabel the price columns
data = data.rename(columns={'Adj Close': 'AdjClose'})

data.head()

Unnamed: 0,Date,Open,High,Low,Close,AdjClose,Volume
0,2015-11-25,107.510002,107.660004,107.25,107.470001,94.460785,1820300
1,2015-11-27,107.589996,107.760002,107.220001,107.629997,94.601395,552400
2,2015-11-30,107.779999,107.849998,107.110001,107.169998,94.197067,3618100
3,2015-12-01,107.589996,108.209999,107.370003,108.18,95.084846,2443600
4,2015-12-02,108.099998,108.269997,106.879997,107.050003,94.091621,2937200


In [8]:
# Summarize column dtypes and non-null counts to check for missing values
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 755 entries, 0 to 754
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      755 non-null    object 
 1   Open      755 non-null    float64
 2   High      755 non-null    float64
 3   Low       755 non-null    float64
 4   Close     755 non-null    float64
 5   AdjClose  755 non-null    float64
 6   Volume    755 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 41.4+ KB


In [9]:
# Report the earliest and latest dates actually present in the data
dates = data['Date']
min_date, max_date = (
    d.strftime("%Y/%m/%d") for d in (dates.min(), dates.max())
)

print(f'The data start date: {min_date}')
print(f'The data end date: {max_date}')

The data start date: 2015/11/25
The data end date: 2018/11/23


## Write to CSV

In [10]:
import os

In [12]:
# Build the output folder for this download; the folder name encodes the
# configured start and end dates
directory = f"VTI_{start_date}_{end_date}"

# Parent directory, relative to the notebook's working directory
parent_dir = "./data/"

# Full path of the output folder
path = os.path.join(parent_dir, directory)

# Create the folder and any missing parents. exist_ok=True keeps this cell
# re-runnable: without it, os.makedirs raises FileExistsError as soon as the
# folder already exists (e.g. on a second run of the notebook).
os.makedirs(path, exist_ok=True)

In [14]:
# Display the output folder path
path

'./data/VTI_2015-11-25_2018-11-24'

In [17]:
# Name of the CSV file written inside the output folder
file_name = 'VTI.csv'

In [18]:
# Save the table to CSV, omitting the integer row index
output_file = f"{path}/{file_name}"
data.to_csv(output_file, index=False)