# Machine Learning for Trading

## Lesson 3

In [1]:
import pandas as pd
import os

* Create an empty data frame

In [2]:
''' Build a dataframe in pandas '''
def empty_data_frame():
    """Print adjusted-close prices for SPY, GOOG, IBM and GLD.

    Starts from a column-less frame indexed by every calendar day between
    2015-01-22 and 2015-01-26, attaches SPY with an inner join (so only
    dates present in the SPY file remain) and then attaches the remaining
    symbols with the default left join.  The combined frame is printed;
    nothing is returned.
    """

    def load_adj_close(ticker):
        # Read only the 'Date' and 'Adj Close' columns, index by parsed date,
        # and name the price column after the ticker so joined columns
        # never clash.
        prices = pd.read_csv("data/{}.csv".format(ticker), index_col="Date",
                             parse_dates=True, usecols=['Date', 'Adj Close'],
                             na_values=['nan'])
        return prices.rename(columns={'Adj Close': ticker})

    # Empty frame whose index is the full calendar range.
    calendar = pd.date_range('2015-01-22', '2015-01-26')
    combined = pd.DataFrame(index=calendar)

    # Inner join keeps only the dates that also appear in the SPY data.
    combined = combined.join(load_adj_close('SPY'), how='inner')

    # Remaining symbols use the default left join: rows are kept as-is and
    # dates missing from a symbol's file show up as NaN.
    for ticker in ['GOOG', 'IBM', 'GLD']:
        combined = combined.join(load_adj_close(ticker))

    print(combined)

In [3]:
empty_data_frame()

                   SPY        GOOG         IBM         GLD
2015-01-22  192.280518  531.465332  134.329025  125.230003
2015-01-23  191.226273  536.994873  134.743942  124.230003
2015-01-26  191.674103  532.280823  135.167526  122.989998


* Modify empty_data_frame to create utility functions

In [4]:
def symbol_to_path(symbol, base_dir='data'):
    """Return the path of the CSV file for ``symbol`` inside ``base_dir``.

    ``symbol`` is converted with ``str()`` and given a ``.csv`` suffix.
    """
    csv_name = str(symbol) + ".csv"
    return os.path.join(base_dir, csv_name)

In [5]:
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Parameters
    ----------
    symbols : iterable of str (or a single str)
        Ticker symbols to load.  'SPY' is prepended as the reference
        symbol when it is absent.  The caller's object is never modified.
    dates : index-like
        Used as the index of the starting (column-less) frame.

    Returns
    -------
    pandas.DataFrame
        One column per symbol, named after the symbol.  Only dates that
        are present in the SPY file are kept; a date missing from any
        other symbol's file appears as NaN in that symbol's column.
    """
    # Copy so the 'SPY' insertion below cannot leak into the caller's list;
    # this also lets tuples and other iterables be passed in.
    if isinstance(symbols, str):
        symbols = [symbols]
    else:
        symbols = list(symbols)

    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)

    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        # Name the price column after the symbol so joined columns never clash.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})

        # SPY decides which dates survive (inner join).  Every other symbol
        # is left-joined so a gap in one symbol does not drop the row for
        # all the others.
        join_kind = 'inner' if symbol == 'SPY' else 'left'
        df = df.join(df_temp, how=join_kind)

    return df
        

In [6]:
def data_frame():
    """Print the frame returned by ``get_data`` for GOOG, IBM and GLD.

    The requested window is every calendar day from 2015-01-22 through
    2015-01-26.  Nothing is returned.
    """
    tickers = ['GOOG', 'IBM', 'GLD']
    window = pd.date_range(start='2015-01-22', end='2015-01-26')

    # Load the prices and show them.
    print(get_data(tickers, window))

In [7]:
data_frame()

                   SPY        GOOG         IBM         GLD
2015-01-22  192.280518  531.465332  134.329025  125.230003
2015-01-23  191.226273  536.994873  134.743942  124.230003
2015-01-26  191.674103  532.280823  135.167526  122.989998
