In [1]:
import pandas as pd 
import os
import matplotlib.pyplot as plt

def get_max_close(symbol):
    """Return the maxium closing value for stock indicated by symbol
    """
    df = pd.read_csv("data/{}.csv".format(symbol)) #read in csv file
    return df['Close'].max() #compute and return max

def get_mean_volume(symbol):
    """Return the mean closing volume indicated by symbol
    """
    df = pd.read_csv("data/{}.csv".format(symbol)) #read in csv file
    return df['Volume'].mean() #compute and return mean

def plot_adjusted_close(symbol):
    """Plot Stock Prices"""
    df = pd.read_csv("data/{}.csv".format(symbol))
    df['Adj Close'].plot()
    plt.show() #show the plot

def plot_two_columns(col1, col2, symbol):
    """Plot two columns"""
    df = pd.read_csv("data/{}.csv".format(symbol))
    df[[col1, col2]].plot()
    plt.show()
    
def symbol_to_path(symbol, base_dir="data"):
    #create path to given symbol csv file. default directory = data
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def select_rows(dataframe, start_date, end_date):
    #slice by row range using DataFrame.ix[] selector
    return dataframe.ix[str(start_date):str(end_date)]

def get_data(symbols, date_range):
    data = symbols
   
    #Create empty DataFrame
    df = pd.DataFrame(index=date_range)
    
    if 'SPY' not in symbols: #if SPY is absent, add for reference
        symbols.insert(0, 'SPY')
    
    #join all columns in symbols
    for symbol in data:
        dftemp = pd.read_csv(symbol_to_path(symbol), index_col="Date", parse_dates=True, \
                                     usecols=['Date', 'Adj Close'], na_values='nan')
        dftemp = dftemp.rename(columns={'Adj Close': symbol}) #rename 'Adj Close' column to symbol
        df = df.join(dftemp) #join dfintermediate and df (default join is left join)
        if symbol == 'SPY': #drop dates 'SPY' didn't trade
            df = df.dropna(subset=["SPY"])
    return df

def plot_data(dataframe, plot_title="Stock Prices"):
    ax = dataframe.plot(title=plot_title)
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    plt.show() #must be called to show plot
    
def plot_selected(df, columns, start_index, end_index):
    """Plot the desired columns over index values in the given range."""
    plot_data(df.ix[start_index:end_index, columns], "Selected Data")

def normalize(df):
    """Normalize stock prices using the first row of the dataframe."""
    return df / df[0:1]

def test_splice(df):
    print "Splice by double column:"
    print df[['IBM', 'GLD']] #splice by column (i.e. whole ) (use list of symbols)
    
    print "Splice by single column:"
    print df['IBM'] #splice a single column
    
    print "Splice by row range:"
    print df['2010-01-01':'2010-01-31'] #splice by row for all symbols
    
    print "Splice by row and column range:"
    print df.ix['2010-01-01':'2010-01-31', ['APPL', 'GLD']] #splice by row and by column

def test_run():
    for symbol in ['APPL', 'IBM']:
        print "Max close"
        print symbol, get_max_close(symbol)
        print "Mean Volume"
        print get_mean_volume(symbol)
        print "Plot"
        plot_adjusted_close(symbol)

In [2]:
if __name__ == "__main__":
    #Define Date Range
    start_date = '2010-01-01'
    end_date = '2010-12-31'
    date_range = pd.date_range(start_date, end_date)
    symbol_list = ['IBM', 'GLD', 'APPL']
    df = get_data(symbol_list, date_range)
#     plot_data(df)
    print df/ df.ix[:1]
# #     print df
#     plot_selected(df, ['IBM', 'APPL'], start_date, end_date)
    
    
    
        
    

            SPY  IBM  GLD  APPL
2010-01-04  1.0  1.0  1.0   1.0
2010-01-05  NaN  NaN  NaN   NaN
2010-01-06  NaN  NaN  NaN   NaN
2010-01-07  NaN  NaN  NaN   NaN
2010-01-08  NaN  NaN  NaN   NaN
2010-01-11  NaN  NaN  NaN   NaN
2010-01-12  NaN  NaN  NaN   NaN
2010-01-13  NaN  NaN  NaN   NaN
2010-01-14  NaN  NaN  NaN   NaN
2010-01-15  NaN  NaN  NaN   NaN
2010-01-19  NaN  NaN  NaN   NaN
2010-01-20  NaN  NaN  NaN   NaN
2010-01-21  NaN  NaN  NaN   NaN
2010-01-22  NaN  NaN  NaN   NaN
2010-01-25  NaN  NaN  NaN   NaN
2010-01-26  NaN  NaN  NaN   NaN
2010-01-27  NaN  NaN  NaN   NaN
2010-01-28  NaN  NaN  NaN   NaN
2010-01-29  NaN  NaN  NaN   NaN
2010-02-01  NaN  NaN  NaN   NaN
2010-02-02  NaN  NaN  NaN   NaN
2010-02-03  NaN  NaN  NaN   NaN
2010-02-04  NaN  NaN  NaN   NaN
2010-02-05  NaN  NaN  NaN   NaN
2010-02-08  NaN  NaN  NaN   NaN
2010-02-09  NaN  NaN  NaN   NaN
2010-02-10  NaN  NaN  NaN   NaN
2010-02-11  NaN  NaN  NaN   NaN
2010-02-12  NaN  NaN  NaN   NaN
2010-02-16  NaN  NaN  NaN   NaN
...     