# EDA + Simulation

Steve Donahue, www.github.com/sdonahue0132

This notebook

1) Loads information generates in the EDA + Simulation Data Preprocessing notebook

2) Develops additional functions to evaluate both random portfolios and portfolios with specific funds

3) Hosts code to generate a large number of simulated portfolios for statistical analysis

In [1]:
# Import essential libraries #

import pandas as pd
import numpy as np
from numpy import percentile
import math
import datetime
from datetime import date
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from functools import reduce
import warnings
from random import randint
import seaborn as sns
%matplotlib inline

warnings.filterwarnings(action='once')

In [3]:
# Load relevant csv files

stock_weekly = pd.read_csv('csv_files/stock_increments_training.csv')
itb_weekly = pd.read_csv('csv_files/itb_increments_training.csv')
ltb_weekly = pd.read_csv('csv_files/ltb_increments_training.csv')
gold_weekly = pd.read_csv('csv_files/gold_increments_training.csv')
commod_weekly = pd.read_csv('csv_files/commod_weekly_training.csv')

for df in [stock_weekly, itb_weekly, ltb_weekly, gold_weekly, commod_weekly]:
    df.index = df['Date']
    
stock_weekly = stock_weekly.drop('Date', axis = 1)
itb_weekly = itb_weekly.drop('Date', axis = 1)
ltb_weekly = ltb_weekly.drop('Date', axis = 1)
gold_weekly = gold_weekly.drop('Date', axis = 1)
commod_weekly = commod_weekly.drop('Date', axis = 1)


In [6]:
# Load relevant  functions

def random_all_weather(stock, inter_bond, long_bond, gold, commod):
    
    # First, select a fund from each investment category
    stock_choice = stock.sample(axis=1).columns
    inter_choice = inter_bond.sample(axis=1).columns
    long_bond_choice = long_bond.sample(axis=1).columns
    gold_choice = gold.sample(axis=1).columns
    commod_choice = commod.sample(axis=1).columns
    
    # Extract the columns of percent changes from the df
    rand_stock_inc = stock_weekly[stock_choice]
    rand_inter_bond_inc = itb_weekly[inter_choice]
    rand_long_bond_inc = ltb_weekly[long_bond_choice]
    rand_gold_inc = gold_weekly[gold_choice]
    rand_commod_inc = commod_weekly[commod_choice]

    random_inc = rand_stock_inc.merge(
        rand_inter_bond_inc, on = 'Date').merge(
        rand_long_bond_inc, on = 'Date').merge(
        rand_gold_inc, on = 'Date').merge(
        rand_commod_inc, on = 'Date')
    
    # Return a portfolio of randomly sampled funds, one in each category, starting at the first date all 5 had value.
    # Please note that the .dropna() here will restrict the amount of historical data leveraged in each portfolio.
    portfolio = random_inc.dropna()
    
    return portfolio


# Time weighted return captures an accurate total return, accounting for time of accrual.

def time_weighted_return (df):
    sorted_df = df.sort_index(ascending=True).dropna()
    sorted_df['after_cash_flows'] = sorted_df.ending_value + sorted_df.cash_flows
    sorted_df['prev_after_cash_flows'] = sorted_df.after_cash_flows.shift(1)
    sorted_df['HPR'] = (sorted_df.ending_value / sorted_df.prev_after_cash_flows) - 1
    sorted_df['HPR_plus_one'] = sorted_df.HPR + 1
    sorted_df = sorted_df.dropna()

    rate = sorted_df.HPR_plus_one.product()-1
    
    return (rate)
 
# This function is used to compute the Compounded Annual Growth Rate and return the identities and length of investment

def p_summary(performance, fund_identities, term_years):
    twr = time_weighted_return(performance)
    CAGR = (float((1+twr))**(float(1/term_years)) -1)
    return(CAGR, fund_identities, term_years)

#  This function provides a line plot of the portfolio across its length in years.

def p_grapher(test_df):
    test_df['Portfolio Values'] = test_df.sum(axis = 1)
    to_plot = test_df.sort_index(ascending = True)
    return(to_plot.plot(kind = 'line', y = 'Portfolio Values', color='#0504aa').format_xdata(years))

def periodic_rebalance_tester(invesment_stock, 
                              invesment_itb, 
                              invesment_ltb, 
                              invesment_gold, 
                              invesment_commod, 
                              contribution, 
                              interval):

    test = random_all_weather(stock_weekly, 
                              itb_weekly, 
                              ltb_weekly, 
                              gold_weekly, 
                              commod_weekly).dropna()

    test = test.sort_index(ascending = True)
    test_col = test.iloc[:, 0:5]
    num_weeks = test_col.shape[0]
    number_of_periods = int(np.floor(num_weeks/interval))
    
    
    # This block sets markers in the test dataframe for when to add contributions and when to rebalance.
    a = np.empty((num_weeks,))
    a[::2] = 0
    a[1::2] = 1
    a = a*contribution
    total_contributions = np.asarray(a).sum()
    b = np.zeros((num_weeks,))
    b[:b.size:interval] = 1

    test_col['contributions'] = a
    test_col['rebalance_y/n'] = b

    # This block determines the proportions that are targets for rebalancing
    invesment = invesment_stock + invesment_itb + invesment_ltb + invesment_gold + invesment_commod
    inc_stock = invesment_stock / invesment
    inc_itb = invesment_itb / invesment
    inc_ltb = invesment_ltb / invesment
    inc_gold = invesment_gold / invesment
    inc_commod = invesment_commod / invesment
    
    
    values = test_col.values

    # Progress lists are used to accumulate the values of the incremented investments
    progress_stock = []
    progress_itb = []
    progress_ltb = []
    progress_gold = []
    progress_commod = []
    random_labels = test.iloc[:, 0:5]
    test_labels = str(random_labels.columns)
    fund_identities = str(test_labels).strip('Index').replace('_x', '').replace('([', '').replace("], dtype='object')", '')
    

    # This loop applies the increments to the investment amounts, and appends the results to the progress lists.  
    # The Counter is used to reinvest dividends at the appropriate interval.
    

    for i,j,k, l, m, n, rebal in values:
        
        invesment_stock = invesment_stock*i + n*inc_stock
        invesment_itb = (invesment_itb*j + n*inc_itb) 
        invesment_ltb = invesment_ltb*k + n*inc_ltb
        invesment_gold = invesment_gold*l + n*inc_gold
        invesment_commod = invesment_commod*m + n*inc_commod
    
        if rebal == 1:
            subtotal = invesment_stock + invesment_itb + invesment_ltb + invesment_gold + invesment_commod
            invesment_stock = subtotal*inc_stock
            invesment_itb = subtotal*inc_itb
            invesment_ltb = subtotal*inc_ltb
            invesment_gold = subtotal*inc_gold
            invesment_commod = subtotal*inc_commod
    
        progress_stock.append(invesment_stock)
        progress_itb.append(invesment_itb)
        progress_ltb.append(invesment_ltb)
        progress_gold.append(invesment_gold)
        progress_commod.append(invesment_commod)

    
    # Lists are converted to arrays, and placed into a performace dataframe, which is evaluated for annualized return %
    result_stock = np.array(progress_stock)
    result_itb = np.array(progress_itb)
    result_ltb = np.array(progress_ltb)
    result_gold = np.array(progress_gold)
    result_commod = np.array(progress_commod)

    test['Stock_Eval'] = result_stock
    test['ITB_Eval']= result_itb
    test['LTB_Eval'] = result_ltb
    test['Gold_Eval'] = result_gold
    test['Commod_Eval']= result_commod

    performance = test.iloc[:, 5:10]
    performance['ending_value'] = performance.sum(axis=1)
    performance['cash_flows'] = a 
    performance['rebalanced'] = b
    performance = performance.sort_index(ascending = False)
    term_years = float(len(performance.index)/52.17857)
 
    return (performance, fund_identities, term_years)

def reverser(df):
    df_reversed = df.iloc[::-1]
    df_reversed['Reverse_Index']= df.index
    df_reversed = df_reversed.set_index('Reverse_Index')
    df_reversed.index.names = ['Date']
    return df_reversed

r_stock = reverser(stock_weekly)
r_itb = reverser(itb_weekly)
r_ltb = reverser(ltb_weekly)
r_gold = reverser(gold_weekly)
r_commod = reverser(commod_weekly)


def reverse_rebalancer(invesment_stock, 
                              invesment_itb, 
                              invesment_ltb, 
                              invesment_gold, 
                              invesment_commod, 
                              contribution, 
                              interval):

     # First, select a fund from each investment category
    stock_choice = r_stock.sample(axis=1).columns
    inter_choice = r_itb.sample(axis=1).columns
    long_bond_choice = r_ltb.sample(axis=1).columns
    gold_choice = r_gold.sample(axis=1).columns
    commod_choice = r_commod.sample(axis=1).columns
    
    # Extract the columns of percent changes from the df
    rand_stock_inc = r_stock[stock_choice]
    rand_inter_bond_inc = r_itb[inter_choice]
    rand_long_bond_inc = r_ltb[long_bond_choice]
    rand_gold_inc = r_gold[gold_choice]
    rand_commod_inc = r_commod[commod_choice]

    random_inc = rand_stock_inc.merge(
        rand_inter_bond_inc, on = 'Date').merge(
        rand_long_bond_inc, on = 'Date').merge(
        rand_gold_inc, on = 'Date').merge(
        rand_commod_inc, on = 'Date')
    
    test = random_inc.dropna()
    test = test.sort_index(ascending = True)
    test_col = test.iloc[:, 0:5]
    num_weeks = test_col.shape[0]
    number_of_periods = int(np.floor(num_weeks/interval))
    
    
    # This block sets markers in the test dataframe for when to add contributions and when to rebalance.
    a = np.empty((num_weeks,))
    a[::2] = 0
    a[1::2] = 1
    a = a*contribution
    total_contributions = np.asarray(a).sum()

    b = np.zeros((num_weeks,))
    b[:b.size:interval] = 1
    
    test_col['contributions'] = a
    test_col['rebalance_y/n'] = b
    
    # This block determines the proportions that are targets for rebalancing
    invesment = invesment_stock + invesment_itb + invesment_ltb + invesment_gold + invesment_commod
    inc_stock = invesment_stock / invesment
    inc_itb = invesment_itb / invesment
    inc_ltb = invesment_ltb / invesment
    inc_gold = invesment_gold / invesment
    inc_commod = invesment_commod / invesment
    
    values = test_col.values

    # Progress lists are used to accumulate the values of the incremented investments
    progress_stock = []
    progress_itb = []
    progress_ltb = []
    progress_gold = []
    progress_commod = []
    random_labels = test.iloc[:, 0:5]
    test_labels = str(random_labels.columns)
    fund_identities = str(test_labels).strip('Index').replace('_x', '').replace('([', '').replace("], dtype='object')", '')
    
    # This loop applies the increments to the investment amounts, and appends the results to the progress lists

    
    for i,j,k, l, m, n, rebal in values:
        
        invesment_stock = invesment_stock*i + n*inc_stock
        invesment_itb = (invesment_itb*j + n*inc_itb)
        invesment_ltb = invesment_ltb*k + n*inc_ltb
        invesment_gold = invesment_gold*l + n*inc_gold
        invesment_commod = invesment_commod*m + n*inc_commod
    
        if rebal == 1:
            subtotal = invesment_stock + invesment_itb + invesment_ltb + invesment_gold + invesment_commod
            invesment_stock = subtotal*inc_stock
            invesment_itb = subtotal*inc_itb
            invesment_ltb = subtotal*inc_ltb
            invesment_gold = subtotal*inc_gold
            invesment_commod = subtotal*inc_commod
    
        progress_stock.append(invesment_stock)
        progress_itb.append(invesment_itb)
        progress_ltb.append(invesment_ltb)
        progress_gold.append(invesment_gold)
        progress_commod.append(invesment_commod)
    
    # Lists are converted to arrays, and placed into a performace dataframe, which is evaluated for annualized return %
    result_stock = np.array(progress_stock)
    result_itb = np.array(progress_itb)
    result_ltb = np.array(progress_ltb)
    result_gold = np.array(progress_gold)
    result_commod = np.array(progress_commod)

    test['Stock_Eval'] = result_stock
    test['ITB_Eval']= result_itb
    test['LTB_Eval'] = result_ltb
    test['Gold_Eval'] = result_gold
    test['Commod_Eval']= result_commod

    performance = test.iloc[:, 5:10]
    performance['ending_value'] = performance.sum(axis=1)
    performance['cash_flows'] = a 
    performance['rebalanced'] = b
    performance = performance.sort_index(ascending = False)
    term_years = float(len(performance.index)/52.17857)
 
    return (performance, fund_identities, term_years)

r_stock = reverser(stock_weekly)
r_itb = reverser(itb_weekly)
r_ltb = reverser(ltb_weekly)
r_gold = reverser(gold_weekly)
r_commod = reverser(commod_weekly)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
# The following three cells are used to simulate a large number of All Weather Portfolios and export the results 
# to csv format.  The results of these exports may be found in the csv_files folder.

# Running these cells as written is very time intensive #

# This cell runs a simulation given an initial contribution amount and user-specified proportions for each category.#
# It then selects a rebalance period and biweekly contribution at random.  It then runs both forward an reverse tests #
# records the identities, rebal period, contribution, and averages the CAGR's in a new dataframe #


#Set contribution proportions and 'dummy row' for the export dataframe
stock_prop, itb_prop, ltb_prop, gold_prop, commod_prop = .30, .40, .15, .075, .075
export_df = pd.DataFrame()

for i in range(0, 7000):
    #Select random rebalace interval between a weekly rebalance and an annual rebalance
    rebal = randint(1,52)

    #Select contribution level randomly between $100 and $1000 every two weeks
    con = 500

    #Set breakdown of biweekly contributions
    stockz = con*stock_prop
    itbz = con*itb_prop
    ltbz = con*ltb_prop
    goldz = con*gold_prop
    commodz = con*commod_prop

    #Run the forward eval
    forward_df, forward_label, forward_years = periodic_rebalance_tester(stockz, itbz, ltbz, goldz, commodz, con, rebal)
    for_CAGR, for_identities, for_years = p_summary(forward_df, forward_label, forward_years)
    
    #Run the reverse eval
    backward_df, backward_label, backward_years = reverse_rebalancer(stockz, itbz, ltbz, goldz, commodz, con, rebal)
    back_CAGR, back_identities, back_years = p_summary(backward_df, backward_label, backward_years)

    #Get the Average CAGR
    avg_CAGR = (for_CAGR + back_CAGR)/2

    #Assemble the row for export
    export_df = export_df.append(pd.Series([forward_label, round(avg_CAGR*100, 2), rebal, con]), ignore_index = True)

#Clean the exported dataframe

export_df.columns = ['Portfolio', 'Estimated_Annual_Return', 'Weeks_until_Rebalance', 'Biweekly_Contribution']
export_df = export_df.sort_values(['Estimated_Annual_Return', 'Biweekly_Contribution'], ascending = [False, True])

export_df.head()
# Note that this cell has been run multiple times since the inception of the code, with alternative proportions
# The results are further analyzed in a subsequent cell, the dataframes are appended with the new analyses, and then
# pushed to csv files for reference.  This simulation is one part of a very time intensive process.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Portfolio,Estimated_Annual_Return,Weeks_until_Rebalance,Biweekly_Contribution
5330,"'VIGRX', 'BIV', 'PRULX', 'INIVX', 'DBC'",13.42,18.0,500.0
4788,"'VIGRX', 'HYG', 'WHOSX', 'INIVX', 'DBC'",13.36,36.0,500.0
4133,"'VIGRX', 'IEF', 'WHOSX', 'INIVX', 'DJP'",13.34,32.0,500.0
4253,"'VIGRX', 'BIV', 'VUSTX', 'OPGSX', 'GSP'",13.34,37.0,500.0
5508,"'VIGRX', 'IEF', 'VUSTX', 'OPGSX', 'DBC'",13.34,24.0,500.0


In [11]:
# Let's take a closer look at those funds which had the best rates of return, even at low contributions.
# Would higher contributions affect them adversely?  One would think they wouldn't do much better, if the simulation
# had a larger enough sample size.

# This function develops the WEIGHTED CAGR which will be used for assembling the master data set for this project #

def specific_tester(stock_name, itb_name, ltb_name, gold_name, commod_name, 
                    stock_prop, itb_prop, ltb_prop, gold_prop, commod_prop, contribution, interval):

    # Extract the columns of percent changes from the forward facing df
    rand_stock_inc = stock_weekly[[stock_name]]
    rand_inter_bond_inc = itb_weekly[[itb_name]]
    rand_long_bond_inc = ltb_weekly[[ltb_name]]
    rand_gold_inc = gold_weekly[[gold_name]]
    rand_commod_inc = commod_weekly[[commod_name]]

    random_inc = rand_stock_inc.merge(
        rand_inter_bond_inc, on = 'Date').merge(
        rand_long_bond_inc, on = 'Date').merge(
        rand_gold_inc, on = 'Date').merge(
        rand_commod_inc, on = 'Date')
    
    # Return a portfolio of randomly sampled funds, one in each category, starting at the first date all 5 had value.
    # Please note that the .dropna() here will restrict the amount of historical data leveraged in each portfolio.
    test = random_inc.dropna()
    
    test = test.sort_index(ascending = True)
    test_col = test.iloc[:, 0:5]
    num_weeks = test_col.shape[0]
    number_of_periods = int(np.floor(num_weeks/interval))

        
    # This block sets markers in the test dataframe for when to add contributions and when to rebalance.
    a = np.empty((num_weeks,))
    a[::2] = 0
    a[1::2] = 1
    a = a*contribution
    total_contributions = np.asarray(a).sum()
    b = np.zeros((num_weeks,))
    b[:b.size:interval] = 1
   
    test_col['contributions'] = a
    test_col['rebalance_y/n'] = b
    
    # This block determines the proportions that are targets for rebalancing
    inc_stock = stock_prop
    inc_itb = itb_prop
    inc_ltb = ltb_prop
    inc_gold = gold_prop
    inc_commod = commod_prop
    
    invesment_stock = contribution*inc_stock
    invesment_itb = contribution*inc_itb
    invesment_ltb = contribution*inc_ltb
    invesment_gold = contribution*inc_gold
    invesment_commod = contribution*inc_commod
    
    values = test_col.values

    # Progress lists are used to accumulate the values of the incremented investments
    progress_stock = []
    progress_itb = []
    progress_ltb = []
    progress_gold = []
    progress_commod = []
    random_labels = test.iloc[:, 0:5]
    test_labels = str(random_labels.columns)
    fund_identities = str(test_labels).strip('Index').replace('_x', '').replace('([', '').replace("], dtype='object')", '')
    
    # This loop applies the increments to the investment amounts, and appends the results to the progress lists

    for i,j,k, l, m, n, rebal in values:

        invesment_stock = invesment_stock*i + n*inc_stock
        invesment_itb = (invesment_itb*j + n*inc_itb)
        invesment_ltb = invesment_ltb*k + n*inc_ltb
        invesment_gold = invesment_gold*l + n*inc_gold
        invesment_commod = invesment_commod*m + n*inc_commod
    
        if rebal == 1:
            subtotal = invesment_stock + invesment_itb + invesment_ltb + invesment_gold + invesment_commod
            invesment_stock = subtotal*inc_stock
            invesment_itb = subtotal*inc_itb
            invesment_ltb = subtotal*inc_ltb
            invesment_gold = subtotal*inc_gold
            invesment_commod = subtotal*inc_commod
    
        progress_stock.append(invesment_stock)
        progress_itb.append(invesment_itb)
        progress_ltb.append(invesment_ltb)
        progress_gold.append(invesment_gold)
        progress_commod.append(invesment_commod)
        
    
    # Lists are converted to arrays, and placed into a performace dataframe, which is evaluated for annualized return %
    result_stock = np.array(progress_stock)
    result_itb = np.array(progress_itb)
    result_ltb = np.array(progress_ltb)
    result_gold = np.array(progress_gold)
    result_commod = np.array(progress_commod)

    test['Stock_Eval'] = result_stock
    test['ITB_Eval']= result_itb
    test['LTB_Eval'] = result_ltb
    test['Gold_Eval'] = result_gold
    test['Commod_Eval']= result_commod

    performance = test.iloc[:, 5:10]
    performance['ending_value'] = performance.sum(axis=1)
    performance['cash_flows'] = a
    performance['rebalanced'] = b
    f_performance = performance.sort_index(ascending = False)
    term_years = float(len(f_performance.index)/52.17857)

    for_CAGR, for_identities, for_years = p_summary(f_performance, fund_identities, term_years)
    
    # Extract the columns of percent changes from the r_df's
    rand_stock_inc = r_stock[[stock_name]]
    rand_inter_bond_inc = r_itb[[itb_name]]
    rand_long_bond_inc = r_ltb[[ltb_name]]
    rand_gold_inc = r_gold[[gold_name]]
    rand_commod_inc = r_commod[[commod_name]]
                  
    random_inc = rand_stock_inc.merge(
        rand_inter_bond_inc, on = 'Date').merge(
        rand_long_bond_inc, on = 'Date').merge(
        rand_gold_inc, on = 'Date').merge(
        rand_commod_inc, on = 'Date')
    
    # Return a portfolio of randomly sampled funds, one in each category, starting at the first date all 5 had value.
    # Please note that the .dropna() here will restrict the amount of historical data leveraged in each portfolio.
    test = random_inc.dropna()
    
    test = test.sort_index(ascending = True)
    test_col = test.iloc[:, 0:5]
    num_weeks = test_col.shape[0]
    number_of_periods = int(np.floor(num_weeks/interval))

        
    # This block sets markers in the test dataframe for when to add contributions and when to rebalance.
    a = np.empty((num_weeks,))
    a[::2] = 0
    a[1::2] = 1
    a = a*contribution
    total_contributions = np.asarray(a).sum()
    b = np.zeros((num_weeks,))
    b[:b.size:interval] = 1
    
    test_col['contributions'] = a
    test_col['rebalance_y/n'] = b

    # This block determines the proportions that are targets for rebalancing
    inc_stock = stock_prop
    inc_itb = itb_prop
    inc_ltb = ltb_prop
    inc_gold = gold_prop
    inc_commod = commod_prop
    
    invesment_stock = contribution*inc_stock
    invesment_itb = contribution*inc_itb
    invesment_ltb = contribution*inc_ltb
    invesment_gold = contribution*inc_gold
    invesment_commod = contribution*inc_commod
    
    values = test_col.values

    # Progress lists are used to accumulate the values of the incremented investments
    progress_stock = []
    progress_itb = []
    progress_ltb = []
    progress_gold = []
    progress_commod = []
    random_labels = test.iloc[:, 0:5]
    test_labels = str(random_labels.columns)
    fund_identities = str(test_labels).strip('Index').replace('_x', '').replace('([', '').replace("], dtype='object')", '')
    
    # This loop applies the increments to the investment amounts, and appends the results to the progress lists

    for i,j,k, l, m, n, rebal in values:
        
        invesment_stock = invesment_stock*i + n*inc_stock
        invesment_itb = (invesment_itb*j + n*inc_itb)
        invesment_ltb = invesment_ltb*k + n*inc_ltb
        invesment_gold = invesment_gold*l + n*inc_gold
        invesment_commod = invesment_commod*m + n*inc_commod
    
        if rebal == 1:
            subtotal = invesment_stock + invesment_itb + invesment_ltb + invesment_gold + invesment_commod
            invesment_stock = subtotal*inc_stock
            invesment_itb = subtotal*inc_itb
            invesment_ltb = subtotal*inc_ltb
            invesment_gold = subtotal*inc_gold
            invesment_commod = subtotal*inc_commod
    
        progress_stock.append(invesment_stock)
        progress_itb.append(invesment_itb)
        progress_ltb.append(invesment_ltb)
        progress_gold.append(invesment_gold)
        progress_commod.append(invesment_commod)
        
    # Lists are converted to arrays, and placed into a performace dataframe, which is evaluated for annualized return %
    result_stock = np.array(progress_stock)
    result_itb = np.array(progress_itb)
    result_ltb = np.array(progress_ltb)
    result_gold = np.array(progress_gold)
    result_commod = np.array(progress_commod)

    test['Stock_Eval'] = result_stock
    test['ITB_Eval']= result_itb
    test['LTB_Eval'] = result_ltb
    test['Gold_Eval'] = result_gold
    test['Commod_Eval']= result_commod

    performance = test.iloc[:, 5:10]
    performance['ending_value'] = performance.sum(axis=1)
    performance['cash_flows'] = a
    performance['rebalanced'] = b
    b_performance = performance.sort_index(ascending = False)
    term_years = float(len(b_performance.index)/52.17857)

    back_CAGR, back_identities, back_years = p_summary(b_performance, fund_identities, term_years)
    low_CAGR = min(for_CAGR, back_CAGR)
    high_CAGR = max(for_CAGR, back_CAGR)
    mean_CAGR = round((back_CAGR + for_CAGR)/2*100, 2)
    CAGR_range = str(round(low_CAGR*100, 2)) + ' to ' + str(round(high_CAGR*100, 2))
    
    return (f_performance, b_performance, mean_CAGR, CAGR_range, for_identities, for_years)

# This function is helpful for removing unusually large gains from the list of typical 52 week returns that will be 
# created in the next cell.  It does not affect large losses due to the heavily right-skew distributions in the
# CAGR distributions.

def reject_outliers(data, m=2):
    return data[abs(data - np.mean(data)) < m * np.std(data)]
    


In [12]:
# This cell develops a list of 52 week returns for each row of the dataframe fed to it.  
# It then determines the min and max returns (minus any occuring outlier gains)
# The typical return range is the middle 90% returns in all 52 week periods
# The results of this cell (as written with 5000 resamples) has been exported as 5000_All_Weather.csv
# This cell is the second part of the process for forming resampled dataframes for further study
# It is not recommended that an observer run this cell without having ample time (6 to 7 hours) to let it 
# complete its work.

num_rows = export_df.shape[0]
listing = []

for j in range(num_rows):
    # Separate the fund names so they can be used as inputs
    funds = export_df.Portfolio[j].replace("'", "").replace(' ','').split(',')
    stock_id = funds[0]
    itb_id = funds[1]
    ltb_id = funds[2]
    gold_id = funds[3]
    commod_id = funds[4]

    # identify rebal, contribution
    rebal = export_df.Weeks_until_Rebalance[j]
    contribution = export_df.Biweekly_Contribution[j]

    f_df, b_df, CAGR_mean, CAGR_range, ident, years = specific_tester(
        stock_id, itb_id, ltb_id, gold_id, commod_id, .30, .40, .15, .075, .075, contribution, int(rebal))

    number_of_weeks = f_df.shape[0]
    summaries = []

    for i in range(0, number_of_weeks-52):
        selection = f_df.iloc[i:i+51, :]
        CAGR, identities, length = p_summary(selection, ident, 1)
        summaries = np.append(summaries, round(CAGR*100,2))

    modified = reject_outliers(summaries)
    CAGR_range =  str(round(np.percentile(modified, 5), (2))) + ' to ' + str(round(np.percentile(modified,95), 2))
    
    
    listing.append([min(modified), max(modified), CAGR_range])

    placeholder = pd.DataFrame(listing, columns = ['Worst_Year', 'Best_Year', 'Typical_Returns'])

result = pd.concat([export_df, placeholder], axis=1, join='inner')

# This block cleans the resulting dataframe and then exports it to a csv file

data_set = result[['Portfolio', 'Weeks_until_Rebalance', 'Biweekly_Contribution', 'Worst_Year', 
                   'Best_Year', 'Estimated_Annual_Return', 'Typical_Returns']]
data_set = data_set.sort_values(['Estimated_Annual_Return', 'Biweekly_Contribution', 'Worst_Year'], 
                                ascending = [False, True, True])

#Note that here, Typical Returns refer to the middle 90% returns in a given 52 week period
data_set.head()

data_set.to_csv('csv_files/7000_all_weather_portfolios.csv')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
