In [17]:
# Import essential libraries #

import pandas as pd
import numpy as np
from numpy import percentile
import math
import datetime
from datetime import date
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from functools import reduce
import warnings

warnings.filterwarnings(action='once')

In [18]:
# CSV_Formatter prepares two dataframes covering each date of a fund's history: its daily values and its daily changes.
# It cleans the csv by keeping one measurement per day, the value at the Close of business, and then creating
# a column shifted by one row for comparison and a column of the daily change in value. Note that the change is
# stored as the ratio (next row's close / current row's close), i.e. a growth factor such as 1.01, not a percentage.
# Both the daily values and the daily changes are returned, as two separate dataframes.

def CSV_Formatter (folder_name, filename, base_dir='Downloads'):
    """Load one fund's price history and derive its day-over-day ratios.

    Reads ``<base_dir>/<folder_name>/<filename>.csv``. The file must contain
    a 'Date' column (used as the index) and a 'Close' column.

    Parameters
    ----------
    folder_name : str
        Sub-folder of ``base_dir`` that holds the CSV.
    filename : str
        CSV name without the ``.csv`` extension. Its string form is also used
        as the column label of both returned frames.
    base_dir : str, optional
        Root folder of the downloads. Defaults to 'Downloads', resolved
        relative to the current working directory.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(values, ratios)``, both indexed by 'Date' with a single column
        named ``str(filename)``. ``values`` holds the 'Close' figures.
        ``ratios`` holds, for each row, the next row's close divided by that
        row's close; the last row has no successor and is therefore NaN.
    """
    label = str(filename)
    csv_path = '/'.join([str(base_dir), str(folder_name), label + '.csv'])

    closes = pd.read_csv(csv_path).set_index('Date')[['Close']]
    closes.columns = [label]

    # Build the ratio as its own frame rather than adding helper columns to
    # `closes`: writing new columns into a column slice is what triggers
    # pandas' SettingWithCopyWarning, and the helpers were discarded anyway.
    ratios = (closes[label].shift(-1) / closes[label]).to_frame(label)

    return (closes, ratios)

# weekly_master is the weekly master function: it collects the week-to-week changes for an input dataframe
# by multiplying together the daily changes that fall within each week.
# The input dataframe should consist only of a date index and one or more columns of daily change values.

def weekly_master(df):
    """Collapse daily growth factors into 7-day (weekly) growth factors.

    The source data skips weekends and holidays, so the index is first
    expanded to every calendar day between the earliest and latest date.
    Days with no data (and any NaN cells) are set to 1: portfolios are
    assessed by multiplying daily factors, so a factor of 1 leaves the
    value of the investment unchanged.

    The expanded frame is then cut into consecutive 7-day blocks counted
    backward from the most recent date, and each column is multiplied
    across every block. Leftover days at the old end of the history that
    do not fill a whole block are dropped.

    Parameters
    ----------
    df : DataFrame
        Daily factors, one column per fund (any number of columns), indexed
        by unique dates or date strings. The frame is not modified. A column
        named 'Datetime', if present, is ignored.

    Returns
    -------
    DataFrame
        One row per full 7-day block, with the same columns as ``df``. The
        index is a DatetimeIndex named 'Date' holding the earliest day of
        each block. Rows are ordered newest block first.

    Raises
    ------
    ValueError
        If the index contains duplicate dates (raised by ``reindex``).
    """
    # Work on a copy so the caller's frame does not gain a 'Datetime' column.
    # The original overwrote any existing 'Datetime' column with the parsed
    # index, so dropping it here keeps the same set of value columns.
    df_timed = df.drop(columns=['Datetime'], errors='ignore')
    df_timed.index = pd.to_datetime(df.index.values)
    labels = df_timed.columns

    if len(df_timed.index) == 0:
        return pd.DataFrame(columns=labels, index=pd.DatetimeIndex([], name='Date'))

    # Every calendar day in the covered span, newest first; gaps become 1.
    all_dates = pd.date_range(df_timed.index.min(), df_timed.index.max(), freq='D')
    working_df = df_timed.reindex(all_dates).fillna(1).sort_index(ascending=False)

    number_of_weeks = len(working_df.index) // 7

    week_starts = []
    week_products = []
    for i in range(number_of_weeks):
        portfolio_segment = working_df.iloc[i*7:(i+1)*7]
        # Rows are newest-first, so the last row is the block's earliest day.
        week_starts.append(portfolio_segment.index[-1])
        week_products.append(
            [portfolio_segment.iloc[:, j].product() for j in range(len(labels))]
        )

    # Assemble the result once instead of growing a frame inside the loop.
    # The former `weekly_eval.sort_index()` call discarded its result and so
    # never reordered anything; the newest-first order is kept as-is.
    weekly_eval = pd.DataFrame(
        week_products,
        index=pd.DatetimeIndex(week_starts, name='Date'),
        columns=labels,
    )

    return(weekly_eval)


In [19]:
# Load the source files for the STOCK INDEX funds #
# Each call returns (daily values, daily changes) for one fund.

PREIX, PREIX_final = CSV_Formatter('Stock_Indices', 'PREIX')
FUSEX, FUSEX_final = CSV_Formatter('Stock_Indices', 'FUSEX')
SWPPX, SWPPX_final = CSV_Formatter('Stock_Indices', 'SWPPX')
VFINX, VFINX_final = CSV_Formatter('Stock_Indices', 'VFINX')
VIGRX, VIGRX_final = CSV_Formatter('Stock_Indices', 'VIGRX')

# Combine the funds into one dataframe of daily values and one of daily changes (aka increments).
# The frames are joined on 'Date' with pandas' default inner join, so only dates shared by every fund remain.
dfstock_values = [
    PREIX[['PREIX']],
    FUSEX[['FUSEX']],
    SWPPX[['SWPPX']],
    VFINX[['VFINX']],
    VIGRX[['VIGRX']],
]
dfstock_finals = [
    PREIX_final,
    FUSEX_final,
    SWPPX_final,
    VFINX_final,
    VIGRX_final,
]
stock_daily_values_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), dfstock_values)
stock_increments_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), dfstock_finals)

# Roll the daily increments up into weekly increments.
stock_weekly = weekly_master(stock_increments_df)




In [20]:
# Load the source files for the INTERMEDIATE TERM BOND funds #
# Each call returns (daily values, daily changes) for one fund.

BIV, BIV_final = CSV_Formatter('Intermediate_Bonds', 'BIV')
HYG, HYG_final = CSV_Formatter('Intermediate_Bonds', 'HYG')
IEF, IEF_final = CSV_Formatter('Intermediate_Bonds', 'IEF')
IEI, IEI_final = CSV_Formatter('Intermediate_Bonds', 'IEI')
IGIB, IGIB_final = CSV_Formatter('Intermediate_Bonds', 'IGIB')
IPE, IPE_final = CSV_Formatter('Intermediate_Bonds', 'IPE')
ITE, ITE_final = CSV_Formatter('Intermediate_Bonds', 'ITE')
TIP, TIP_final = CSV_Formatter('Intermediate_Bonds', 'TIP')

# Combine the funds into one dataframe of daily values and one of daily changes (aka increments).
# The frames are joined on 'Date' with pandas' default inner join, so only dates shared by every fund remain.
df_itb_values = [
    BIV[['BIV']],
    HYG[['HYG']],
    IEF[['IEF']],
    IEI[['IEI']],
    IGIB[['IGIB']],
    IPE[['IPE']],
    ITE[['ITE']],
    TIP[['TIP']],
]
df_itb_finals = [
    BIV_final,
    HYG_final,
    IEF_final,
    IEI_final,
    IGIB_final,
    IPE_final,
    ITE_final,
    TIP_final,
]
itb_daily_values_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), df_itb_values)
itb_increments_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), df_itb_finals)

# Roll the daily increments up into weekly increments.
itb_weekly = weekly_master(itb_increments_df)


In [21]:
# Load the source files for the LONG TERM BOND funds #
# Each call returns (daily values, daily changes) for one fund.

PRULX, PRULX_final = CSV_Formatter('Long_Term_Bonds', 'PRULX')
VUSTX, VUSTX_final = CSV_Formatter('Long_Term_Bonds', 'VUSTX')
WHOSX, WHOSX_final = CSV_Formatter('Long_Term_Bonds', 'WHOSX')

# Combine the funds into one dataframe of daily values and one of daily changes (aka increments).
# The frames are joined on 'Date' with pandas' default inner join, so only dates shared by every fund remain.
df_ltb_values = [
    PRULX[['PRULX']],
    VUSTX[['VUSTX']],
    WHOSX[['WHOSX']],
]
df_ltb_finals = [
    PRULX_final,
    VUSTX_final,
    WHOSX_final,
]
ltb_daily_values_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), df_ltb_values)
ltb_increments_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), df_ltb_finals)

# Roll the daily increments up into weekly increments.
ltb_weekly = weekly_master(ltb_increments_df)




In [22]:
# Load the source files for the GOLD funds #
# Each call returns (daily values, daily changes) for one fund.

INIVX, INIVX_final = CSV_Formatter('Gold', 'INIVX')
OPGSX, OPGSX_final = CSV_Formatter('Gold', 'OPGSX')
SGGDX, SGGDX_final = CSV_Formatter('Gold', 'SGGDX')
USERX, USERX_final = CSV_Formatter('Gold', 'USERX')
VGPMX, VGPMX_final = CSV_Formatter('Gold', 'VGPMX')

# Combine the funds into one dataframe of daily values and one of daily changes (aka increments).
# The frames are joined on 'Date' with pandas' default inner join, so only dates shared by every fund remain.
dfgold_values = [
    INIVX[['INIVX']],
    OPGSX[['OPGSX']],
    SGGDX[['SGGDX']],
    USERX[['USERX']],
    VGPMX[['VGPMX']],
]
dfgold_finals = [
    INIVX_final,
    OPGSX_final,
    SGGDX_final,
    USERX_final,
    VGPMX_final,
]
gold_daily_values_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), dfgold_values)
gold_increments_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), dfgold_finals)

# Roll the daily increments up into weekly increments.
gold_weekly = weekly_master(gold_increments_df)


In [23]:
# Load the source files, from their download folder, for the BROAD BASKET COMMODITY funds #
# Each call returns (daily values, daily changes) for one fund.

DBC, DBC_final = CSV_Formatter('Broad_Commodities', 'DBC')
DJP, DJP_final = CSV_Formatter('Broad_Commodities', 'DJP')
GSG, GSG_final = CSV_Formatter('Broad_Commodities', 'GSG')
GSP, GSP_final = CSV_Formatter('Broad_Commodities', 'GSP')

# Combine the funds into one dataframe of daily values and one of daily changes (aka increments).
# The frames are joined on 'Date' with pandas' default inner join, so only dates shared by every fund remain.
df_commod_values = [
    DBC[['DBC']],
    DJP[['DJP']],
    GSG[['GSG']],
    GSP[['GSP']],
]
df_commod_finals = [
    DBC_final,
    DJP_final,
    GSG_final,
    GSP_final,
]
commod_daily_values_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), df_commod_values)
commod_increments_df = reduce(lambda merged, fund: merged.merge(fund, on = 'Date'), df_commod_finals)

# Roll the daily increments up into weekly increments.
commod_weekly = weekly_master(commod_increments_df)


In [24]:
# This cell creates a function to randomly select the funds in the initial portfolio #

def random_all_weather(stock, inter_bond, long_bond, gold, commod, random_state=None):
    """Build a random five-fund portfolio, one fund per investment category.

    One column name is drawn at random from each argument. The weekly
    changes for the chosen funds are then looked up in the notebook-level
    frames ``stock_weekly``, ``itb_weekly``, ``ltb_weekly``, ``gold_weekly``
    and ``commod_weekly``, which must already exist and contain the same
    column names as the corresponding argument.

    Parameters
    ----------
    stock, inter_bond, long_bond, gold, commod : DataFrame
        Frames whose columns are the candidate funds of each category. Only
        the column names are used; the values are never read.
    random_state : int, numpy.random.RandomState or None, optional
        Seed or generator for the draws, for reproducible portfolios. With
        the default ``None`` the draws use pandas' default random source,
        exactly as before this parameter existed.

    Returns
    -------
    DataFrame
        Weekly changes of the five chosen funds, joined on 'Date', with any
        row containing a missing value removed. Please note that this
        ``dropna()`` restricts the history to dates where all five funds
        have a value.
    """
    # Turn an integer seed into a single generator shared by all five draws.
    # Handing the same integer to every `sample` call would restart the
    # random stream each time and tie the five picks to one another.
    rng = random_state
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)

    # (candidate funds, weekly changes) for each category, in output column order.
    categories = [
        (stock, stock_weekly),
        (inter_bond, itb_weekly),
        (long_bond, ltb_weekly),
        (gold, gold_weekly),
        (commod, commod_weekly),
    ]

    # First, select a fund from each investment category ...
    choices = [candidates.sample(axis=1, random_state=rng).columns for candidates, _ in categories]

    # ... then extract the matching column of weekly changes for each pick.
    picks = [weekly[choice] for (_, weekly), choice in zip(categories, choices)]

    random_inc = picks[0]
    for pick in picks[1:]:
        random_inc = random_inc.merge(pick, on = 'Date')

    portfolio = random_inc.dropna()

    return portfolio



In [26]:
# Smoke test of the random portfolio generating function: draw one portfolio
# from the daily-value frames of the five categories and preview its first rows.

test_random_portfolio = random_all_weather(
    stock=stock_daily_values_df,
    inter_bond=itb_daily_values_df,
    long_bond=ltb_daily_values_df,
    gold=gold_daily_values_df,
    commod=commod_daily_values_df,
)

test_random_portfolio.head()



Unnamed: 0_level_0,PREIX,IPE,VUSTX,USERX,DBC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-06,0.95969,1.002046,1.014545,1.037538,0.980274
2018-09-29,0.986616,0.991152,0.970018,1.016794,0.996723
2018-09-22,0.997584,0.996876,0.996485,0.982009,1.031549
2018-09-15,1.010667,0.997434,0.984429,1.015221,1.033178
2018-09-08,1.004518,0.99562,0.992275,1.036278,0.998257
