In [17]:
import glob
import pandas as pd
import numpy as np

In [18]:
inputLoc = '/home/tbrownex/data/retirement/funds/normalized/*'
outputLoc = '/home/tbrownex/data/retirement/funds/'

In [70]:
def calcMonthlyGrowthRate(df, fundName):
    # computes the compounded monthly return for a fund
    initVal = df.iloc[0][fundName]
    finalVal = df.iloc[-1][fundName]
    numMonths = len(df)
    return 1+np.rate(nper=numMonths, pmt=0, pv=-initVal, fv=finalVal)

In [71]:
def calcOverallAcct(df):
    # For each year compute the average pctChg across all funds
    cols = [col for col in df.columns if 'pctChg' in col]
    tmp = df[cols]
    total_pctChg = tmp.mean(axis=1)
    # Now the variance of those returns
    std = round(total_pctChg.std(), 3)
    # Given the average return for each year, compute the annualized 
    # return across all years
    cumRet = total_pctChg.cumprod().dropna()
    numYears = len(cumRet)/12
    rate = round(np.power(cumRet[-1], 1/numYears), 4)
    
    d={}
    d['total']= {'var': std, 'annualGrowth': rate}
    return pd.DataFrame.from_dict(d).T

In [72]:
def processFund(df, fundName):
    '''  For each fund get the:
    - volatility of monthly returns
    - annualized growth rate '''
    df.set_index('date', inplace=True)
    df.sort_index(inplace=True)
    df["pctChg"] = df[fundName].pct_change()
    d = {'volatilty': df['pctChg'].std(),
         'annualReturn': calcMonthlyGrowthRate(df, fundName)}
    return pd.DataFrame(d, index=[fundName])

In [73]:
def formatFund(fund):
    fundName = fund.split("/normalized/")[1]
    return fundName.split(".csv")[0]

In [76]:
dfList = []

for fund in glob.glob(inputLoc):
    fundName = formatFund(fund)
    print(fundName)
    df = pd.read_csv(fund)
    summary = processFund(df, fundName)
    dfList.append(summary)

ANAYX
PHYTX
NUSI
PCLPX
IJH
RYLD
COIIX
MEIIX
PZTRX
PEP
QYLD
VEA
VFIAX
TRBCX
EFA
VAIPX
IJR
PIREX
NFFFX
TEAFX
PRILX
POSIX
GSDIX
NHMRX
MTIIX
SKIRX
PONPX
COVRX


  return 1+np.rate(nper=numMonths, pmt=0, pv=-initVal, fv=finalVal)


In [78]:
summary = pd.concat(dfList)
summary = summary.round(3)
summary.reset_index(inplace=True)
summary.rename(columns={'index': 'fund'}, inplace=True)
summary.to_csv(outputLoc+'fundSummary.csv', index=False)