# Individual Stock Data Collection and Wrangling

In [356]:
# libraries
import pandas as pd
import yfinance as yf
from datetime import date
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import functools

Goal: write functions that, for a given ticker and time period,  
- get the cost per share on the 1st of each month
- get the dividend amount per share for each (quarterly) payout
- calculate the average dividend
- calculate the average monthly cost per share

In [357]:
# Current holdings, one row per fund: ticker symbol, number of shares owned,
# asset category, and the percentage of future contributions assigned to it.
portfolio = pd.DataFrame(
    [
        ('FXAIX', 16.81, 'stock', 30),
        ('FSSNX', 18.957, 'stock', 25),
        ('FSPSX', 11.455, 'stock', 25),
        ('VDADX', 76.756, 'bond', 5),
        ('FXNAX', 12.256, 'bond', 5),
        ('VGAVX', 197.257, 'bond', 5),
        ('FSRNX', 18.878, 'real estate', 5),
    ],
    columns=['ticker', 'quantity', 'category', 'future_percents'],
)
# total amount to invest each month, across all funds
monthly_investments = 30
# number of years the user plans to keep investing
years_to_invest = 30

In [358]:
# capture today's date and split out its year and month for later use
today = date.today()
year, month = today.year, today.month

In [359]:
# Download a ticker's full price history from Yahoo Finance, cached per ticker
@functools.cache
def connectYahooFinance(ticker):
    """Return the complete available history for ``ticker`` as a DataFrame.

    The history is requested with ``period='max'`` and its index is moved
    into a regular column by ``reset_index`` (other code in this file reads
    that column as ``'Date'``).

    Because of ``functools.cache`` the download happens once per distinct
    ticker; later calls get the very same DataFrame object back, so callers
    should not modify it in place.
    """
    history = yf.Ticker(ticker).history(period = 'max')
    return pd.DataFrame(history).reset_index()

In [360]:
# historical price rows that fall on the first day of a month
def MonthlyCost(ticker):
    """Return the rows of the ticker's history dated on the 1st of a month.

    Only rows whose ``'Date'`` is itself a month start are kept; a month
    with no row on its first day contributes nothing to the result.
    """
    history = connectYahooFinance(ticker)
    # boolean mask: True where the row's date is the first day of its month
    on_first_of_month = history['Date'].dt.is_month_start
    return history.loc[on_first_of_month]

In [361]:
# historical rows on which a dividend was recorded
def quarterlyDividends(ticker):
    """Return the rows of the ticker's history whose ``'Dividends'`` is non-zero.

    Despite the name, no quarterly schedule is enforced: every row with a
    non-zero dividend is returned, whatever its date.
    """
    history = connectYahooFinance(ticker)
    paid_dividend = history['Dividends'].ne(0)
    return history.loc[paid_dividend]

In [362]:
# function to calculate the average dividend per share for a ticker
def CalculateAvgDividend(ticker, years_to_invest):
    """Return the mean per-share dividend paid during the look-back window.

    The window spans the shorter of ``years_to_invest`` and the number of
    years since the ticker's first recorded dividend, counted back from the
    module-level ``year``. Only dividends dated after 1 January of the
    window's first year are averaged.

    Args:
        ticker: symbol passed to ``quarterlyDividends``.
        years_to_invest: maximum number of years to look back.

    Returns:
        The mean of the ``'Dividends'`` values in the window, or ``0.0``
        when the ticker has no dividend rows at all or none inside the
        window (previously an ``IndexError`` and ``NaN`` respectively).
    """
    dividends = quarterlyDividends(ticker)
    # a ticker that has never paid a dividend has nothing to average
    if dividends.empty:
        return 0.0
    # year of the earliest recorded dividend (not necessarily the fund's inception)
    first_dividend_year = dividends['Date'].iloc[0].year
    # look back no further than the dividend history actually goes
    lookback_years = min(year - first_dividend_year, years_to_invest)
    # kept as a date string so pandas compares it against the 'Date' column directly
    window_start = f'{year - lookback_years}-01-01'
    recent = dividends.loc[dividends['Date'] > window_start, 'Dividends']
    # mean() of an empty selection is NaN, which would poison every later calculation
    if recent.empty:
        return 0.0
    return recent.mean()

In [363]:
# function to calculate the average cost per share for a ticker
def CalculateAvgCostPerShare(ticker, years_to_invest, monthly_cost, current_year=None):
    """Return the mean ``'Open'`` price over the look-back window.

    The window spans the shorter of ``years_to_invest`` and the number of
    years between the first row of ``monthly_cost`` and ``current_year``.
    Only rows dated after 1 January of the window's first year are averaged.

    Args:
        ticker: unused; kept so existing callers keep working.
        years_to_invest: maximum number of years to look back.
        monthly_cost: price rows with ``'Date'`` and ``'Open'`` columns,
            oldest first (the first row is taken as the start of history).
        current_year: year to count back from. Defaults to the module-level
            ``year`` when omitted.

    Returns:
        The mean ``'Open'`` value in the window, or ``NaN`` when
        ``monthly_cost`` is empty or no row falls inside the window.
    """
    cost_per_share = pd.DataFrame(monthly_cost)
    # no price rows: report "unknown" the same way an empty window does
    if cost_per_share.empty:
        return float('nan')
    if current_year is None:
        current_year = year
    # year of the oldest available price row
    first_year = cost_per_share['Date'].iloc[0].year
    # look back no further than the price history actually goes
    lookback_years = min(current_year - first_year, years_to_invest)
    # kept as a date string so pandas compares it against the 'Date' column directly
    window_start = f'{current_year - lookback_years}-01-01'
    in_window = cost_per_share['Date'] > window_start
    return cost_per_share.loc[in_window, 'Open'].mean()

In [364]:
# current worth of the shares already held in one ticker
def calcCurrentWorth(portfolio, ticker, monthly_cost):
    """Return the holding's value: latest ``'Open'`` price times shares owned.

    Args:
        portfolio: DataFrame with ``'ticker'`` and ``'quantity'`` columns.
        ticker: symbol to look up in ``portfolio``.
        monthly_cost: price rows with an ``'Open'`` column; the last row is
            used as the most recent price.

    Raises:
        IndexError: if ``ticker`` is not in ``portfolio`` or ``monthly_cost``
            has no rows.
    """
    # shares owned for this ticker (first match is used)
    owned = portfolio.loc[portfolio['ticker'] == ticker, 'quantity']
    # opening price on the last available row
    latest_open = pd.DataFrame(monthly_cost)['Open'].values[-1]
    return latest_open * owned.values[0]

In [365]:
# calculate the average annual growth rate (AAGR) of a ticker's month-start opening price
def interestRate(ticker, years_to_invest, monthly_cost, current_year=None):
    """Return the average year-over-year growth of the month-start ``'Open'`` price.

    For each of the most recent years (at most ``years_to_invest``, and never
    reaching back to the first year present in ``monthly_cost``) the latest
    first-of-month price of that year is compared with the previous year's
    price for the same month, falling back to earlier months of the previous
    year when that month has no row. The yearly growth rates are averaged.

    Fixes over the earlier version: the previous-year lookup no longer builds
    a date with a literal ``m`` in it, a year without any first-of-month row
    is skipped instead of looping forever, and an empty set of growth rates
    no longer divides by zero.

    Args:
        ticker: unused; kept so existing callers keep working.
        years_to_invest: maximum number of yearly growth rates to average.
        monthly_cost: price rows with ``'Date'`` and ``'Open'`` columns,
            oldest first.
        current_year: most recent year to evaluate. Defaults to the
            module-level ``year`` when omitted.

    Returns:
        The mean of the yearly growth rates as a fraction (0.05 == 5 %), or
        ``0.0`` when no growth rate could be computed.
    """
    cost_per_share = pd.DataFrame(monthly_cost)
    if cost_per_share.empty:
        return 0.0
    if current_year is None:
        current_year = year

    # opening price of every first-of-month row, keyed by (year, month);
    # setdefault keeps the first row if a date ever appears twice
    opens = {}
    for stamp, price in zip(cost_per_share['Date'], cost_per_share['Open']):
        if stamp.day == 1:
            opens.setdefault((stamp.year, stamp.month), price)

    def latest_open(target_year, start_month):
        """Return (month, price) for the latest month <= start_month with data, else None."""
        for candidate in range(start_month, 0, -1):
            if (target_year, candidate) in opens:
                return candidate, opens[(target_year, candidate)]
        return None

    # the first year in the data is never used as a comparison base, so the
    # usable history starts one year later
    first_year = cost_per_share['Date'].iloc[0].year + 1
    # look back no further than the usable history; may be zero or negative
    n_years = min(current_year - first_year, years_to_invest)

    growth_rates = []
    for offset in range(n_years):
        recent = latest_open(current_year - offset, 12)
        if recent is None:
            # no first-of-month row in this year (e.g. the year has only just begun)
            continue
        found_month, recent_value = recent
        # start from the same calendar month one year earlier
        earlier = latest_open(current_year - offset - 1, found_month)
        if earlier is None:
            continue
        growth_rates.append(recent_value / earlier[1] - 1)

    # too little history to compare any two years
    if not growth_rates:
        return 0.0
    return sum(growth_rates) / len(growth_rates)

In [366]:
# current worth of the whole portfolio
def currentPortfolioWorth(Portfolio):
    """Return the combined current worth of every ticker in ``Portfolio``.

    Each ticker's month-start prices are fetched with ``MonthlyCost`` and
    valued with ``calcCurrentWorth``; the per-ticker values are summed.
    """
    return sum(
        calcCurrentWorth(Portfolio, symbol, MonthlyCost(symbol))
        for symbol in Portfolio['ticker']
    )

In [371]:
# predict the month-by-month growth of every ticker in the portfolio
def totalInvestmentPrediction (Portfolio,Monthly_investments,Years_to_invest):
    """Project each ticker's value month by month over the investment horizon.

    Month 1 of each ticker is its current worth. Every later month adds one
    twelfth of the ticker's average annual growth rate on the previous
    balance plus the ticker's share of the monthly contribution. Every third
    month the average dividend is added as well, computed on the number of
    shares the previous balance would buy at the average cost per share.

    Rows are collected in a list and turned into a DataFrame once at the
    end: ``DataFrame.append`` no longer exists in pandas 2.x and was
    quadratic when called once per row.

    Args:
        Portfolio: DataFrame with ``'ticker'``, ``'quantity'`` and
            ``'future_percents'`` columns.
        Monthly_investments: total amount contributed per month, split
            between tickers according to ``'future_percents'``.
        Years_to_invest: number of years to project.

    Returns:
        DataFrame with columns ``'Month'``, ``'Amount'`` and ``'Ticker'``,
        one row per ticker per month.
    """
    compounding_period = 12     # growth is applied monthly
    dividend_interval = 3       # dividends are added every third month
    months = Years_to_invest*compounding_period
    rows = []
    for ticker, percent in zip(Portfolio['ticker'], Portfolio['future_percents']):
        monthly_cost = MonthlyCost(ticker)
        # current worth of the holding is the starting balance
        principal = calcCurrentWorth(Portfolio,ticker,monthly_cost)
        # average annual growth rate of the ticker
        interest = interestRate(ticker,Years_to_invest,monthly_cost)
        # future_percents is given in whole percents, so scale it to a fraction
        monthly_contribution = (percent/100)*Monthly_investments
        dividends = CalculateAvgDividend(ticker,Years_to_invest)
        avg_cost_per_share = CalculateAvgCostPerShare(ticker,Years_to_invest,monthly_cost)

        amount = principal
        rows.append({'Month': 1, 'Amount': amount, 'Ticker': ticker})
        for month_number in range(2, months+1):
            # growth on last month's balance plus this month's contribution
            total = amount+(amount*(interest/compounding_period))+monthly_contribution
            if month_number%dividend_interval == 0:
                # shares implied by last month's balance times the average payout per share
                total += (amount/avg_cost_per_share)*dividends
            amount = total
            rows.append({'Month': month_number, 'Amount': amount, 'Ticker': ticker})
    return pd.DataFrame(rows, columns = ['Month', 'Amount','Ticker'])

In [372]:
# preview the month-by-month projection for the configured portfolio
totalInvestmentPrediction(
    Portfolio=portfolio,
    Monthly_investments=monthly_investments,
    Years_to_invest=years_to_invest,
)

Unnamed: 0,Month,Amount,Ticker
0,1,2320.452287,FXAIX
1,2,2356.131767,FXAIX
2,3,2404.018365,FXAIX
3,4,2440.658648,FXAIX
4,5,2477.720205,FXAIX
...,...,...,...
2515,356,9337.240350,FSRNX
2516,357,9460.502173,FSRNX
2517,358,9519.702830,FSRNX
2518,359,9579.264559,FSRNX


In [373]:
# add up the projected value of all tickers at the end of each year
def amountPeryear(Portfolio,Monthly_investments,Years_to_invest):
    """Return the projected total portfolio value for each year.

    Year 0 is the portfolio's current worth. Year ``i`` is the sum, over
    all tickers, of the projected amount at month ``i * 12``.

    Rows are collected in a list and turned into a DataFrame once at the
    end, because ``DataFrame.append`` no longer exists in pandas 2.x.

    Args:
        Portfolio: DataFrame describing the holdings; passed through to
            ``totalInvestmentPrediction`` and ``currentPortfolioWorth``.
        Monthly_investments: total amount contributed per month.
        Years_to_invest: number of years to project.

    Returns:
        DataFrame with columns ``'Year'`` and ``'Amount'`` and
        ``Years_to_invest + 1`` rows.
    """
    # month-by-month projection for every ticker
    total_portfolio = totalInvestmentPrediction(Portfolio,Monthly_investments,Years_to_invest)
    yearly_rows = [{'Year': 0, 'Amount': currentPortfolioWorth(Portfolio)}]
    for year_number in range(1,Years_to_invest+1):
        # the last month of each year is month 12, 24, 36, ...
        at_year_end = total_portfolio['Month'] == year_number*12
        per_year = total_portfolio.loc[at_year_end, 'Amount'].sum()
        yearly_rows.append({'Year': year_number, 'Amount': per_year})
    return pd.DataFrame(yearly_rows, columns = ['Year', 'Amount'])
    

In [None]:
# preview the year-by-year totals for the configured portfolio
amountPeryear(
    Portfolio=portfolio,
    Monthly_investments=monthly_investments,
    Years_to_invest=years_to_invest,
)

In [None]:
# plot the projected portfolio value against the year as a green line
total_investment = amountPeryear(portfolio,monthly_investments,years_to_invest)
fig = go.Figure(
    data=go.Scatter(
        x=total_investment['Year'],
        y=total_investment['Amount'],
        mode='lines',
        marker={'color': 'green'},
    )
)
fig.show()

In [None]:
# add a column with the cumulative money paid in, next to the projected value
def compareInvestedtoGrowth(total_investment, portfolio, monthly_investment, years):
    """Insert a ``'Money_Invested'`` column into ``total_investment``.

    The column starts at the portfolio's current worth and grows by twelve
    monthly contributions per year. ``total_investment`` is modified in
    place and the same object is returned.

    Args:
        total_investment: yearly projection to extend; the new column is
            inserted at position 2, and its row count must match the
            generated list.
        portfolio: holdings passed to ``currentPortfolioWorth``.
        monthly_investment: amount contributed per month.
        years: number of years covered by ``total_investment``.
    """
    per_year = monthly_investment*12
    starting_worth = currentPortfolioWorth(portfolio)
    # years 0 and 1 are seeded directly; each later year adds one more
    # year of contributions to the previous entry
    money_inv = [starting_worth, starting_worth+per_year]
    for _ in range(2,years+1):
        money_inv.append(money_inv[-1] + per_year)
    total_investment.insert(2, 'Money_Invested', money_inv, allow_duplicates=True)
    return total_investment

# show projected value and money invested side by side
compareInvestedtoGrowth(
    total_investment=total_investment,
    portfolio=portfolio,
    monthly_investment=monthly_investments,
    years=years_to_invest,
)