# Individual Stock Data Collection and Wrangling

In [136]:
# libraries
import pandas as pd
import yfinance as yf
from datetime import date
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

goal: make functions for get  
- 1st of the month cost per share for a given time period
- quarterly dividend amt per share for a given time period
- calculate the average dividend 
- calculate the average monthly cost per share

In [137]:
portfolio = pd.DataFrame({
    'ticker':['FXAIX','FSSNX','FSPSX','VDADX','FXNAX','VGAVX','FSRNX'],
    'quantity':[16.81,18.957,11.455,76.756,12.256,197.257,18.878],
    'category':['stock','stock','stock','bond','bond','bond','real estate'],
    'future_percents':[0.30,0.25,0.25,0.05,0.05,0.05,0.05]
})
# monthly amount to invest total
monthly_investments = 6000
# Number of years plan to invest
years_to_invest = 9

In [138]:
# calculate the year the individual started investing
today = date.today()
month = today.month
year = today.year

In [139]:
# Function to connect to yahoo finance and create soup object
def connectYahooFinance (ticker):
    ticker_data = yf.Ticker(ticker)
    ticker_data = pd.DataFrame(ticker_data.history(period = 'max'))
    ticker_data.reset_index(inplace = True)
    return ticker_data

In [140]:
connectYahooFinance('FXAIX')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2011-05-04,37.664501,37.664501,37.664501,37.664501,0,0.0,0
1,2011-05-05,37.325237,37.325237,37.325237,37.325237,0,0.0,0
2,2011-05-06,37.475151,37.475151,37.475151,37.475151,0,0.0,0
3,2011-05-09,37.640835,37.640835,37.640835,37.640835,0,0.0,0
4,2011-05-10,37.948521,37.948521,37.948521,37.948521,0,0.0,0
...,...,...,...,...,...,...,...,...
2872,2022-09-30,124.930000,124.930000,124.930000,124.930000,0,0.0,0
2873,2022-10-03,128.169998,128.169998,128.169998,128.169998,0,0.0,0
2874,2022-10-04,132.100006,132.100006,132.100006,132.100006,0,0.0,0
2875,2022-10-05,131.839996,131.839996,131.839996,131.839996,0,0.0,0


In [141]:
# function to get historical monthly cost per share
def MonthlyCost (ticker):
    # first call the connect to yahoofinance function
    ticker_data = connectYahooFinance(ticker)
    # filter data to only include info from the first of the month
    return ticker_data[ticker_data['Date'].dt.is_month_start]

In [142]:
MonthlyCost('FXAIX')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
19,2011-06-01,36.828194,36.828194,36.828194,36.828194,0,0.000,0
41,2011-07-01,37.593491,37.593491,37.593491,37.593491,0,0.000,0
61,2011-08-01,35.968243,35.968243,35.968243,35.968243,0,0.000,0
84,2011-09-01,33.751289,33.751289,33.751289,33.751289,0,0.000,0
126,2011-11-01,34.243118,34.243118,34.243118,34.243118,0,0.000,0
...,...,...,...,...,...,...,...,...
2747,2022-04-01,157.093353,157.093353,157.093353,157.093353,0,0.000,0
2788,2022-06-01,142.105713,142.105713,142.105713,142.105713,0,0.000,0
2809,2022-07-01,132.740005,132.740005,132.740005,132.740005,0,0.577,0
2829,2022-08-01,143.059998,143.059998,143.059998,143.059998,0,0.000,0


In [143]:
# function to scrape historical dividend amt per share
def quarterlyDividends (ticker):
    # first call the conncet to yahoofinance function
    ticker_data = connectYahooFinance(ticker)
    # filter data to only include info when dividends were distributed
    return ticker_data[ticker_data['Dividends']!=0]

In [144]:
quarterlyDividends('FXAIX')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
109,2011-10-07,32.458378,32.458378,32.458378,32.458378,0,0.223,0
158,2011-12-16,34.407169,34.407169,34.407169,34.407169,0,0.256,0
232,2012-04-04,39.716743,39.716743,39.716743,39.716743,0,0.186,0
296,2012-07-06,38.679874,38.679874,38.679874,38.679874,0,0.288,0
360,2012-10-05,41.944386,41.944386,41.944386,41.944386,0,0.282,0
407,2012-12-14,40.780762,40.780762,40.780762,40.780762,0,0.345,0
482,2013-04-05,45.09523,45.09523,45.09523,45.09523,0,0.271,0
545,2013-07-05,47.637569,47.637569,47.637569,47.637569,0,0.279,0
609,2013-10-04,49.607006,49.607006,49.607006,49.607006,0,0.309,0
658,2013-12-13,52.317482,52.317482,52.317482,52.317482,0,0.344,0


In [145]:
# function to calcuate the average dividends for each ticker
def CalculateAvgDividend(ticker):
    dividends = pd.DataFrame(quarterlyDividends(ticker))
    # need to get the data of the past five years
    five_years_ago =f'{today.year-5}-01-01'
    dividends = dividends[(dividends['Date']> five_years_ago)]
    average_dividend = dividends['Dividends'].mean()
    return average_dividend

In [146]:
CalculateAvgDividend('FXAIX')

0.4920454545454545

In [147]:
# function to calc avg cost per share for each ticker
def CalculateAvgCostPerShare(ticker):
    cost_per_share = pd.DataFrame(MonthlyCost(ticker))
    five_years_ago =f'{today.year-5}-01-01'
    cost_per_share = cost_per_share[(cost_per_share['Date']>five_years_ago)]
    average_cost = cost_per_share['Open'].mean()
    return average_cost

In [148]:
CalculateAvgCostPerShare('FXAIX')

110.53347463193147

In [149]:
# how much is your current initial investment worth
def calcCurrentWorth (ticker):
    quantity = portfolio.loc[portfolio['ticker'] == ticker]['quantity']
    cost_per_share = pd.DataFrame(MonthlyCost(ticker))
    cost_per_share = cost_per_share.iloc[-1:]
    current_value = cost_per_share['Open'].values[0]*quantity.values[0]
    return current_value

In [150]:
calcCurrentWorth('FXAIX')

2320.452287139892

In [151]:
# calculate the annual interest rate of the individual stocks
def interestRate(ticker):
    # get a dataframe of the monthly costs for the specific ticker
    cost_per_share = pd.DataFrame(MonthlyCost(ticker))
    # get the current cost per share of the ticker
    current_value = cost_per_share.iloc[-1:]   
    current_value = current_value['Open'].values[0]
    # get the cost per share five years ago
    old_date =f'{today.year-6}-12-01'
    old_value = cost_per_share.loc[cost_per_share['Date']==old_date]
    old_value = old_value['Open'].values[0]
    # calculate the compounding annual growth rate of the stock
    CAGR = (current_value/old_value)**(1/(year-2015))-1
    return CAGR

In [152]:
interestRate('FXAIX')

0.10473965835058996

In [153]:
# compounding calculation
def compoundingCalc (ticker, monthly_investment, years_future_invest):
    # assign the variables for our calculation
    principal = calcCurrentWorth(ticker)            # find the current worth of the stock
    interest = interestRate(ticker)                 # find the growth rate of the stock over the past five years
    compounding_period = 12                         # assign how often the interest will compound, 12 = monthly
    year = years_future_invest                      # assign how lond the user plans to invest for
    monthly_contribution = monthly_investment       # assign how much the user plans to invest in this stock per month
    # create a dataframe to gold our results
    results = pd.DataFrame(columns = ['Year', 'Amount'])
    
    # itereate through the years the user plans to invest for
    for i in range(1,year+1):
        Year = i

        # perform the actual calculation
        Amount = principal*np.power((1 + interest / compounding_period), compounding_period * i)+(monthly_contribution)*(np.power((1 + interest / compounding_period), compounding_period * i)-1)/(interest / compounding_period)
        # add the results to the dataframe
        results =  results.append({'Year': Year, 'Amount': Amount}, ignore_index = True)
    return results

In [154]:
compoundingCalc('FXAIX',1000,8)

Unnamed: 0,Year,Amount
0,1.0,15168.671149
1,2.0,29429.126417
2,3.0,45257.04674
3,4.0,62824.723022
4,5.0,82323.383845
5,6.0,103965.277048
6,7.0,127985.980092
7,8.0,154646.964385


In [155]:
# compounding calculation with dividends
def compoundingCalcDividends (ticker, monthly_investment, years_future_invest):
        # create a dataframe to gold our results
        results = pd.DataFrame(columns = ['Month', 'Amount'])

        principal = calcCurrentWorth(ticker)                        # find the current worth of the stock
        interest = interestRate(ticker)                             # find the growth rate of the stock over the past five years
        compounding_period = 12                                     # assign how often the interest will compound, 12 = monthly
        months = years_future_invest*12                             # assign how lond the user plans to invest for
        monthly_contribution = monthly_investment                   # assign how much the user plans to invest in this stock per month
        dividends = CalculateAvgDividend(ticker)                    # calculate the average dividends returned
        dividends_compounding = 3                                   # the dividends compound quarterly
        avg_cost_per_share = CalculateAvgCostPerShare(ticker)       # calculate the average cost per share of the stock
        results =  results.append({'Month': 1, 'Amount': principal}, ignore_index = True)
        for i in range(2,months+1):
                Month = i
                amt = results.iloc[-1:]
                amt = amt['Amount'].values[0]
                if (i%3)==0:
                        total = amt+(amt*(interest/12))+monthly_contribution+(amt/avg_cost_per_share)*dividends
                        results =  results.append({'Month': Month, 'Amount': total}, ignore_index = True)
                else:
                        total = amt+(amt*(interest/12))+monthly_contribution
                        results =  results.append({'Month': Month, 'Amount': total}, ignore_index = True)
        return results

In [156]:
compoundingCalcDividends('FXAIX',1000,8)

Unnamed: 0,Month,Amount
0,1.0,2320.452287
1,2.0,3340.705899
2,3.0,4384.735918
3,4.0,5423.007223
4,5.0,6470.340876
...,...,...
91,92.0,154531.714270
92,93.0,157568.419843
93,94.0,159943.724819
94,95.0,162339.762176


In [157]:

def totalInvestmentPrediction (Portfolio,Monthly_investments,Years_to_invest):
    results = pd.DataFrame(columns = ['Month', 'Amount','Ticker'])

    for ticker, percent in zip(Portfolio['ticker'], Portfolio['future_percents']):

        principal = calcCurrentWorth(ticker)                        # find the current worth of the stock
        interest = interestRate(ticker)                             # find the growth rate of the stock over the past five years
        compounding_period = 12                                     # assign how often the interest will compound, 12 = monthly
        months = Years_to_invest*compounding_period                                     # assign how lond the user plans to invest for
        monthly_contribution = percent*Monthly_investments          # assign how much the user plans to invest in this stock per month
        dividends = CalculateAvgDividend(ticker)                    # calculate the average dividends returned
        dividends_compounding = 3                                   # the dividends compound quarterly
        avg_cost_per_share = CalculateAvgCostPerShare(ticker)       # calculate the average cost per share of the stock
        results =  results.append({'Month': 1, 'Amount': principal, 'Ticker':ticker}, ignore_index = True)

        for i in range(2,months+1):
            Month = i
            Ticker = ticker
            amt = results.iloc[-1:]
            amt = amt['Amount'].values[0]
            if (i%dividends_compounding)==0:
                    total = amt+(amt*(interest/12))+monthly_contribution+(amt/avg_cost_per_share)*dividends
                    results =  results.append({'Month': Month, 'Amount': total, 'Ticker':Ticker}, ignore_index = True)
            else:
                    total = amt+(amt*(interest/12))+monthly_contribution
                    results =  results.append({'Month': Month, 'Amount': total, 'Ticker':Ticker}, ignore_index = True)
    return results

In [158]:
totalInvestmentPrediction(portfolio,monthly_investments,years_to_invest)

Unnamed: 0,Month,Amount,Ticker
0,1,2320.452287,FXAIX
1,2,4140.705902,FXAIX
2,3,5995.279811,FXAIX
3,4,7847.608441,FXAIX
4,5,9716.104760,FXAIX
...,...,...,...
751,104,42226.443827,FSRNX
752,105,42959.406381,FSRNX
753,106,43403.651239,FSRNX
754,107,43849.387738,FSRNX


In [159]:
# now we need to add up all of the rows that have the same years
def amountPeryear(Portfolio,Monthly_investments,Years_to_invest):
    total_portfolio = totalInvestmentPrediction(Portfolio,Monthly_investments,Years_to_invest)
    years = Years_to_invest  
    sum_portfolio = pd.DataFrame(columns = ['Year', 'Amount'])
    for i in range(1,years+1):
        per_year = total_portfolio[total_portfolio['Month']==i*12]['Amount'].sum()
        sum_portfolio =  sum_portfolio.append({'Year': i, 'Amount': per_year}, ignore_index = True)
        
    return sum_portfolio
    

In [160]:
amountPeryear(portfolio,monthly_investments,years_to_invest)

Unnamed: 0,Year,Amount
0,1.0,79422.73
1,2.0,162664.9
2,3.0,254300.6
3,4.0,355258.1
4,5.0,466572.1
5,6.0,589396.8
6,7.0,725019.2
7,8.0,874875.4
8,9.0,1040568.0


In [161]:
# graph the total data
total_investment = amountPeryear(portfolio,monthly_investments,years_to_invest)
fig = go.Figure(data=go.Scatter(x=total_investment['Year'], y=total_investment['Amount'], mode='lines', marker=dict(color='green')))
fig.show()