# Individual Stock Data Collection and Wrangling

In [25]:
# libraries
import pandas as pd
import yfinance as yf
from datetime import date
import numpy as np

goal: make functions for get  
- 1st of the month cost per share for a given time period
- quarterly dividend amt per share for a given time period
- calculate the average dividend 
- calculate the average monthly cost per share

In [26]:
# get the TICKER for the stocks we are interested in
# put these tickers in a dictionary
investments = pd.DataFrame({'ticker':['FXAIX','FSSNX','FSPSX','VDADX','FXNAX','VGAVX','FSRNX'],'Quantity':[16.81,18.957,11.455,76.756,12.256,197.257,18.878]})
investment_tickers = investments['ticker']
# get the number of years the user has been investing
years_invested = 5
# get the number of years the user plans to invest 
years_future_invest = 8

In [27]:
# calculate the year the individual started investing
today = date.today()
year_start_investing = today.year - years_invested
month = today.month
year = today.year

In [28]:
# Function to connect to yahoo finance and create soup object
def connectYahooFinance (ticker):
    ticker_data = yf.Ticker(ticker)
    ticker_data = pd.DataFrame(ticker_data.history(period = 'max'))
    ticker_data.reset_index(inplace = True)
    return ticker_data

In [29]:
connectYahooFinance('FXAIX')


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2011-05-04,37.664501,37.664501,37.664501,37.664501,0,0.0,0
1,2011-05-05,37.325256,37.325256,37.325256,37.325256,0,0.0,0
2,2011-05-06,37.475151,37.475151,37.475151,37.475151,0,0.0,0
3,2011-05-09,37.640827,37.640827,37.640827,37.640827,0,0.0,0
4,2011-05-10,37.948521,37.948521,37.948521,37.948521,0,0.0,0
...,...,...,...,...,...,...,...,...
2870,2022-09-28,129.559998,129.559998,129.559998,129.559998,0,0.0,0
2871,2022-09-29,126.839996,126.839996,126.839996,126.839996,0,0.0,0
2872,2022-09-30,124.930000,124.930000,124.930000,124.930000,0,0.0,0
2873,2022-10-03,128.169998,128.169998,128.169998,128.169998,0,0.0,0


In [30]:
# function to get historical monthly cost per share
def MonthlyCost (ticker):
    # first call the connect to yahoofinance function
    ticker_data = connectYahooFinance(ticker)
    # filter data to only include info from the first of the month
    return ticker_data[ticker_data['Date'].dt.is_month_start]

In [31]:
MonthlyCost('FXAIX')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
19,2011-06-01,36.828205,36.828205,36.828205,36.828205,0,0.000,0
41,2011-07-01,37.593502,37.593502,37.593502,37.593502,0,0.000,0
61,2011-08-01,35.968250,35.968250,35.968250,35.968250,0,0.000,0
84,2011-09-01,33.751289,33.751289,33.751289,33.751289,0,0.000,0
126,2011-11-01,34.243111,34.243111,34.243111,34.243111,0,0.000,0
...,...,...,...,...,...,...,...,...
2747,2022-04-01,157.093353,157.093353,157.093353,157.093353,0,0.000,0
2788,2022-06-01,142.105713,142.105713,142.105713,142.105713,0,0.000,0
2809,2022-07-01,132.740005,132.740005,132.740005,132.740005,0,0.577,0
2829,2022-08-01,143.059998,143.059998,143.059998,143.059998,0,0.000,0


In [32]:
# function to scrape historical dividend amt per share
def quarterlyDividends (ticker):
    # first call the conncet to yahoofinance function
    ticker_data = connectYahooFinance(ticker)
    # filter data to only include info when dividends were distributed
    return ticker_data[ticker_data['Dividends']!=0]

In [33]:
quarterlyDividends('FXAIX')

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
109,2011-10-07,32.458397,32.458397,32.458397,32.458397,0,0.223,0
158,2011-12-16,34.407173,34.407173,34.407173,34.407173,0,0.256,0
232,2012-04-04,39.71674,39.71674,39.71674,39.71674,0,0.186,0
296,2012-07-06,38.679863,38.679863,38.679863,38.679863,0,0.288,0
360,2012-10-05,41.944382,41.944382,41.944382,41.944382,0,0.282,0
407,2012-12-14,40.780766,40.780766,40.780766,40.780766,0,0.345,0
482,2013-04-05,45.095238,45.095238,45.095238,45.095238,0,0.271,0
545,2013-07-05,47.637569,47.637569,47.637569,47.637569,0,0.279,0
609,2013-10-04,49.607006,49.607006,49.607006,49.607006,0,0.309,0
658,2013-12-13,52.317486,52.317486,52.317486,52.317486,0,0.344,0


In [34]:
# function to calcuate the average dividends for each ticker
def CalculateAvgDividend(ticker):
    dividends = pd.DataFrame(quarterlyDividends(ticker))
    # need to get the data of the past five years
    five_years_ago =f'{today.year-5}-01-01'
    dividends = dividends[(dividends['Date']> five_years_ago)]
    average_dividend = dividends['Dividends'].mean()
    return average_dividend

In [35]:
CalculateAvgDividend('FXAIX')

0.4920454545454545

In [36]:
# function to calc avg cost per share for each ticker
def CalculateAvgCostPerShare(ticker):
    cost_per_share = pd.DataFrame(MonthlyCost(ticker))
    five_years_ago =f'{today.year-5}-01-01'
    cost_per_share = cost_per_share[(cost_per_share['Date']>five_years_ago)]
    average_cost = cost_per_share['Open'].mean()
    return average_cost

In [37]:
CalculateAvgCostPerShare('FXAIX')

110.53347446607506

In [38]:
# how much is your current initial investment worth
def calcCurrentWorth (ticker):
    quantity = investments.loc[investments['ticker'] == ticker]['Quantity']
    cost_per_share = pd.DataFrame(MonthlyCost(ticker))
    cost_per_share = cost_per_share.iloc[-1:]
    current_value = cost_per_share['Open'].values[0]*quantity.values[0]
    return current_value

In [39]:
calcCurrentWorth('FXAIX')

2320.452287139892

In [45]:
# calculate the annual interest rate of the individual stocks
def interestRate(ticker):
    # get a dataframe of the monthly costs for the specific ticker
    cost_per_share = pd.DataFrame(MonthlyCost(ticker))
    # get the current cost per share of the ticker
    current_value = cost_per_share.iloc[-1:]   
    current_value = current_value['Open'].values[0]
    # get the cost per share five years ago
    old_date =f'{today.year-6}-12-01'
    old_value = cost_per_share.loc[cost_per_share['Date']==old_date]
    old_value = old_value['Open'].values[0]
    # calculate the compounding annual growth rate of the stock
    CAGR = (current_value/old_value)**(1/(year-2015))-1
    return CAGR

In [46]:
interestRate('FXAIX')

0.10473965835058996

In [47]:
# compounding calculation
def compoundingCalc (ticker, monthly_investment, years_future_invest):
    # assign the variables for our calculation
    principal = calcCurrentWorth(ticker)            # find the current worth of the stock
    interest = interestRate(ticker)                 # find the growth rate of the stock over the past five years
    compounding_period = 12                         # assign how often the interest will compound, 12 = monthly
    year = years_future_invest                      # assign how lond the user plans to invest for
    monthly_contribution = monthly_investment       # assign how much the user plans to invest in this stock per month
    # create a dataframe to gold our results
    results = pd.DataFrame(columns = ['Year', 'Amount'])
    
    # itereate through the years the user plans to invest for
    for i in range(1,year+1):
        Year = i
        # perform the actual calculation
        Amount = principal*np.power((1 + interest / compounding_period), compounding_period * i)+(monthly_contribution)*(np.power((1 + interest / compounding_period), compounding_period * i)-1)/(interest / compounding_period)
        # add the results to the dataframe
        results =  results.append({'Year': Year, 'Amount': Amount}, ignore_index = True)
    return results

In [48]:
compoundingCalc('FXAIX',1000,8)

Unnamed: 0,Year,Amount
0,1.0,15168.671149
1,2.0,29429.126417
2,3.0,45257.04674
3,4.0,62824.723022
4,5.0,82323.383845
5,6.0,103965.277048
6,7.0,127985.980092
7,8.0,154646.964385


In [66]:
# add in dividend reinvestment
# compounding calculation
def compoundingCalcDividends (ticker, monthly_investment, years_future_invest):
    # assign the variables for our calculation
    principal = calcCurrentWorth(ticker)                        # find the current worth of the stock
    interest = interestRate(ticker)                             # find the growth rate of the stock over the past five years
    compounding_period = 12                                     # assign how often the interest will compound, 12 = monthly
    years = years_future_invest                                 # assign how lond the user plans to invest for
    monthly_contribution = monthly_investment                   # assign how much the user plans to invest in this stock per month
    dividends = CalculateAvgDividend(ticker)                    # calculate the average dividends returned
    dividends_compounding = 3                                   # the dividends compound quarterly
    avg_cost_per_share = CalculateAvgCostPerShare(ticker)       # calculate the average cost per share of the stock
    # create a dataframe to gold our results
    results = pd.DataFrame(columns = ['Year', 'Amount','Ticker'])
    
    # itereate through the years the user plans to invest for
    for i in range(1,years+1):
        Year = i
        # perform the actual calculation
        Amount = principal*np.power((1 + interest / compounding_period), compounding_period * i)\
                +(monthly_contribution)*(np.power((1 + interest / compounding_period), compounding_period * i)-1)/(interest / compounding_period)\
                +((principal/avg_cost_per_share)*dividends)*(np.power((1 + interest / dividends_compounding), dividends_compounding * i)-1)/(interest / dividends_compounding)
        Ticker = ticker
        # add the results to the dataframe
        results =  results.append({'Year': Year, 'Amount': Amount,'Ticker':Ticker}, ignore_index = True)
    return results

In [67]:
df=compoundingCalcDividends('FXAIX',1000,8)
df

Unnamed: 0,Year,Amount,Ticker
0,1,15200.754792,FXAIX
1,2,29496.773317,FXAIX
2,3,45364.113892,FXAIX
3,4,62975.48573,FXAIX
4,5,82522.581092,FXAIX
5,6,104218.161783,FXAIX
6,7,128298.374974,FXAIX
7,8,155025.323567,FXAIX


In [80]:
portfolio = pd.DataFrame({
    'ticker':['FXAIX','FSSNX','FSPSX','VDADX','FXNAX','VGAVX','FSRNX'],
    'quantity':[16.81,18.957,11.455,76.756,12.256,197.257,18.878],
    'category':['stock','stock','stock','bond','bond','bond','real estate'],
    'future_percents':[30,25,25,5,5,5,5]
})
# monthly amount to invest total
monthly_investments = 3000
# Number of years plan to invest
years_to_invest = 8

# make a dataframe that has all of the investments
def totalInvestmentPrediction (Portfolio,Monthly_investments,Years_to_invest):
    total_portfolio = []
    for ticker, percent in zip(Portfolio['ticker'], Portfolio['future_percents']):
        invest_per_month = percent*Monthly_investments
        calc = compoundingCalcDividends(ticker,invest_per_month,Years_to_invest)
        total_portfolio.append(calc)

    return total_portfolio

In [81]:
totalInvestmentPrediction(portfolio,monthly_investments,years_to_invest)