## Analysis of Robinhood Portfolio (robin_stocks)

Project by: Pete Aguirre II

In this project, I will do a return-vs-risk analysis of my current Robinhood stock portfolio with the help of 
the following tools:
- Python 3
- Jupyter Lab/Notebook
- Beautiful Soup
- Markowitz Efficient Frontier

In [None]:
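# Installing libraries (one line per package imported below)
# pip install pandas
# pip install numpy
# pip install matplotlib
# pip install seaborn
# pip install robin_stocks
# pip install pyotp
# pip install pandas-datareader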

# Libraries Used 
import os
from getpass import getpass

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyotp
import robin_stocks as r
import seaborn as sns
from pandas_datareader import data as wb

# Magic
%matplotlib inline

## 1.) Data Collection (updated)
Notes: Log in with Robinhood and portfolio will be gathered automatically

In [3]:
# Robinhood Log In
# The password is read with getpass() so it is never echoed into the notebook
# output (input() would store it in the saved .ipynb).
email = input("Enter email: ")
password = getpass("Enter password: ")

# The 2-factor (TOTP) secret is taken from the ROBINHOOD_TOTP_SECRET environment
# variable when set; otherwise it is prompted for without echo. It is never
# hard-coded in the notebook.
totp_secret = os.environ.get("ROBINHOOD_TOTP_SECRET") or getpass("Enter 2FA (TOTP) secret: ")
totp = pyotp.TOTP(totp_secret).now()
log_in = r.login(email, password, expiresIn=500, by_sms=True, mfa_code=totp)

try:
    # Get stocks value
    stonks = r.build_holdings()
finally:
    # Logout even if fetching the holdings fails, so no session is left open
    r.logout()

Enter email:  [redacted]
Enter password:  [redacted]
Enter Robinhood code for validation:  [redacted]


In [4]:
# Company tickers
# The holdings dict is keyed by ticker, so its keys alone form the one-column
# 'Symbol' frame; the per-ticker values are turned into a frame separately.
symbols_df = pd.DataFrame(np.array(list(stonks.keys())), columns=['Symbol'])
symbols_df

Unnamed: 0,Symbol
0,AAPL
1,BABA
2,JD
3,XOM
4,LUV
5,DAL
6,PFE
7,BRK.B
8,AZN
9,ARKW


In [5]:
# Other headers
# Every value of the holdings dict (one per symbol key) becomes one row.
# Fields that are not needed are dropped and the rest get display-friendly names.
dropped_fields = ['percent_change', 'equity_change', 'type', 'id']
display_names = {
    'price': 'Price',
    'quantity': 'Quantity',
    'average_buy_price': 'Average Price',
    'equity': 'Equity',
    'name': 'Name',
    'pe_ratio': 'P/E',
    'percentage': 'Percentage',
}
variables_df = (
    pd.DataFrame.from_dict(stonks.values())
    .drop(dropped_fields, axis=1)
    .rename(columns=display_names)
)
variables_df

Unnamed: 0,Average Price,Equity,Name,P/E,Percentage,Price,Quantity
0,426.615,5345.88,Apple,34.6282,32.03,445.49,12.0
1,246.0856,2520.5,Alibaba,32.578,15.1,252.05,10.0
2,64.1613,494.64,JD.com,111.914,2.96,61.83,8.0
3,42.41,868.4,Exxon Mobil,25.987,5.2,43.42,20.0
4,31.7173,498.45,Southwest Airlines,82.2006,2.99,33.23,15.0
5,25.028,408.0,Delta Air Lines,,2.44,27.2,15.0
6,36.963,384.5,Pfizer,15.2185,2.3,38.45,10.0
7,208.93,1263.0,Berkshire Hathaway,51.332,7.57,210.5,6.0
8,56.577,555.1,AstraZeneca,68.5154,3.33,55.51,10.0
9,102.5551,72.79,ARK Next Generation Internet,-170.56676,0.44,100.4,0.724976


## 2.) Data Cleaning

In [7]:
# Concatenate both DataFrames side by side (symbols first, then their fields)
my_portfolio = pd.concat([symbols_df, variables_df], axis='columns')
my_portfolio

Unnamed: 0,Symbol,Average Price,Equity,Name,P/E,Percentage,Price,Quantity
0,AAPL,426.615,5345.88,Apple,34.6282,32.03,445.49,12.0
1,BABA,246.0856,2520.5,Alibaba,32.578,15.1,252.05,10.0
2,JD,64.1613,494.64,JD.com,111.914,2.96,61.83,8.0
3,XOM,42.41,868.4,Exxon Mobil,25.987,5.2,43.42,20.0
4,LUV,31.7173,498.45,Southwest Airlines,82.2006,2.99,33.23,15.0
5,DAL,25.028,408.0,Delta Air Lines,,2.44,27.2,15.0
6,PFE,36.963,384.5,Pfizer,15.2185,2.3,38.45,10.0
7,BRK.B,208.93,1263.0,Berkshire Hathaway,51.332,7.57,210.5,6.0
8,AZN,56.577,555.1,AstraZeneca,68.5154,3.33,55.51,10.0
9,ARKW,102.5551,72.79,ARK Next Generation Internet,-170.56676,0.44,100.4,0.724976


In [8]:
# Check variable types
# Prints each column's non-null count and dtype for the freshly concatenated frame.
my_portfolio.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
Symbol           10 non-null object
Average Price    10 non-null object
Equity           10 non-null object
Name             10 non-null object
P/E              9 non-null object
Percentage       10 non-null object
Price            10 non-null object
Quantity         10 non-null object
dtypes: object(8)
memory usage: 720.0+ bytes


In [9]:
# Cast the numeric columns to float.
# 'Average Price', 'Equity' and 'Percentage' are additionally rounded to 2 decimals;
# 'Price', 'Quantity' and 'P/E' keep their full precision.
for column in ['Price', 'Quantity', 'P/E']:
    my_portfolio[column] = my_portfolio[column].astype(float)
for column in ['Average Price', 'Equity', 'Percentage']:
    my_portfolio[column] = my_portfolio[column].astype(float).round(2)
my_portfolio

Unnamed: 0,Symbol,Average Price,Equity,Name,P/E,Percentage,Price,Quantity
0,AAPL,426.62,5345.88,Apple,34.6282,32.03,445.49,12.0
1,BABA,246.09,2520.5,Alibaba,32.578,15.1,252.05,10.0
2,JD,64.16,494.64,JD.com,111.914,2.96,61.83,8.0
3,XOM,42.41,868.4,Exxon Mobil,25.987,5.2,43.42,20.0
4,LUV,31.72,498.45,Southwest Airlines,82.2006,2.99,33.23,15.0
5,DAL,25.03,408.0,Delta Air Lines,,2.44,27.2,15.0
6,PFE,36.96,384.5,Pfizer,15.2185,2.3,38.45,10.0
7,BRK.B,208.93,1263.0,Berkshire Hathaway,51.332,7.57,210.5,6.0
8,AZN,56.58,555.1,AstraZeneca,68.5154,3.33,55.51,10.0
9,ARKW,102.56,72.79,ARK Next Generation Internet,-170.56676,0.44,100.4,0.724976


In [10]:
# Check once more
# Note: at this point every column that was cast above should report float64;
# only 'Symbol' and 'Name' were left uncast.
my_portfolio.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
Symbol           10 non-null object
Average Price    10 non-null float64
Equity           10 non-null float64
Name             10 non-null object
P/E              9 non-null float64
Percentage       10 non-null float64
Price            10 non-null float64
Quantity         10 non-null float64
dtypes: float64(6), object(2)
memory usage: 720.0+ bytes


In [11]:
# Reorder the columns for readability (descriptive columns first, then the numbers).
# .copy() makes the result an independent frame: selecting a list of columns
# yields an object pandas may flag as a copy of the original, and the later
# in-place assignment to my_portfolio['Symbol'] would then raise
# SettingWithCopyWarning.
column_order = ["Name", "Symbol", "Price", "Quantity", "Average Price", "Equity", "P/E", "Percentage"]
my_portfolio = my_portfolio[column_order].copy()
my_portfolio

Unnamed: 0,Name,Symbol,Price,Quantity,Average Price,Equity,P/E,Percentage
0,Apple,AAPL,445.49,12.0,426.62,5345.88,34.6282,32.03
1,Alibaba,BABA,252.05,10.0,246.09,2520.5,32.578,15.1
2,JD.com,JD,61.83,8.0,64.16,494.64,111.914,2.96
3,Exxon Mobil,XOM,43.42,20.0,42.41,868.4,25.987,5.2
4,Southwest Airlines,LUV,33.23,15.0,31.72,498.45,82.2006,2.99
5,Delta Air Lines,DAL,27.2,15.0,25.03,408.0,,2.44
6,Pfizer,PFE,38.45,10.0,36.96,384.5,15.2185,2.3
7,Berkshire Hathaway,BRK.B,210.5,6.0,208.93,1263.0,51.332,7.57
8,AstraZeneca,AZN,55.51,10.0,56.58,555.1,68.5154,3.33
9,ARK Next Generation Internet,ARKW,100.4,0.724976,102.56,72.79,-170.56676,0.44


In [12]:
# Convert any '.' to '-' in symbols (e.g. BRK.B -> BRK-B)
# regex=False makes '.' a literal dot. The default of `regex` has differed between
# pandas versions, and as a regular expression '.' matches every character, which
# would turn each symbol into a run of dashes.
my_portfolio['Symbol'] = my_portfolio['Symbol'].str.replace(".", "-", regex=False)

In [13]:
# Confirm that '.' has been replaced by '-' in the 'Symbol' column
# (only symbols that contained a dot should look different from the previous output)
my_portfolio

Unnamed: 0,Name,Symbol,Price,Quantity,Average Price,Equity,P/E,Percentage
0,Apple,AAPL,445.49,12.0,426.62,5345.88,34.6282,32.03
1,Alibaba,BABA,252.05,10.0,246.09,2520.5,32.578,15.1
2,JD.com,JD,61.83,8.0,64.16,494.64,111.914,2.96
3,Exxon Mobil,XOM,43.42,20.0,42.41,868.4,25.987,5.2
4,Southwest Airlines,LUV,33.23,15.0,31.72,498.45,82.2006,2.99
5,Delta Air Lines,DAL,27.2,15.0,25.03,408.0,,2.44
6,Pfizer,PFE,38.45,10.0,36.96,384.5,15.2185,2.3
7,Berkshire Hathaway,BRK-B,210.5,6.0,208.93,1263.0,51.332,7.57
8,AstraZeneca,AZN,55.51,10.0,56.58,555.1,68.5154,3.33
9,ARK Next Generation Internet,ARKW,100.4,0.724976,102.56,72.79,-170.56676,0.44


## 3.) Data Collection (Historical Prices)

In [14]:
# Collect historical prices
# One column per ticker holding its 'Adj Close' series, requested from the
# 'yahoo' data source starting at start_date.
tickers = my_portfolio['Symbol']
start_date = '2000-01-01'
my_data = pd.DataFrame()
for symbol in tickers:
    history = wb.DataReader(symbol, data_source='yahoo', start=start_date)
    my_data[symbol] = history['Adj Close']

In [None]:
# Keep only the rows where every ticker has a price (drop a row if any value is missing)
my_data = my_data.dropna(axis=0, how='any')
my_data.info()

In [None]:
# Daily log returns per ticker: ln(price_t / price_{t-1}).
# The first row has no previous price, so it comes out as NaN.
previous_close = my_data.shift(1)
sec_returns = np.log(my_data.div(previous_close))
# Bare expression kept from the original cell; it is not the cell's last
# statement, so it produces no output.
sec_returns

sec_returns.info()

In [None]:
# Earliest rows of the daily log returns
sec_returns.head()

In [None]:
# Most recent rows of the daily log returns
sec_returns.tail()

## Data Analysis

In [None]:
# Show the daily log-return frame that the analysis below is based on
sec_returns

In [None]:
# Per-ticker return (mean) and risk (standard deviation) of the log returns,
# reported daily and annualised. Annualising uses 250 periods per year:
# the mean is multiplied by 250, the standard deviation by sqrt(250).
for symbol in tickers:
    daily_mean = sec_returns[symbol].mean()
    daily_std = sec_returns[symbol].std()

    # Annual figures in percent, rounded to 4 decimals
    mean_return = round((daily_mean*250)*100, 4)
    std_return = round((daily_std*250**.5)*100, 4)

    print("")
    print("#", symbol)
    print("DAILY")
    print("Daily Return:", round(daily_mean*100, 4), "%")
    print("Daily Risk:", round(daily_std*100, 4), "%")

    print("")

    print("ANNUAL")
    print("Annual Return:", mean_return, "%")
    print("Annual Risk:", std_return, "%")
    

    
    

In [None]:
# Index label (presumably the trading date) of each ticker's lowest daily log return
sec_returns.idxmin() 

In [None]:
# Index label (presumably the trading date) of each ticker's highest daily log return
sec_returns.idxmax()

In [None]:
# Covariance (of the daily log returns)
# Shows whether two stocks tend to move together: a positive value means they
# move in the same direction, a negative value in opposite directions, and a
# value near zero means no clear linear relationship.
return_cov = sec_returns.cov()
return_cov

In [None]:
# Annualised covariance
# Same pairwise covariance of the daily log returns as before, scaled by 250
# periods per year. This overwrites the daily `return_cov`.
daily_cov = sec_returns.cov()
return_cov = daily_cov * 250
return_cov

In [None]:
# Correlations
# Measures how strong the relationship between two stocks' daily log returns is.
# Unlike covariance, the value does not depend on the scale of the returns.
return_corr = sec_returns.corr()
return_corr

In [None]:
# Calculating Diversifiable and Non-Diversifiable risk of the current portfolio
#
# Definitions used in this cell (all figures annualised with 250 periods per year):
#   portfolio variance      = w' * Cov * w
#   non-diversifiable risk  = sum_i  w_i^2 * var_i
#   diversifiable risk      = portfolio variance - non-diversifiable risk
#
# Weights: a 'Weight' column is used when my_portfolio has one. Otherwise the
# weights are derived from 'Equity' and normalised to sum to 1 (the 'Percentage'
# column is not used because it need not add up to 100 for the listed holdings).
# The weights are positional: row i of my_portfolio must correspond to column i
# of sec_returns, which holds because both follow the order of `tickers`.
if 'Weight' in my_portfolio.columns:
    weights = my_portfolio['Weight'].astype(float).to_numpy()
else:
    equity = my_portfolio['Equity'].astype(float).to_numpy()
    weights = equity / equity.sum()

annual_cov = sec_returns.cov() * 250

# Portfolio variance
pflio_var = np.dot(weights.T, np.dot(annual_cov, weights))
print("Portfolio Variance", pflio_var)

# Portfolio volatility (square root of the variance)
pflio_vol2 = np.sqrt(pflio_var)
print("Portfolio Volatility:", pflio_vol2)

# Annual variance of each ticker, labelled with the holding's name
var_a = np.array([sec_returns[t].var() * 250 for t in tickers])
variances = pd.DataFrame({'Name': my_portfolio['Name'].to_numpy(), 'Variance': var_a})

# Each asset's own contribution: w_i^2 * var_i
weighted_variances = weights ** 2 * variances['Variance'].to_numpy()

print("")
# Diversifiable Risk
div_risk = pflio_var - weighted_variances.sum()
print("Diversifiable Risk:", div_risk)

print("")
# Non Diversifiable Risk, obtained two ways as a consistency check
non_div_risk1 = pflio_var - div_risk
print("Non-Diversifiable Risk:", non_div_risk1)

print("")
non_div_risk2 = weighted_variances.sum()

print("")
print("Non-Diversifiable Risk:", non_div_risk2)
# Floating-point results are compared with a tolerance; `==` can report False
# for values that differ only by rounding error.
print("Both calculations agree:", np.isclose(non_div_risk1, non_div_risk2))

In [None]:
# Number of assets in the portfolio (one per ticker); sizes the random weight vectors below
no_assets = len(tickers)
no_assets

In [None]:
# Simulate random portfolios
# For each of N_PORTFOLIOS random weight vectors (non-negative, summing to 1),
# record the annualised expected return and volatility (250 periods per year).
N_PORTFOLIOS = 10000
SEED = 42

# Seed the generator so a Restart & Run All reproduces the same scenarios;
# without it, any later reference to a specific scenario index points at a
# different portfolio on every run.
np.random.seed(SEED)

# Loop-invariant inputs, computed once instead of once per simulated portfolio
mean_daily_returns = sec_returns.mean().to_numpy()
annual_cov = (sec_returns.cov() * 250).to_numpy()

pflio_ret = []
pflio_vol = []
pflio_wei = []

for x in range(N_PORTFOLIOS):
    weights = np.random.random(no_assets)
    weights /= np.sum(weights)

    pflio_wei.append(weights)
    pflio_ret.append(np.sum(weights * mean_daily_returns) * 250)
    pflio_vol.append(np.sqrt(np.dot(weights.T, np.dot(annual_cov, weights))))

# Row i of each array describes the same simulated portfolio
pflio_wei = np.array(pflio_wei)
pflio_ret = np.array(pflio_ret)
pflio_vol = np.array(pflio_vol)

In [None]:
# One row per simulated portfolio, ordered from lowest to highest return.
# Sorting keeps the original index labels, so a label still identifies the
# matching row of pflio_wei.
scenario_columns = {'Return': pflio_ret, 'Volatility': pflio_vol}
pflio_scenarios = pd.DataFrame(scenario_columns).sort_values(by='Return', ascending=True)

In [None]:
# Lowest-return scenarios (the frame is sorted by ascending 'Return')
pflio_scenarios.head()

In [None]:
# The 3500 highest-return scenarios (the frame is sorted by ascending 'Return')
pflio_scenarios.tail(3500)

In [None]:
# Efficient Frontier
# Scatter of every simulated portfolio: volatility on x, return on y.
ax = pflio_scenarios.plot(x='Volatility', y='Return', kind='scatter', figsize=(10,6))
ax.set_xlabel('Expected Volatility')
ax.set_ylabel('Expected Return')

# savefig() returns None, so eff_front holds the Figure itself rather than the
# return value of the save call.
eff_front = ax.get_figure()
eff_front.savefig("efficient_frontier2.png")

In [None]:
# For Testing purposes
# Export the weights of one simulated portfolio next to their ticker symbols.
# CHOSEN_PORTFOLIO is a row position in pflio_wei (equivalently an index label of
# pflio_scenarios). NOTE(review): 3273 was hand-picked; confirm it still points at
# the intended scenario whenever the simulation is re-run.
CHOSEN_PORTFOLIO = 3273

tickers2 = np.array(tickers)
pflio_wei2 = pflio_wei[CHOSEN_PORTFOLIO]

# Weights are in the order of `tickers`, so they are paired with those symbols directly
ideal_portfolio = pd.DataFrame({'Symbol': tickers2, 'Weights': pflio_wei2})
ideal_portfolio.to_csv('ideal_weights.csv', index=False)
ideal_portfolio

## Data Visualization

In [None]:
# Price history rebased so every ticker starts at 100 on the first row,
# which makes the series comparable on one chart.
normalized_prices = my_data / my_data.iloc[0] * 100
normalized_prices.plot(figsize=(20, 15))

In [None]:
# Pairwise regression plots of the daily log returns.
# The first row is skipped because it holds no return (no previous price).
sns.set_style('whitegrid')
regression = sns.pairplot(sec_returns[1:], kind="reg")
# savefig() returns None; its result is not assigned back, so `regression`
# keeps referring to the grid object.
regression.savefig("regression.png")

In [None]:
# Heatmap of the return correlations.
# sns.heatmap() returns a matplotlib Axes, which has no savefig(); the image is
# saved through the Figure that owns the Axes instead.
fig, ax = plt.subplots(figsize=(15, 15))
heatmap = sns.heatmap(return_corr, annot=True, square=True, cmap='coolwarm', ax=ax)
fig.savefig("heatmap.png")

## TO DO LIST:
- Create a daily, weekly, quarterly, yearly portfolio performance
- Learn how to create pie graphs
- Create interactive dashboards 
- Fix bugs in the diversifiable and non-diversifiable portfolio risk analysis 
- Learn Monte Carlo Simulations