# CPS600 - Python Programming for Finance 
###  
<img src="https://www.syracuse.edu/wp-content/themes/g6-carbon/img/syracuse-university-seal.svg?ver=6.3.9" style="width: 200px;"/>

## Lab: Portfolio Optimization

###  Name: `Wei Mu`  SUID:`956340308`
###  November 6, 2018


**Exercise 0**

Write a function that takes a list of ticker symbols and returns the largest Sharpe ratio achievable with a portfolio built from those stocks.

In [30]:
import numpy as np
import scipy.optimize as sco
import pandas as pd
import fix_yahoo_finance as yf


def statistics(weights, rf, rets, periods_per_year=252):
    ''' Return annualized portfolio statistics.

    Parameters
    ==========
    weights : array-like
        weights for different securities in portfolio
    rf : float
        annual risk free rate
    rets : pandas.core.frame.DataFrame
        log return frame, one column per security (same order as weights)
    periods_per_year : int or float, optional
        factor used to annualize the per-period mean and covariance.
        Defaults to 252, the usual number of trading days per year,
        which suits daily returns.

    Returns
    =======
    numpy.ndarray with three floats, in this order:
    pret : float
        expected portfolio return
    pvol : float
        expected portfolio volatility
    (pret - rf) / pvol : float
        Sharpe ratio
    '''
    weights = np.asarray(weights, dtype=float)
    # Weighted sum of the mean per-period returns, scaled to one year.
    pret = np.sum(rets.mean() * weights) * periods_per_year
    # Portfolio variance is w' * Sigma * w, with Sigma the annualized covariance.
    annual_cov = rets.cov() * periods_per_year
    pvol = np.sqrt(np.dot(weights.T, np.dot(annual_cov, weights)))
    return np.array([pret, pvol, (pret - rf) / pvol])


def min_func_sharpe(weights, rf, rets):
    ''' Return the negated Sharpe ratio of a portfolio.

    Intended as an objective for a minimizer: minimizing this value
    maximizes the Sharpe ratio computed by statistics().

    Parameters
    ==========
    weights : array-like
        weights for different securities in portfolio
    rf : float
        annual risk free rate
    rets : pandas.core.frame.DataFrame
        log return frame

    Returns
    =======
    float
        Sharpe ratio multiplied by -1
    '''
    _pret, _pvol, sharpe = statistics(weights, rf, rets)
    return -sharpe


def fetchYahooData(symbols, strStartDate, strEndDate, blnExcludeNanSymbol=False, intRetry=3):
    ''' Download historical adjusted closing prices from Yahoo Finance.
    Data is downloaded via scraping method.
    See fix-yahoo-finance document for detailed description.
    Please note that currently scraping is not very stable.

    Parameters
    ==========
    symbols : list of strings
        Tickers must be available in Yahoo! Finance.
    strStartDate : string
        '%Y-%m-%d' example: "2018-01-01"
    strEndDate : string
        '%Y-%m-%d' example: "2018-12-01"
    blnExcludeNanSymbol : boolean
        Exclude symbols which do not have long enough data
        or symbols whose data contain NaNs.
    intRetry : integer
        Maximum number of download attempts. By default, it is 3 attempts.
        Try a non-existent symbol to test it.

    Returns
    =======
    A two-element list [symbolsOutput, frame]:
    symbolsOutput : list of strings
        Tickers excluding symbols whose data contain NaNs.
        If blnExcludeNanSymbol is False, which is the default, the
        original symbols list is returned unchanged.
    frame : pandas dataframe object
        A dataframe of adjusted closing prices of the symbols.

    Raises
    ======
    AssertionError
        If every download attempt failed. The error of the last attempt
        is chained as the cause.
    '''
    lastError = None
    for _ in range(intRetry):
        try:
            data = yf.download(
                tickers=symbols,
                start=strStartDate,
                end=strEndDate,
                group_by='ticker'
            )
            frame = pd.DataFrame({c: data[c]['Adj Close'] for c in symbols}, columns=symbols)
        except Exception as err:
            # Retry on ordinary failures only; KeyboardInterrupt and SystemExit
            # are not subclasses of Exception and therefore still propagate.
            lastError = err
            continue
        break
    else:
        # In python for...else is executed if the loop does not break
        msg = 'Fetching Yahoo Finance data failed. Please check if all symbols are valid, ' + \
                  'or restart Python kernel and try again.'
        raise AssertionError(msg) from lastError

    # A symbol is flagged when any of its downloaded columns contains a NaN.
    problemSymbolList = [s for s in symbols if data[s].isnull().values.any()]

    # Unless symbols are excluded below, the caller gets its own list back.
    symbolsOutput = symbols
    if data.isnull().values.any():
        print('Warning: some of the data are missing. ',
              'Please check if all the symbols have long enough history.')
        print('Symbols in question: ')
        print(problemSymbolList)
        if blnExcludeNanSymbol:
            frame.drop(problemSymbolList, axis=1, inplace=True)
            symbolsOutput = [e for e in symbols if e not in problemSymbolList]
            print('Symbols above are excluded from portfolio.')
    return [symbolsOutput, frame]


def portfolio_Optimize(symbols, frame, rf, silent=False):
    ''' Optimize portfolio weights by maximizing Sharpe ratio.

    The weights are constrained to sum to 1 and each weight is bounded
    to the interval [0, 1].

    Parameters
    ==========
    symbols : list of strings
        List of ticker symbols; its length sets the number of weights.
    frame : pandas dataframe object
        A dataframe of (adjusted) closing prices of the symbols.
    rf : float
        annual risk free rate
    silent : boolean
        suppress output if True

    Returns
    =======
    A three-element list [symbols, optWeights, sharpeRatio]:
    symbols : list of strings
        The symbols argument, returned unchanged.
    optWeights : array-like
        Optimal portfolio weights
    sharpeRatio : float
        Sharpe ratio of the optimal portfolio

    Warns
    =====
    UserWarning
        If the optimizer reports that it did not terminate successfully;
        the returned weights may then be unreliable.
    '''
    import warnings

    noa = len(symbols)
    # Log returns; the first row is NaN because of the shift.
    rets = np.log(frame / frame.shift(1))
    cons = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}
    bnds = tuple((0, 1) for _ in range(noa))
    # Start from the equally weighted portfolio.
    opts = sco.minimize(min_func_sharpe, noa * [1. / noa], args=(rf, rets),
                        method='SLSQP', bounds=bnds, constraints=cons)
    if not opts['success']:
        # Surface optimizer failures instead of silently returning the result.
        warnings.warn('Sharpe ratio optimization did not succeed for {}: {}'
                      .format(symbols, opts['message']))

    optWeights = opts['x']
    stats = statistics(optWeights, rf, rets)
    sharpeRatio = stats[2]

    if not silent:
        print('Symbols:',  symbols)
        print('Weights:', optWeights.round(3))
        print('Mean, Sigma, Sharpe Ratio:', stats.round(3))

    return [symbols, optWeights, sharpeRatio]


# Example run: five tickers, prices from 2015-01-01 to 2018-12-01.
symbols = ['AAPL', 'GOOG', 'MSFT', 'DB', 'GLD']
start = "2015-01-01"
end = "2018-12-01"
rf = 0.01  # annual risk free rate

# Download the adjusted closing prices, then maximize the Sharpe ratio.
symbolsOutput, frame = fetchYahooData(symbols, start, end)
# NOTE(review): the trailing semicolon presumably stops the notebook from
# echoing the returned list -- confirm before removing it.
portfolio_Optimize(symbolsOutput, frame, rf);

[*********************100%***********************]  5 of 5 downloaded
Symbols: ['AAPL', 'GOOG', 'MSFT', 'DB', 'GLD']
Weights: [0.    0.157 0.747 0.    0.096]
Mean, Sigma, Sharpe Ratio: [0.213 0.201 1.008]


In [8]:
# SND had its IPO on Nov 4, 2016, so its price history is shorter than the
# requested start..end window. With blnExcludeNanSymbol=True, symbols whose
# data contain NaNs are dropped before the optimization.
symbols = ['AAPL', 'GOOG', 'MSFT', 'DB', 'GLD', 'SND']
symbolsOutput, frame = fetchYahooData(symbols, start, end, blnExcludeNanSymbol=True)
portfolio_Optimize(symbolsOutput, frame, rf);

[*********************100%***********************]  6 of 6 downloaded
Symbols in question: 
['SND']
Symbols above arw excluded from portfolio.
Symbols: ['AAPL', 'GOOG', 'MSFT', 'DB', 'GLD']
Weights: [0.    0.157 0.747 0.    0.096]
Mean, Sigma, Sharpe Ratio: [0.213 0.201 1.008]


**Exercise 2**

Use your functions from the previous exercises. Get a large list of ticker symbols, say [here](http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ&render=download), and find a portfolio of $5$ stocks together with appropriate weights such that the expected return is maximized.

Try the same for Sharpe ratio.

In other words, try out many different combinations of $5$ stocks and see how high you can get the expected return and Sharpe ratio.

Have fun with it, automate it as much or as little as you like. The challenge is to find out just how well you can do.

The winner will be awarded a special prize.

***Answer:***

As an example, select large-cap, publicly traded technology companies that have at least 5 years of historical prices:

In [57]:
from urllib.request import urlopen
import requests
import itertools


# Download the large-cap company list and save it locally as LargeCap.csv.
# Both the HTTP response and the output file are context-managed so they are
# closed even if the download or the write fails.
with urlopen("https://www.nasdaq.com/screening/companies-by-industry.aspx?marketcap=Large-cap&render=download") as largeCapList:
    with open('LargeCap.csv', 'wb') as output:
        output.write(largeCapList.read())
df = pd.read_csv("LargeCap.csv")
df.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,ADR TSO,IPOyear,Sector,Industry,Summary Quote,Unnamed: 9
0,MMM,3M Company,207.92,121069100000.0,,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/mmm,
1,ABB,ABB Ltd,20.28,43232980000.0,,,Consumer Durables,Electrical Products,https://www.nasdaq.com/symbol/abb,
2,ABT,Abbott Laboratories,74.05,130056500000.0,,,Health Care,Major Pharmaceuticals,https://www.nasdaq.com/symbol/abt,
3,ABBV,AbbVie Inc.,94.27,141802500000.0,,2012.0,Health Care,Major Pharmaceuticals,https://www.nasdaq.com/symbol/abbv,
4,ABMD,"ABIOMED, Inc.",332.68,14983750000.0,,,Health Care,Medical/Dental Instruments,https://www.nasdaq.com/symbol/abmd,


In [58]:
# Discard companies whose IPO year is after 2012; a missing IPO year compares
# False against 2012, so those rows are kept. Then keep the Technology sector.
df = df[~(df['IPOyear'] > 2012) & (df['Sector'] == 'Technology')]
print(len(df), 'companies in total.')

71 companies in total.


*Too many combinations.* To reduce the candidate list to a manageable length, apply a screening method such as the following.

- Select companies that have PEG ratios less than 1.
- Scrape PEG ratio from Yahoo Finance and drop the tickers with no trailing PEG information available (N/As).

In [60]:
symbols = df['Symbol'].values.tolist()
# Tickers for which a PEG ratio was found; kept index-aligned with PEG.
symbols_copy = []
PEG = []

# The query parameters are the same for every ticker, so build them once.
params = {"formatted": "true",
          "crumb": "AKV/cl0TOgz", # works without so not sure of significance
          "lang": "en-US",
          "region": "US",
          "modules": "defaultKeyStatistics,summaryDetail",
          "corsDomain": "finance.yahoo.com"}

for ticker in symbols:
    # A timeout keeps one stalled request from hanging the whole loop.
    r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSummary/" + ticker,
                     params=params, timeout=30)
    # Any missing or empty level of the response is treated as "no PEG
    # available" rather than raising KeyError/TypeError.
    result = (r.json().get('quoteSummary') or {}).get('result') or [{}]
    data = result[0]
    pegInfo = (data.get('defaultKeyStatistics') or {}).get('pegRatio') or {}
    if 'raw' in pegInfo:
        pegRatio = pegInfo['raw']
        print(ticker)
        symbols_copy.append(ticker)
        PEG.append(pegRatio)
    else:
        print('Symbol removed:', ticker)

print(PEG)

ATVI
ADBE
AMD
Symbol removed: AABA
ADI
ANSS
AMAT
ASML
ADSK
ADP
BIDU
AVGO
CDNS
CERN
CHKP
CTXS
CTSH
DVMT
DOV
DXC
ETN
EA
ERIC
FFIV
FISV
FTNT
HPQ
INFO
ITW
INFY
IBM
INTU
JKHY
LLL
LRCX
MRVL
MXIM
MCHP
MU
MSI
NTAP
NOK
NVDA
NXPI
OMC
ORCL
PANW
PTC
QCOM
RHT
CRM
SAP
STX
NOW
SWKS
SPLK
SSNC
STM
SYMC
SNPS
TSM
TTWO
TXN
VRSN
VRSK
VMW
WDC
WIT
WDAY
Symbol removed: WPP
XLNX
[1.63, 1.01, 77.95, 1.76, 1.81, 0.62, 1.17, 1.73, 1.6, 52.81, 0.76, 2.05, 2.88, 2.71, 1.76, 1.15, 0.78, 1.22, 0.75, 1.34, 1.44, 0.14, 1.84, 2.11, 1.47, 0.9, 2.19, 1.52, 2.23, 9.28, 2.1, 3.29, 1.14, 0.89, 0.9, 1.49, 0.9, 0.15, 1.18, 0.85, 1.19, 1.37, 0.84, 1.95, 1.73, 1.08, 1.46, 1.33, 3.1, 1.68, 2.14, 0.82, 1.56, 0.9, 2.07, 0.67, 0.22, 1.16, 2.23, 1.06, 0.64, 1.3, 4.01, 2.51, 2.5, -2.46, 2.35, 3.91, 1.56]


In [61]:
# Count the tickers whose PEG ratio is below 1.
peg = np.asarray(PEG)
int(np.count_nonzero(peg < 1))

18

Choosing $5$ out of $18$ stocks gives $\binom{18}{5} = 8568$ combinations, which is an acceptable computational cost.

In [62]:
# Pair every remaining ticker with its PEG ratio and keep the rows with PEG < 1.
df = pd.DataFrame({'Symbol': symbols_copy, 'PEG': PEG}).loc[lambda d: d['PEG'] < 1]

In [63]:
# Display the screened table of symbols and PEG ratios.
df

Unnamed: 0,Symbol,PEG
5,AMAT,0.62
10,AVGO,0.76
16,DVMT,0.78
18,DXC,0.75
21,ERIC,0.14
25,HPQ,0.9
33,LRCX,0.89
34,MRVL,0.9
36,MCHP,0.9
37,MU,0.15


In [64]:
# Candidate tickers taken from df, plus the date window and rate used below.
longSymbolList = df['Symbol'].values.tolist()
start = "2013-01-01"
end = "2018-12-01"
rf = 0.01  # annual risk free rate

In [65]:
# Fetch prices for the candidates; symbols whose data contain NaNs are dropped.
symbolsOutput, frame = fetchYahooData(longSymbolList, start, end, blnExcludeNanSymbol=True)

[*********************100%***********************]  18 of 18 downloaded
Symbols in question: 
['DVMT', 'DXC']
Symbols above arw excluded from portfolio.


In [66]:
# Every 5-symbol combination, produced lazily. The iterator can be consumed
# only once, so re-create it before looping over the combinations again.
iterComb = itertools.combinations(symbolsOutput, 5)

In [67]:
# Optimize every 5-symbol combination and remember the one with the highest
# Sharpe ratio. Ties keep the earliest combination (strict comparison).
results = []
index = 0
maxSharpeRatio = float("-inf")
for i, fiveSymbols in enumerate(iterComb):
    symbols = list(fiveSymbols)
    currentResult = portfolio_Optimize(symbols, frame[symbols], rf, silent=True)
    results.append(currentResult)
    if currentResult[2] > maxSharpeRatio:
        index = i
        maxSharpeRatio = currentResult[2]

print('==========================================================================')
if results:
    print('Optimal combination:')
    print('Symbols:', results[index][0])
    print('Optimal weights:', results[index][1].round(4))
    print('Optimal Sharpe ratio:', results[index][2].round(4))
else:
    # iterComb yields nothing when fewer than 5 symbols are available or when
    # the one-shot iterator was already consumed by an earlier run of this cell.
    print('No combinations were evaluated. Re-create iterComb and make sure '
          'at least 5 symbols are available.')
    

Optimal combination:
Symbols: ['AVGO', 'HPQ', 'LRCX', 'SSNC', 'TTWO']
Optimal weights: [0.2971 0.1799 0.     0.1915 0.3315]
Optimal Sharpe ratio: 1.4271
