# In [134]:
from tabulate import tabulate
import math
import yahoo_finance as yf
import pandas as pd
import numpy as np

class Portfolio(object):
    '''Define a Portfolio of stocks and their weights.

    The composition is stored in ``self.composition``, a DataFrame indexed by
    stock symbol with a single column ``'norm proportion'``. Non-zero
    proportions are rescaled so that they sum to 1.

    Parameters
    ----------
    stocksnweights : dict, list, set, tuple or frozenset
        A dict maps symbols (keys) to proportions (values). Any of the other
        containers holds symbols only; every proportion then starts at 0.
    equity : float, optional
        Total equity of the portfolio (arbitrary units). Defaults to 0.

    Raises
    ------
    TypeError
        If ``stocksnweights`` is none of the supported container types.
    '''

    def __init__(self, stocksnweights, equity=0.):
        if isinstance(stocksnweights, dict):
            # dict -> symbols = keys, proportions = values.
            # The index must be a flat list: wrapping it in another list
            # would build a one-level MultiIndex of tuples.
            self.composition = pd.DataFrame(list(stocksnweights.values()),
                                            index=list(stocksnweights.keys()),
                                            columns=['norm proportion'])
        elif isinstance(stocksnweights, (list, set, tuple, frozenset)):
            # symbols only -> empty portfolio, all proportions are 0
            self.composition = pd.DataFrame(np.zeros(len(stocksnweights)),
                                            index=list(stocksnweights),
                                            columns=['norm proportion'])
        else:
            # Fail loudly instead of returning a half-initialised object.
            raise TypeError(
                'stocksnweights must be a dict of symbol -> proportion or a '
                'list/set/tuple of symbols, got {0}'.format(
                    type(stocksnweights).__name__))

        self._check_comp()
        self.equity = equity

    def _check_comp(self):
        '''Rescale the proportions in place so that they sum to 1.

        A total of exactly 0 (empty portfolio) is left untouched.
        '''
        total = self.composition['norm proportion'].sum()
        if total == 0.:
            return
        if total != 1.:
            self.composition['norm proportion'] = (
                self.composition['norm proportion'] / total)

    def load_timeseries(self, time_span):
        '''Fetch historical data for every symbol and store it in ``self.stocks``.

        ``time_span`` has to be a sequence of two strings formatted
        "yyyy-mm-dd", ordered "begin", "end".

        For each symbol the 'Symbol' and 'Volume' columns are dropped, the
        frame is indexed and sorted by 'Date', converted to float64, and two
        columns are added: 'Return' (percentage change of 'Close'; the first
        row is measured against the first 'Open') and 'Norm_Close' ('Close'
        as a percentage of the first 'Open').

        NOTE(review): ``pd.Panel`` was removed in pandas 0.25, so this method
        requires an older pandas release -- confirm the target environment.
        '''
        print("Fetching time series data from {0} to {1} :".format(*time_span))
        per_symbol = {}
        for sym in self.composition.index:
            print('Loading "{0}" data...'.format(sym))
            # Fetch
            stock = yf.Share(sym)
            data = pd.DataFrame.from_dict(stock.get_historical(*time_span))
            # Data cleansing
            del data['Symbol']
            del data['Volume']
            data.set_index("Date", inplace=True)
            data.sort_index(inplace=True)
            data = data.astype("float64")
            # Adding features. The first return is patched on a standalone
            # Series: chained assignment on data['Return'] may write to a
            # temporary copy and leave NaN behind.
            first_open = data['Open'].iloc[0]
            returns = (data['Close'] / data['Close'].shift(1) - 1.) * 100.
            returns.iloc[0] = (data['Close'].iloc[0] / first_open - 1.) * 100.
            data['Return'] = returns
            data['Norm_Close'] = data['Close'] / first_open * 100.
            per_symbol[sym] = data
        self.stocks = pd.Panel.from_dict(per_symbol, orient='minor')
        # Stocks on different markets might be traded on different days: this
        # results in NaN when dates do not align. Forward-fill, then back-fill
        # the leading gaps. The filled frame is assigned back explicitly
        # rather than relying on in-place modification of a possible copy.
        for it in self.stocks.items:
            self.stocks[it] = self.stocks[it].ffill().bfill()
        print("Loaded time series of selected shares :")
        print(self.stocks.axes)

    def set_shares(self, composition, equity=1.):
        '''Update the proportions and derive the number of shares per symbol.

        Parameters
        ----------
        composition : dict
            Maps symbols to their new proportion; renormalised afterwards.
        equity : float, optional
            New total equity. A non-zero value overwrites ``self.equity``
            (note that the default is 1.). Passing 0 keeps the current
            equity, unless that is 0 too, in which case it is set to 1.

        Sets ``self._weights`` (column 'weight-per-share': proportion divided
        by the first 'Open' price of each symbol) and ``self.stocks['Share']``
        (that weight times the equity, repeated for every date). Requires
        ``self.stocks`` to be loaded.
        '''
        if equity != 0.:
            self.equity = equity
        elif self.equity == 0.:
            print("Equity set to 1 [arb. units]")
            # Actually apply the announced fallback; otherwise every share
            # and the whole portfolio value would stay at zero.
            self.equity = 1.
        for symbol in composition:
            # Single .loc call: chained assignment may not reach the frame.
            self.composition.loc[symbol, 'norm proportion'] = composition[symbol]
        self._check_comp()

        opens = self.stocks['Open']
        symbols = self.composition.index
        first_open = pd.Series([opens[sym].iloc[0] for sym in symbols],
                               index=symbols)
        self._weights = (
            self.composition['norm proportion'] / first_open
        ).to_frame('weight-per-share')

        per_share = self._weights['weight-per-share'] * self.equity
        ones = pd.DataFrame(np.ones(opens.shape),
                            index=opens.index, columns=opens.columns)
        # Series * DataFrame aligns on the symbol labels (columns).
        self.stocks['Share'] = ones * per_share

    def get_stats(self):
        '''Compute the portfolio time series and its summary statistics.

        Builds ``self.data`` with the weighted 'Adj_Close', 'Close' and 'Open'
        of the portfolio, plus 'Return' (percentage change of 'Close'; the
        first row is measured against the first 'Open') and 'Norm_Close'
        ('Close' as a percentage of the first 'Open'). Requires
        ``set_shares`` to have been called.

        Returns
        -------
        tuple
            ``(av_daily_return, volatility, sharpe)``: mean and standard
            deviation of 'Return', and sqrt(number of rows) * mean / std.
            The same values are stored in ``self.stats`` and as attributes.
        '''
        cols = ['Adj_Close', 'Close', 'Open']
        weights = self._weights.iloc[:, 0]
        self.data = pd.DataFrame(
            {col: self.stocks[col].dot(weights) * self.equity for col in cols},
            columns=cols)

        first_open = self.data['Open'].iloc[0]
        returns = (self.data['Close'] / self.data['Close'].shift(1) - 1) * 100.
        # Patch the first row on a standalone Series (no chained assignment).
        returns.iloc[0] = (self.data['Close'].iloc[0] / first_open - 1) * 100.
        self.data['Return'] = returns
        self.data['Norm_Close'] = self.data['Close'] / first_open * 100.

        self.av_daily_return = self.data['Return'].mean()
        self.volatility = self.data['Return'].std()
        self.sharpe = (math.sqrt(len(self.data.index))
                       * self.av_daily_return / self.volatility)
        self.stats = {
            'av_daily_return': self.av_daily_return,
            'volatility': self.volatility,
            'sharpe': self.sharpe,
        }
        return self.av_daily_return, self.volatility, self.sharpe