In [189]:
%%writefile marketsym.py

import os
import pandas as pd
import numpy as np
import time
import random
import datetime
import matplotlib.pyplot as plt
import scipy.optimize as spo

from sys import argv


def symbol_to_path(symbol, base_dir='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/data'):
    """Build the path of the CSV file holding the data of ``symbol``.

    The file is named ``<symbol>.csv`` and is joined onto ``base_dir``.
    """
    csv_name = str(symbol) + '.csv'
    return os.path.join(base_dir, csv_name)


def get_symbols_from_txt(filename, base_dir='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/data'):
    """Read a list of symbols from a text file, one symbol per line.

    Parameters
    ----------
    filename : name of the text file, resolved relative to ``base_dir``.
    base_dir : directory that contains ``filename``.

    Returns
    -------
    list of str, one entry per line of the file (line endings removed).

    Raises
    ------
    OSError if the file cannot be opened.
    """
    # The context manager closes the file even if reading fails.
    with open(os.path.join(base_dir, filename)) as symbols_file:
        return symbols_file.read().splitlines()


def normalize_data(df):
    """Normalize stock prices so that every column starts at 1.

    Each column of ``df`` is divided by its value in the first row.
    The first row is selected by position (``.iloc``), so the result does
    not depend on the kind of index ``df`` carries.
    """
    return df / df.iloc[0, :]


def get_data(symbo, dates, col='Adj Close', printerror=True):
    """For the given symbols, return the values of ``col`` in the range ``dates``.

    'SPY' is added at the front of the symbol list when it is absent, and rows
    for which SPY has no value are dropped, so SPY acts as the reference for
    the dates that are kept. Remaining gaps are forward-filled and then
    back-filled.

    Parameters
    ----------
    symbo : iterable of symbol names; it is copied, never modified.
    dates : index of the returned DataFrame (before SPY-based filtering).
    col : name of the CSV column to load for every symbol.
    printerror : when True, report symbols whose CSV file cannot be opened.

    Returns
    -------
    DataFrame with one column per symbol that could be loaded.

    Raises
    ------
    KeyError if the SPY file itself could not be loaded, because the SPY
    column is then missing when rows are filtered on it.
    """
    df = pd.DataFrame(index=dates)
    symbols = list(symbo)  # private copy: the caller's sequence stays untouched
    if 'SPY' not in symbols:
        symbols.insert(0, 'SPY')

    for s in symbols:
        try:
            df_temp = pd.read_csv(symbol_to_path(s), index_col='Date', parse_dates=True,
                                  usecols=['Date', col], na_values=['nan'])
        except OSError:
            if printerror:
                # Report the symbol that failed, not the whole input list.
                print('File {}.csv not found, skipping the associated column.'.format(s))
        else:
            df = df.join(df_temp.rename(columns={col: s}))

        if s == 'SPY':
            df = df.dropna(subset=['SPY'])

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    return df.ffill().bfill()


def get_daily_returns(df):
    """Return the row-over-row relative change of every column of ``df``.

    Row ``i`` of the result is ``df[i] / df[i - 1] - 1``. The first row has no
    predecessor and is set to 0. ``df`` itself is not modified; an empty
    frame yields an empty result.
    """
    dr = (df / df.shift(1)) - 1
    if len(dr.index) > 0:
        # Positional access: the first row regardless of the index type.
        dr.iloc[0, :] = 0

    return dr


class Portfolio:
    """Portfolio object storing the quantity held of each symbol.

    ``self.amount`` maps every symbol, plus the special entry 'Cash', to the
    quantity held. A negative share count is a short position. Short
    positions are valued by their absolute size, like long ones (see
    ``get_portfolio_value``), and the cash bookkeeping in ``execute_order``
    follows the same convention.
    """

    def __init__(self, initial_money=0, symbols=None, order_list=None):
        """Initialize with initial money, a list of symbols and optional orders.

        Parameters
        ----------
        initial_money : starting value of the 'Cash' entry.
        symbols : iterable of tradable symbols; 'Cash' is added at the front
            when absent. ``None`` means no symbols.
        order_list : iterable of dicts with keys 'date', 'symbol', 'is_buy'
            and 'amount'; each one is executed in order. ``None`` means no
            orders.
        """
        # Create the symbols list, adding 'Cash'.
        self.symbols = list(symbols) if symbols is not None else []
        if 'Cash' not in self.symbols:
            self.symbols.insert(0, 'Cash')

        # Holdings per symbol, everything starting at zero except the cash.
        self.amount = {s: 0 for s in self.symbols}
        self.amount['Cash'] = initial_money

        # Replay the initial orders.
        if order_list is not None:
            for order in order_list:
                self.execute_order(date=order['date'], symbol=order['symbol'], is_buy=order['is_buy'],
                                   amount=order['amount'], p_output=True)

    @staticmethod
    def _cash_delta(position, amount, is_buy, price):
        """Return the change in cash caused by trading ``amount`` shares at ``price``.

        ``position`` is the signed share count held before the trade. Because
        a position is valued at ``abs(position) * price``, growing the
        absolute position costs cash and shrinking it releases cash. For a
        non-negative ``amount`` the returned value keeps
        ``cash + abs(position) * price`` unchanged across the trade.
        """
        if is_buy:
            if position >= 0:
                return -amount * price
            if position <= -amount:
                return amount * price
            # The buy closes the short and opens a long with the remainder.
            return -(amount - 2 * abs(position)) * price

        if position <= 0:
            return -amount * price
        if position >= amount:
            return amount * price
        # The sale closes the long and opens a short with the remainder; the
        # sign mirrors the buy case so the portfolio value is conserved.
        return -(amount - 2 * abs(position)) * price

    def execute_order(self, date, symbol, is_buy, amount, p_output=True):
        """Execute one buy or sell order at the price of ``symbol`` on ``date``.

        The order is aborted, leaving the holdings untouched, when the order
        type is unknown (``is_buy`` is None), when the symbol's data cannot be
        found, when no price is available for ``date``, or when the order
        would leave a negative cash balance.

        Parameters
        ----------
        date : day of the order, anything ``pd.date_range`` accepts.
        symbol : symbol to trade; must be one of the portfolio's symbols.
        is_buy : True to buy, False to sell, None for an unrecognized type.
        amount : number of shares to trade.
        p_output : when True, print the outcome of the order.
        """
        if is_buy is None:
            # An unrecognized order type must not be treated as a sale.
            if p_output:
                print('Unknown order type for symbol {} on day {}. Order aborted.'.format(symbol, date))
            return

        try:
            dates = pd.date_range(date, date)
            prices = get_data([symbol], dates, printerror=False)[symbol]
            position = self.amount[symbol]
        except (OSError, KeyError):
            if p_output:
                print('File corresponding to symbol {} not found. Associated order aborted.'.format(symbol))
            return

        # No row at all (date filtered out by get_data) or a NaN price both
        # mean that no price is available for that day.
        if prices.empty or np.isnan(prices.iloc[0]):
            print('Symbol {} not trading on day {}. Order aborted.'.format(symbol, date))
            return
        asset_value = float(prices.iloc[0])

        new_cash = self.amount['Cash'] + self._cash_delta(position, amount, is_buy, asset_value)
        if new_cash < 0:
            if p_output:
                print('Not enough liquid money to purchase ', symbol, ', order aborted.')
            return

        self.amount['Cash'] = new_cash
        self.amount[symbol] = position + amount if is_buy else position - amount
        if p_output:
            o_type = 'bought' if is_buy else 'sold'
            print('Order executed: {} stocks of {} {} for {} each.'.format(amount, symbol, o_type, asset_value))

    def get_portfolio_value(self, dates):
        """Compute the portfolio value over ``dates``.

        The value is the cash plus, for every symbol with a non-zero holding,
        the absolute share count times the price returned by ``get_data``.
        Rows for which that sum is NaN are dropped.

        Returns
        -------
        DataFrame with a single 'Value' column.

        Raises
        ------
        KeyError if ``get_data`` returns no column for a held symbol.
        """
        # Symbols actually held; 'Cash' is handled separately below.
        u_symb = [s for s in self.symbols if s != 'Cash' and self.amount[s] != 0]

        df_value = pd.DataFrame(index=dates, columns=['Value'])
        df_data = get_data(u_symb, dates)

        df_value['Value'] = self.amount['Cash']

        for s in u_symb:
            df_value['Value'] += abs(self.amount[s]) * df_data[s]

        return df_value.dropna()

    def get_daily_returns(self, dates):
        """Return the daily returns of the portfolio value over ``dates``."""
        d_value = self.get_portfolio_value(dates)

        # Resolves to the module-level get_daily_returns function.
        return get_daily_returns(d_value)

    def get_daily_returns_mean(self, dates):
        """Return the mean of the portfolio's daily returns over ``dates``."""
        dr = self.get_daily_returns(dates)
        return dr.mean()

    def get_risk(self, dates):
        """Return the standard deviation of the daily returns over ``dates``."""
        dr = self.get_daily_returns(dates)
        return dr.std()

    def get_sharp_ratio(self, dates):
        """Return sqrt(252) * mean / std of the daily returns over ``dates``.

        252 is presumably the number of trading days per year used to
        annualize the ratio.
        """
        dr = self.get_daily_returns(dates)
        return np.sqrt(252) * dr.mean()/dr.std()

    def print_exposition(self):
        """Print every symbol with a non-zero holding and its quantity."""
        for s in self.symbols:
            if self.amount[s] != 0:
                print(s, ': ', self.amount[s])
                
                

def get_order_list(filename):
    """Read a header-less CSV of orders into a list of order dicts.

    Every row is expected to hold, in order: year, month, day, symbol, order
    type and amount.

    Returns
    -------
    list of dicts with the keys
        'date'   : '<year>-<month>-<day>' built from the first three fields,
        'symbol' : the symbol as a string,
        'is_buy' : True for 'BUY', False for 'SELL', None for anything else,
        'amount' : the value of the sixth field.
    """
    # header=None marks the file as header-less; negative header values are
    # not accepted by pandas.
    df_olist = pd.read_csv(filename, header=None, skipinitialspace=True)
    order_types = {'BUY': True, 'SELL': False}

    olist = []
    for row in df_olist.itertuples(index=False, name=None):
        olist.append({
            'date': '{}-{}-{}'.format(row[0], row[1], row[2]),
            'symbol': str(row[3]),
            'is_buy': order_types.get(str(row[4])),
            'amount': row[5],
        })

    return olist


def save_df_to_file(dataframe, filename):
    """Write ``dataframe`` to ``filename`` as 'year, month, day, value, ...' lines.

    Each row becomes one line: the year, month and day of its index entry
    followed by the string form of every column value, separated by ', '.
    Lines are separated by a newline and the last line has no trailing
    newline. The index entries must expose ``year``, ``month`` and ``day``.
    """
    n_cols = dataframe.shape[1]
    lines = []
    for row_pos, stamp in enumerate(dataframe.index):
        fields = [str(stamp.year), str(stamp.month), str(stamp.day)]
        # Purely positional access keeps each column's own scalar type.
        fields.extend(str(dataframe.iat[row_pos, col_pos]) for col_pos in range(n_cols))
        lines.append(', '.join(fields))

    # The context manager closes the file even if writing fails.
    with open(filename, 'w') as out_file:
        out_file.write('\n'.join(lines))
    


# Script body: expects exactly three command-line arguments after the script
# name -- the initial cash, the orders file and the output file.
scriptname, initial_cash, orders_file, output_file = argv
initial_cash = float(initial_cash)
orders_file = str(orders_file)
output_file = str(output_file)
# Directory against which the orders file name is resolved below.
base_dir='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/'

# Symbols handled by the portfolio, read from get_symbols_from_txt's default directory.
symbols = get_symbols_from_txt('SP500-symbols.txt')

orders_list = get_order_list(os.path.join(base_dir, orders_file))

# Constructing the portfolio executes every order of orders_list.
pfolio = Portfolio(initial_cash, symbols, orders_list)

# Fixed window over which the portfolio is valued.
start_date = '2015-01-01'
end_date = '2015-02-28'
dates = pd.date_range(start_date, end_date)
pf_value = pfolio.get_portfolio_value(dates)

# output_file is used exactly as given; it is not joined with base_dir.
save_df_to_file(pf_value, output_file)

Overwriting marketsym.py


In [15]:
def plot_data(df, title='Stock prices'):
    """Plot the contents of ``df`` under ``title`` and return the axes.

    The x axis is labelled 'Date' and the y axis 'Price'.
    """
    axes = df.plot(fontsize=10, title=title)
    for set_label, text in ((axes.set_xlabel, 'Date'), (axes.set_ylabel, 'Price')):
        set_label(text)
    return axes


def plot_selected(df, columns, start_index, end_index, title='Stock prices'):
    """Plot the desired columns over the desired index range and show the figure.

    Parameters
    ----------
    df : DataFrame holding the data to plot.
    columns : column label or list of column labels to plot.
    start_index, end_index : index labels bounding the plotted rows; the
        selection is label-based (``.loc``), so both ends are included.
    title : title of the plot.
    """
    # .loc replaces the removed .ix indexer.
    ax = df.loc[start_index:end_index, columns].plot(title=title, fontsize=10)
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')

    plt.show()

    
import urllib.request
import urllib.error


def download_one(the_url, the_name):
    """Retrieve ``the_url`` and store it in the file ``the_name``.

    An HTTP error is reported on standard output instead of being raised.
    """
    fetch = urllib.request.urlretrieve
    try:
        fetch(the_url, the_name)
    except urllib.error.HTTPError:
        print(the_url, 'not found')


def download_from_symbols_list(syms,
                               base_url='http://ichart.finance.yahoo.com/table.csv?d=12&e=31&f=2015&g=d&a=7&b=19&c=2004%20&ignore=.csv&s=',
                               base_where='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/data2/'):
    """Download one CSV file per symbol of ``syms``.

    For each symbol the URL is ``base_url`` followed by the symbol and the
    destination is ``base_where`` followed by ``<symbol>.csv``. A progress
    line is printed after every tenth download, starting with the first.
    """
    total = len(syms)
    for position in range(total):
        sym = syms[position]
        source_url = base_url + sym
        target_file = base_where + sym + '.csv'
        download_one(source_url, target_file)
        if position % 10 != 0:
            continue
        print('Downloading file ', position, ' out of ', total)

            

In [217]:
%%writefile analyze.py

import os
import pandas as pd
import numpy as np
import time
import random
import datetime
import matplotlib.pyplot as plt
import scipy.optimize as spo

from sys import argv


def symbol_to_path(symbol, base_dir='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/data'):
    """Give the location of the ``<symbol>.csv`` data file inside ``base_dir``."""
    file_name = '%s.csv' % (symbol,)
    return os.path.join(base_dir, file_name)


def plot_data(df, title='Portfolio Value vs Symbol'):
    """Plot ``df`` under ``title``, show the figure and return the axes.

    The x axis is labelled 'Date' and the y axis 'Price'.
    """
    axes = df.plot(fontsize=10, title=title)
    for set_label, text in ((axes.set_xlabel, 'Date'), (axes.set_ylabel, 'Price')):
        set_label(text)
    plt.show()
    return axes


def normalize_data(df):
    """Normalize stock prices to have an initial value equal to 1.

    Every column is divided by its first-row value. The first row is taken
    by position (``.iloc``), whatever index ``df`` uses.
    """
    first_row = df.iloc[0, :]
    return df / first_row


def get_data(symbo, dates, col='Adj Close', printerror=True):
    """For the given symbols, return the values of ``col`` in the range ``dates``.

    'SPY' is added at the front of the symbol list when it is absent, and rows
    for which SPY has no value are dropped, so SPY acts as the reference for
    the dates that are kept. Remaining gaps are forward-filled and then
    back-filled.

    Parameters
    ----------
    symbo : iterable of symbol names; it is copied, never modified.
    dates : index of the returned DataFrame (before SPY-based filtering).
    col : name of the CSV column to load for every symbol.
    printerror : when True, report symbols whose CSV file cannot be opened.

    Returns
    -------
    DataFrame with one column per symbol that could be loaded.

    Raises
    ------
    KeyError if the SPY file itself could not be loaded, because the SPY
    column is then missing when rows are filtered on it.
    """
    df = pd.DataFrame(index=dates)
    symbols = list(symbo)  # private copy: the caller's sequence stays untouched
    if 'SPY' not in symbols:
        symbols.insert(0, 'SPY')

    for s in symbols:
        try:
            df_temp = pd.read_csv(symbol_to_path(s), index_col='Date', parse_dates=True,
                                  usecols=['Date', col], na_values=['nan'])
        except OSError:
            if printerror:
                # Report the symbol that failed, not the whole input list.
                print('File {}.csv not found, skipping the associated column.'.format(s))
        else:
            df = df.join(df_temp.rename(columns={col: s}))

        if s == 'SPY':
            df = df.dropna(subset=['SPY'])

    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    return df.ffill().bfill()


def read_values(filename):
    """Read a header-less 'year, month, day, value' CSV into a DataFrame.

    Returns
    -------
    DataFrame with a single 'Value' column (fourth field of every row),
    indexed by the dates built from the first three fields.

    Raises
    ------
    ValueError if a year, month or day field cannot be converted to int.
    """
    # header=None marks the file as header-less; negative header values are
    # not accepted by pandas.
    df_raw = pd.read_csv(filename, header=None, skipinitialspace=True)

    dates = ['{}-{}-{}'.format(int(year), int(month), int(day))
             for year, month, day in zip(df_raw[0], df_raw[1], df_raw[2])]

    return pd.DataFrame({'Value': df_raw[3].values}, index=pd.to_datetime(dates))


def get_daily_returns(df):
    """Return the row-over-row relative change of every column of ``df``.

    Row ``i`` of the result is ``df[i] / df[i - 1] - 1``. The first row has no
    predecessor and is set to 0. ``df`` itself is not modified; an empty
    frame yields an empty result.
    """
    dr = (df / df.shift(1)) - 1
    if len(dr.index) > 0:
        # Positional access: the first row regardless of the index type.
        dr.iloc[0, :] = 0

    return dr


def get_daily_returns_mean(df):
    """Return the per-column mean of the daily returns of ``df``."""
    return get_daily_returns(df).mean()


def get_risk(df):
    """Return the per-column standard deviation of the daily returns of ``df``."""
    return get_daily_returns(df).std()


def get_sharp_ratio(df):
    """Return sqrt(252) * mean / std of the daily returns of ``df``, per column.

    252 is presumably the number of trading days per year used to annualize
    the ratio.
    """
    returns = get_daily_returns(df)
    scale = np.sqrt(252)
    return scale * returns.mean() / returns.std()



# Script body: expects exactly two command-line arguments after the script
# name -- the values file and the symbol to compare against.
scriptname, values_file, compare_symb = argv
values_file = str(values_file)
compare_symb = str(compare_symb)
# Directory against which the values file name is resolved.
base_dir='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/'


values_df = read_values(os.path.join(base_dir, values_file))

try:
    # A KeyError here means get_data returned no column for compare_symb.
    tocompare_df = get_data([compare_symb], values_df.index)[compare_symb]
    total_df = values_df.join(tocompare_df)
    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    total_df = total_df.ffill().bfill()

    print('The average daily return of the values is:', get_daily_returns_mean(total_df)['Value'])
    print('The standard deviation of the values is:', get_risk(total_df)['Value'])
    print('The Sharp ratio of the values is:', get_sharp_ratio(total_df)['Value'])

    # Both series are normalized to start at 1 so they share one scale.
    title_plot = 'The values against ' + compare_symb
    plot_data(normalize_data(total_df), title=title_plot)


except KeyError:
    print('Datas for {} not found, job aborted.'.format(compare_symb))


Overwriting analyze.py


In [190]:
# Scratch data: four consecutive dates as ISO-formatted strings.
listo = ['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04']

In [194]:
# Convert the date strings to pandas datetimes.
lista = pd.to_datetime(listo)

In [197]:
# Show the first converted date.
print(lista[0])

2016-01-01 00:00:00


In [204]:
def read_values(filename):
    """Read a header-less 'year, month, day, value' CSV into a DataFrame.

    Debugging variant: the list of date strings built from the file is
    printed before it is converted to datetimes.

    Returns
    -------
    DataFrame with a single 'Value' column (fourth field of every row),
    indexed by the dates built from the first three fields.

    Raises
    ------
    ValueError if a year, month or day field cannot be converted to int.
    """
    # header=None marks the file as header-less; negative header values are
    # not accepted by pandas.
    df_raw = pd.read_csv(filename, header=None, skipinitialspace=True)

    dates = ['{}-{}-{}'.format(int(year), int(month), int(day))
             for year, month, day in zip(df_raw[0], df_raw[1], df_raw[2])]

    print(dates)

    return pd.DataFrame({'Value': df_raw[3].values}, index=pd.to_datetime(dates))

In [205]:
# Load the values stored in prime_value.csv under base_dir.
base_dir='C:/Users/Pietro T/Documenti/ML_and_finance/exercises/'
dfo = read_values(os.path.join(base_dir, 'prime_value.csv'))

['2015-1-2', '2015-1-5', '2015-1-6', '2015-1-7', '2015-1-8', '2015-1-9', '2015-1-12', '2015-1-13', '2015-1-14', '2015-1-15', '2015-1-16', '2015-1-20', '2015-1-21', '2015-1-22', '2015-1-23', '2015-1-26', '2015-1-27', '2015-1-28', '2015-1-29', '2015-1-30', '2015-2-2', '2015-2-3', '2015-2-4', '2015-2-5', '2015-2-6', '2015-2-9', '2015-2-10', '2015-2-11', '2015-2-12', '2015-2-13', '2015-2-17', '2015-2-18', '2015-2-19', '2015-2-20', '2015-2-23', '2015-2-24', '2015-2-25', '2015-2-26', '2015-2-27']


In [207]:
# Inspect the index that read_values built from the file's date fields.
print(dfo.index)

DatetimeIndex(['2015-01-02', '2015-01-05', '2015-01-06', '2015-01-07',
               '2015-01-08', '2015-01-09', '2015-01-12', '2015-01-13',
               '2015-01-14', '2015-01-15', '2015-01-16', '2015-01-20',
               '2015-01-21', '2015-01-22', '2015-01-23', '2015-01-26',
               '2015-01-27', '2015-01-28', '2015-01-29', '2015-01-30',
               '2015-02-02', '2015-02-03', '2015-02-04', '2015-02-05',
               '2015-02-06', '2015-02-09', '2015-02-10', '2015-02-11',
               '2015-02-12', '2015-02-13', '2015-02-17', '2015-02-18',
               '2015-02-19', '2015-02-20', '2015-02-23', '2015-02-24',
               '2015-02-25', '2015-02-26', '2015-02-27'],
              dtype='datetime64[ns]', freq=None)


In [208]:
# With an empty symbol list get_data still loads SPY, which it inserts when missing.
qq = get_data([], dfo.index)

In [214]:
# Keep only the AAPL column of the data loaded over dfo's dates.
dd = get_data(['AAPL'], dfo.index)['AAPL']

In [212]:
# Display the AAPL series.
print(dd)

2015-01-02    106.918237
2015-01-05    103.906178
2015-01-06    103.915960
2015-01-07    105.373089
2015-01-08    109.421762
2015-01-09    109.539117
2015-01-12    106.840000
2015-01-13    107.788603
2015-01-14    107.377870
2015-01-15    104.463604
2015-01-16    103.651911
2015-01-20    106.321692
2015-01-21    107.133385
2015-01-22    109.920514
2015-01-23    110.487721
2015-01-26    110.605069
2015-01-27    106.732426
2015-01-28    112.766317
2015-01-29    116.277127
2015-01-30    114.575513
2015-02-02    116.013079
2015-02-03    116.032642
2015-02-04    116.922564
2015-02-05    117.757104
2015-02-06    116.765484
2015-02-09    117.541107
2015-02-10    119.799242
2015-02-11    122.607191
2015-02-12    124.158437
2015-02-13    124.767156
2015-02-17    125.503506
2015-02-18    126.377307
2015-02-19    126.112217
2015-02-20    127.143110
2015-02-23    130.579411
2015-02-24    129.764515
2015-02-25    126.446026
2015-02-26    128.046365
2015-02-27    126.122045
Name: AAPL, dtype: float6