In [1]:
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import time
import scipy.optimize as spo 
import yfinance

In [6]:
def symbol_to_path(symbol, base_dir = 'C:/Users/gupta/Downloads'):
    """Build the path of the CSV file that holds data for ``symbol``.

    Parameters
    ----------
    symbol : object
        Ticker symbol; it is converted with ``str()`` and used as the file stem.
    base_dir : str
        Directory containing the ``<symbol>.csv`` files. The default is a
        machine-specific absolute path; pass another directory to read
        from a different location.

    Returns
    -------
    str
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    file_name = str(symbol) + ".csv"
    return os.path.join(base_dir, file_name)

In [7]:
def get_data(symbols, dates, c_name):
    """Load one CSV column for several symbols into a single DataFrame.

    Parameters
    ----------
    symbols : list
        Ticker symbols to load. 'SPY' is always loaded as well: when it is
        missing it is placed first in an internal copy of the list, so the
        caller's list is left untouched.
    dates : index-like
        Index of the returned frame, e.g. the result of ``pd.date_range()``.
    c_name : str
        Name of the CSV column to read for every symbol (the 'Date' column
        is always read and used as the index).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dates`` with one column per loaded symbol, each column
        named after its symbol. Rows where 'SPY' has no value are dropped as
        soon as the SPY column has been joined.
    """
    # Copy first: inserting 'SPY' into the argument itself would silently
    # change the caller's list and break any loop that pairs it with
    # per-symbol data afterwards.
    tickers = list(symbols)
    if 'SPY' not in tickers:
        tickers.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in tickers:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            index_col='Date',
            parse_dates=True,
            usecols=['Date', c_name],
            na_values=['nan'],
        )
        df_temp = df_temp.rename(columns={c_name: symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':
            # Presumably SPY is the reference for which dates count as
            # trading days; dates without an SPY value are removed.
            df = df.dropna(subset=["SPY"])
    return df

In [23]:
def plot_data(df, title = "Prices"):
    """Plot every column of ``df`` on one chart and display it.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; its ``plot`` method is used with font size 10.
    title : str
        Chart title.
    """
    chart = df.plot(title=title, fontsize=10)
    # Fixed axis captions used by every chart drawn through this helper.
    chart.set_xlabel("Date")
    chart.set_ylabel("value")
    plt.show()

In [9]:
def plot_selected(df, columns, start_index, end_index):
    """Plot a label-based slice of ``df`` under the title "Selected Data".

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    columns : list
        Column labels to include.
    start_index, end_index :
        Row labels bounding the slice; ``.loc`` slicing is used, so both
        ends are included.
    """
    selection = df.loc[start_index:end_index, columns]
    plot_data(selection, title="Selected Data")

In [10]:
def normalize_data(df):
    """Divide every row of ``df`` by its first row.

    After the division the first row is 1 in every column, so all columns
    start from the same level.

    Returns a new object; ``df`` itself is not modified.
    """
    first_row = df.iloc[0]
    return df / first_row

In [11]:
def rm_calc(symbol,rq,s_date, e_date, c_name):
    """Plot a symbol's series together with its rolling mean.

    Parameters
    ----------
    symbol : str
        Ticker symbol to load and plot.
    rq : int
        Window passed to ``Series.rolling``.
    s_date, e_date :
        Bounds handed to ``pd.date_range`` to build the date index.
    c_name : str
        CSV column to load for the symbol.
    """
    date_span = pd.date_range(s_date, e_date)
    series = get_data([symbol], date_span, c_name)[symbol]
    rolling_avg = series.rolling(rq).mean()

    # Raw series first so both lines share the same axes.
    ax = series.plot(title='Rolling Mean', label=symbol)
    rolling_avg.plot(label='Rolling mean', ax=ax)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()

In [72]:
def compute_daily_returns(df):
    """Return the row-over-row relative change of ``df``.

    Each value is ``current / previous - 1``. The first row has no
    predecessor, so it comes out as NaN.
    """
    previous = df.shift(1)
    return (df / previous) - 1

In [13]:
def compute_cumulative_returns(df):
    """Return the relative change of every row with respect to the first row.

    Each value is ``current / first - 1``; the first row is set to 0.

    The first row is selected by position (``.iloc[0]``). A label lookup
    (``.loc[0]``) only works when the index happens to contain the label 0
    and raises ``KeyError`` on a date-indexed frame.

    Returns a new object; ``df`` itself is not modified.
    """
    cumulative = (df / df.iloc[0]) - 1
    # Pin the starting point to exactly 0 (also covers a first value of 0 or
    # NaN, where the division above would not yield 0).
    cumulative.iloc[0] = 0
    return cumulative

In [14]:
def fill_stock(df):
    """Fill missing values in ``df`` forward, then backward, in place.

    Forward fill runs first so gaps take the last known value; the backward
    fill afterwards only affects values missing at the very start.

    ``inplace`` must be a real boolean: pandas rejects the string "TRUE"
    with ``ValueError``. ``ffill``/``bfill`` replace the deprecated
    ``fillna(method=...)`` form.

    Returns
    -------
    The same ``df`` object that was passed in, now filled.
    """
    df.ffill(inplace=True)
    df.bfill(inplace=True)
    return df
    

In [43]:
def plot_hist_dr(symbol, s_date, e_date):
    """Show a 20-bin histogram of a symbol's daily returns.

    The 'Adj Close' column is loaded for ``symbol`` over the range
    ``s_date``..``e_date`` and converted to daily returns. Three vertical
    lines are drawn on top of the histogram: a white one at the mean and
    red ones at ``+std`` and ``-std``. Note that the red lines are measured
    from zero, not from the mean.
    """
    date_span = pd.date_range(s_date, e_date)
    prices = get_data([symbol], date_span, 'Adj Close')
    returns = compute_daily_returns(prices)[symbol]

    returns.hist(bins=20)
    mean = returns.mean()
    std = returns.std()

    plt.axvline(mean, color='w', linewidth=3)
    for position in (std, -std):
        plt.axvline(position, color='r', linewidth=2)
    plt.show()

In [58]:
def plot_scatter_dr(symbols, s_date, e_date, x_val, y_val, c_name):
    """Scatter-plot the daily returns of one column against another.

    Parameters
    ----------
    symbols : list
        Ticker symbols, passed straight to ``get_data``.
    s_date, e_date :
        Bounds handed to ``pd.date_range`` to build the date index.
    x_val, y_val : str
        Column names (symbols) used for the x and y axes.
    c_name : str
        CSV column to load for every symbol.
    """
    date_span = pd.date_range(s_date, e_date)
    prices = get_data(symbols, date_span, c_name)
    returns = compute_daily_returns(prices)
    returns.plot(kind='scatter', x=x_val, y=y_val)
    plt.show()

In [73]:
def portfolio_value(start_val, s_date, e_date, symbols, alloc):
    """Compute the value of a fixed-allocation portfolio for each date.

    Parameters
    ----------
    start_val : number
        Starting portfolio value.
    s_date, e_date :
        Bounds handed to ``pd.date_range`` to build the date index.
    symbols : sequence of str
        Ticker symbols held in the portfolio.
    alloc : sequence of numbers
        Fraction of ``start_val`` assigned to each symbol; ``alloc[i]``
        belongs to ``symbols[i]``.

    Returns
    -------
    pandas.Series
        Portfolio value per date: normalized 'Adj Close' prices scaled by the
        allocation and by ``start_val``, summed across symbols.

    Raises
    ------
    ValueError
        If ``symbols`` and ``alloc`` differ in length.
    """
    symbols = list(symbols)
    if len(symbols) != len(alloc):
        raise ValueError(
            "symbols and alloc must have the same length "
            "({} != {})".format(len(symbols), len(alloc))
        )

    # get_data always loads 'SPY' in addition to what was requested. Hand it
    # its own list and keep only the requested columns afterwards, so every
    # allocation stays paired with its symbol and SPY is counted only when
    # the caller actually holds it.
    prices = get_data(list(symbols), pd.date_range(s_date, e_date), 'Adj Close')
    prices = prices[symbols]

    normed = normalize_data(prices)
    # A 1-D array with one entry per column is applied column by column.
    weights = np.asarray(alloc, dtype=float)
    position_values = (normed * weights) * start_val
    return position_values.sum(axis=1)

In [76]:
def stats_on_p(port_val, samples_per_year=252, daily_rf=0.0):
    """Summarize a portfolio value series by its daily-return statistics.

    Parameters
    ----------
    port_val : pandas.Series
        Portfolio value per date.
    samples_per_year : number, optional
        Number of return samples per year, used to annualize the Sharpe
        ratio. The default of 252 reproduces the previously hard-coded value.
    daily_rf : number, optional
        Risk-free return per sample, subtracted from the returns before
        averaging. The default of 0.0 reproduces the previous behaviour.

    Returns
    -------
    list
        ``[average daily return, standard deviation of daily returns,
        Sharpe ratio]``.
    """
    daily_r = compute_daily_returns(port_val)
    # The first row has no previous value to compare against; drop it by
    # position so it does not take part in the statistics.
    daily_r = daily_r.iloc[1:]

    avg_dr = daily_r.mean()
    std_dr = daily_r.std()
    # A constant risk-free rate shifts the mean but leaves the spread as is.
    excess_mean = (daily_r - daily_rf).mean()
    sharpe_r = np.sqrt(samples_per_year) * excess_mean / std_dr
    return [avg_dr, std_dr, sharpe_r]

In [77]:
# Example: 1,000,000 split 60/40 between SPY and GOOG over 2019; prints
# [average daily return, std of daily returns, Sharpe ratio].
pv = portfolio_value(
    start_val=1000000,
    s_date='2019-01-02',
    e_date='2019-12-30',
    symbols=['SPY','GOOG'],
    alloc=[0.6,0.4],
)
print(stats_on_p(pv))

[0.0010847857322331578, 0.009841398951777637, 1.7497959105264942]


In [79]:
def error(line, data):
    """Sum of squared vertical distances between points and a straight line.

    Parameters
    ----------
    line : tuple, list or array
        ``(slope, intercept)`` of the line.
    data : 2-D array
        Scatter points, one per row, as ``(x, y)``.

    Returns
    -------
    The sum over all points of ``(y - (slope * x + intercept)) ** 2``.
    """
    slope, intercept = line[0], line[1]
    xs = data[:, 0]
    ys = data[:, 1]
    residuals = ys - (slope * xs + intercept)
    return np.sum(residuals ** 2)