In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Import Data
# Two CSVs from the Kaggle dataset: one is read into `raw_stocks`, the other
# into `raw_prices`.
stocks_csv = '../input/daily-historical-stock-prices-1970-2018/historical_stocks.csv'
prices_csv = '../input/daily-historical-stock-prices-1970-2018/historical_stock_prices.csv'

raw_stocks = pd.read_csv(stocks_csv)
raw_prices = pd.read_csv(prices_csv)

In [None]:
# Select Stock Universe
# Keep only the rows whose sector is exactly 'TECHNOLOGY', then keep only the
# price rows whose ticker belongs to that set.
is_tech = raw_stocks['sector'] == 'TECHNOLOGY'
stocks = raw_stocks.loc[is_tech]

in_universe = raw_prices['ticker'].isin(stocks['ticker'])
prices = raw_prices.loc[in_universe]



In [None]:
# Get Adjusted Close Price
# Reshape the long price table into a wide one: one row per date, one column
# per ticker, cells holding the adjusted close.
pivoted = prices.pivot(index='date', columns='ticker', values='adj_close')

# Convert the date index to datetimes so it can be filtered and resampled.
pivoted.index = pd.to_datetime(pivoted.index)

# Keep only rows strictly after the cutoff date.
after_cutoff = pivoted.index > '01/01/2010'
adj_close = pivoted[after_cutoff]

In [None]:
# Get Weekly
# Bucket the rows with pandas' 'W' (weekly) rule and keep the last value of
# each ticker in every bucket.
weekly_bins = adj_close.resample('W')
adj_close = weekly_bins.last()

In [None]:
# Get Weekly Log Returns
# Ratio of each weekly price to the previous row's price; the first row has no
# predecessor and therefore becomes NaN.
price_ratio = adj_close.div(adj_close.shift(periods=1))
log_returns = np.log(price_ratio)

In [None]:
# Define get top / bot performers
def get_top(data, top_n = 10):
    """Flag, for every row, the columns that were the top performers one row earlier.

    The input is shifted down by one row before ranking, so the flags on a
    given row depend only on the values of the previous row (no look-ahead).

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric frame; rows are ranked across columns.
    top_n : int, default 10
        Number of largest values to flag per row.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``data``; 1 where the column was among the
        ``top_n`` largest non-NaN values of the previous row, 0 elsewhere.
        The first row is all zeros because it has no previous row.
    """
    # Shift once and reuse it for both the template and the iteration.
    lagged = data.shift(1)
    top_performers = lagged.copy()
    for label, row in lagged.iterrows():
        # Assigning the shorter Series aligns on column labels; every column
        # that is not among the selected ones becomes NaN for this row.
        top_performers.loc[label] = row.nlargest(top_n)
    # Built-in int replaces the removed `np.int` alias (gone since NumPy 1.24).
    return top_performers.notna().astype(int)

def get_bot(data, bot_n = 10):
    """Flag, for every row, the columns that were the bottom performers one row earlier.

    The input is shifted down by one row before ranking, so the flags on a
    given row depend only on the values of the previous row (no look-ahead).

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric frame; rows are ranked across columns.
    bot_n : int, default 10
        Number of smallest values to flag per row.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``data``; 1 where the column was among the
        ``bot_n`` smallest non-NaN values of the previous row, 0 elsewhere.
        The first row is all zeros because it has no previous row.
    """
    # Shift once and reuse it for both the template and the iteration.
    lagged = data.shift(1)
    bot_performers = lagged.copy()
    for label, row in lagged.iterrows():
        # Assigning the shorter Series aligns on column labels; every column
        # that is not among the selected ones becomes NaN for this row.
        bot_performers.loc[label] = row.nsmallest(bot_n)
    # Built-in int replaces the removed `np.int` alias (gone since NumPy 1.24).
    return bot_performers.notna().astype(int)

In [None]:
# Calculate Model Returns
n_stocks = 10

# 0/1/2 indicator per (week, ticker): 1 for each of the two selections (last
# week's best and last week's worst) that the ticker belongs to.
held_positions = get_top(log_returns, n_stocks) + get_bot(log_returns, n_stocks)

# Each held name contributes its weekly log return scaled by 1 / n_stocks.
# NOTE(review): up to 2 * n_stocks names are held but the divisor is n_stocks,
# so the weights can sum to 2 - confirm this scaling is intended.
returns = (held_positions * log_returns) / n_stocks

# Calculate Sector Returns
# The benchmark is simply every ticker's own log return (same object, not a copy).
sector_returns = log_returns

In [None]:
import matplotlib.pyplot as plt

# Weekly log return of the strategy (row-wise sum of the weighted positions)
# against the equal-weighted average log return of the whole sector.
fig, ax = plt.subplots()
ax.plot(returns.T.sum(), label='Strategy')
ax.plot(sector_returns.T.mean(), label='Sector mean')
ax.set(title='Weekly log returns', xlabel='Date', ylabel='Log return')
# Trailing semicolon keeps the notebook from echoing the Legend repr.
ax.legend();

In [None]:
# Cumulative weekly log return of the strategy against the cumulative
# equal-weighted average log return of the sector.
fig, ax = plt.subplots()
ax.plot(returns.T.sum().cumsum(), label='Strategy')
ax.plot(sector_returns.T.mean().cumsum(), label='Sector mean')
ax.set(title='Cumulative weekly log returns', xlabel='Date', ylabel='Cumulative log return')
# Trailing semicolon keeps the notebook from echoing the Legend repr.
ax.legend();

In [None]:
from scipy import stats

# Strategy: one log return per week, the row-wise sum of the weighted positions.
expected_returns_by_date = returns.T.sum().dropna()
portfolio_ret_mean = expected_returns_by_date.mean()
portfolio_ret_se = expected_returns_by_date.sem()
# NOTE(review): this annualises every individual weekly return (a Series), not
# the mean weekly return - confirm which one is wanted; the value is not used
# further in this cell.
portfolio_ret_annual = (np.exp(expected_returns_by_date * 52) - 1) * 100

# Benchmark: equal-weighted average across tickers per week, the same series
# that is plotted as the sector line. Summing across tickers instead would
# scale the benchmark by the number of tickers and make it incomparable to
# the strategy return.
expected_sector_returns_by_date = sector_returns.T.mean().dropna()
sector_ret_mean = expected_sector_returns_by_date.mean()

# One-sample t-test of the weekly strategy returns against the average weekly
# sector return, treated as a fixed constant.
data = expected_returns_by_date
t_value, p_two_sided = stats.ttest_1samp(data, sector_ret_mean)

# One-sided p-value for H1: strategy mean > sector mean (assumed direction -
# TODO confirm). Halving the two-sided p-value is only correct when t is
# positive; for a negative t the one-sided p-value is the complement.
p_value = p_two_sided / 2 if t_value > 0 else 1 - p_two_sided / 2

print('T:', t_value, "P:", p_value)