In [None]:
import datetime as dt
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as web
import pylab as pl
import requests
import seaborn as sns
from scipy import stats

In [None]:
# Read the S&P 500 history CSV into ddf and preview its first rows.
csv_path = '../input/sp500-11year-history/SP500_11year.csv'
ddf = pd.read_csv(csv_path)
ddf.head()

# Finding Unicorns
I found a subset of stocks that performs at roughly 10X the rest of the market.  
After isolating these stocks, I simulated a buying pattern on them, and this notebook walks through that pattern. 

In [None]:
# Trade ratios: column B{b}S{s} = ND{s} / ND{b}, i.e. "buy on day b, sell on
# day s" for b in 1-3 and s in 10, 15, 20. A ratio above 1 means the sell-day
# value exceeded the buy-day value.
# NOTE(review): the S20 columns were previously computed from ND10 (a copy of
# the S10 lines), which made them duplicates of B?S10. They now use ND20 —
# this assumes the CSV provides an ND20 column; confirm against the dataset.
for sell_day in (10, 15, 20):
    for buy_day in (1, 2, 3):
        ddf[f'B{buy_day}S{sell_day}'] = ddf[f'ND{sell_day}'] / ddf[f'ND{buy_day}']

In [None]:
# Select each trading day's worst performer, then summarise the trade ratios of
# those whose dip is small relative to the Spread column (the "Unicorns").

# Total P1 per (Date, Ticker); the author describes P1 as performance over the
# prior day. The string 'sum' replaces the builtin `sum`, which recent pandas
# flags with a FutureWarning while computing the same groupby sum.
ddfg = ddf.groupby(['Date', 'Ticker']).agg({'P1': 'sum'})

# Within each Date keep the single smallest P1, i.e. that day's worst performer.
dives = ddfg['P1'].groupby(['Date'], group_keys=False).nsmallest(1)
dives = dives.to_frame()

# Left-merge back onto ddf to recover every column for the selected rows.
dd = dives.merge(ddf, how='left', on=['Date', 'Ticker', 'P1'])

# (D1 - Close) as a fraction of Spread. Per the author's note this compares the
# day's dip against the month's volatility.
dd['Percentage1'] = (dd['D1'] - dd['Close']) / dd['Spread']

# Unicorns: worst performer of the day whose Percentage1 is below 0.1 (10%).
# describe() yields count/mean/std/quantiles for each buy/sell ratio column.
Unicorn = (
    dd[dd['Percentage1'] < 0.1]
    .filter(['B1S10', 'B2S10', 'B3S10', 'B1S15', 'B2S15', 'B3S15', 'B1S20', 'B2S20', 'B3S20'])
    .describe()
)
Unicorn

In [None]:
# Baseline: the same summary statistics computed over every row of ddf.
ratio_columns = [
    'B1S10', 'B2S10', 'B3S10',
    'B1S15', 'B2S15', 'B3S15',
    'B1S20', 'B2S20', 'B3S20',
]
av_stock = ddf.filter(items=ratio_columns).describe()
av_stock  # Pay attention to the mean value of an average stock over 11 years

In [None]:
# Ratio of the Unicorns' mean gain to the mean gain of all rows, per column.
# A mean ratio of 1 is break-even, so 1 is subtracted from both means first.
unicorn_gain = Unicorn.loc['mean'] - 1
market_gain = av_stock.loc['mean'] - 1
multiplier = (unicorn_gain / market_gain).round(2)
multiplier

In [None]:
# Now that we have established that these stocks perform 10X the market, let's look at them.
# .copy() makes Unicorn_stocks an independent frame, so the Date assignment
# below does not write into a slice of dd (avoids SettingWithCopyWarning).
Unicorn_stocks = dd[dd['Percentage1'] < 0.1].copy()
# pd.to_datetime replaces astype('datetime64'): pandas 2.x rejects the
# unit-less 'datetime64' dtype.
Unicorn_stocks['Date'] = pd.to_datetime(Unicorn_stocks['Date'])
# Unicorn_stocks  # Uncomment to view the stocks

Now let's graph one of the stocks to see how it performs as a Unicorn.

In [None]:
date = '2011-08-23'           # Define the date
stock = 'PHM'                 # Define the stock, or ticker

# Find the ddf row for this ticker on this date (mask evaluated once).
event_rows = ddf[(ddf['Date'] == date) & (ddf['Ticker'] == stock)]
if event_rows.empty:
    raise IndexError(f'No row in ddf for {stock} on {date}')
close = event_rows['Close'].values[0]          # Closing value on the event date
i = event_rows.index.values.astype(int)[0]     # Index label of the event row

# The index label is used as a row position below, which assumes ddf still has
# the default 0..n-1 index from read_csv.
# NOTE(review): the window also assumes the rows around i belong to the same
# ticker in date order — confirm against the CSV layout.
graph_start = max(i - 30, 0)     # clamp: a negative start would wrap around and give an empty slice
graph_end = i + 20
holding = i + 10                 # row where the simulated position is sold
X = ddf.filter(['Date']).iloc[graph_start:graph_end].apply(pd.to_datetime)  # Define the X axis
y = ddf.filter(['Close']).iloc[graph_start:graph_end]   # Define the y axis
s = ddf.filter(['Close']).iloc[holding].values[0]       # We'll make a horizontal line at value s

In [None]:

# Plot the close series around the event and mark the buy/sell levels.
# The marker dates are converted to scalar Timestamps so they live on the same
# datetime x axis as X (previously a one-element Series of raw date values
# from ddf was passed to axvline).
event_day = pd.to_datetime(ddf['Date'].iloc[i])
band_centre = pd.to_datetime(ddf['Date'].iloc[i + 5])

plt.plot(X, y)
plt.xlabel('Dates before and after Unicorn Performance')
plt.ylabel('Close Value')
plt.title(f'{stock} performing as a Unicorn\nBuy on Red, Sell on Green')
plt.xticks(rotation = 30)
# This linewidth trick shows when we buy and sell in time: a very thick line
# centred at row i+5 stands in for a band over the holding period.
plt.axvline(x=band_centre, color = 'y', alpha = 0.5,
            linewidth = 65, ymax = 0.3)
plt.axvline(x=event_day, color = 'y', alpha = 0.5)   # the event date itself
plt.axhline(y=close, color ='r', alpha = 0.2)        # close on the event date
plt.axhline(y = s, color = 'g', alpha = 0.3)         # close at the holding row
plt.show()

Now that we have one Unicorn graphed, let's graph them all.  
We used row positions in `ddf` to build the x axis in the prior example, but that approach doesn't always work.

In this next piece of code, we'll pull the price data directly from Yahoo Finance instead.

In [None]:
# Plot every Unicorn: fetch roughly two months of prices around the event date
# and mark the event date, the buy level and the sell level.
# Loop variables are named so they do not overwrite i / date / stock / close /
# holding, which the single-stock cells above rely on.
# NOTE(review): the 'yahoo' source depends on a remote service; errors other
# than KeyError are not handled here and will stop the loop.
for _, row in Unicorn_stocks.iterrows():
    ticker = row['Ticker']
    event_date = row['Date']
    buy_close = row['Close']
    window_start = event_date - pd.Timedelta(days=35)
    window_end = event_date + pd.Timedelta(days=21)
    sell_date = event_date + pd.Timedelta(days=14)

    # Do every lookup that can raise KeyError *before* drawing anything, so a
    # failed ticker cannot leave a half-built figure behind that would bleed
    # into the next ticker's plot.
    try:
        prices = web.DataReader(ticker, 'yahoo', window_start, window_end)
        sell_close = prices.loc[sell_date, 'Close']   # KeyError if sell_date is not in the index
    except KeyError:
        print("Problems with "+ ticker)
        continue

    plt.xlabel('Dates before and after Unicorn Performance')
    plt.ylabel('Close Value')
    plt.title(f'{ticker} performing as a Unicorn\nBuy on Red, Sell on Green')
    plt.axvline(x=event_date, color = 'y', alpha = 0.5)
    plt.axhline(y=buy_close, color ='r', alpha = 0.2)
    plt.axhline(y=sell_close, color = 'g', alpha = 0.3)
    plt.xticks(rotation = 90)
    plt.plot(prices['Close'])
    #plt.savefig(f'../output/Unicorn_images/{ticker}on{event_date.year}_{event_date.month}_{event_date.day}.png')
    print(ticker)
    plt.show()
    plt.close()

In [None]:
# Normal curve fitted to the Unicorns' B1S10 ratios (buy day 1, sell day 10).
# `stats` comes from the notebook's top import cell; the unused `xran` range
# was removed and plotting goes through plt only.
Us = Unicorn_stocks.copy()
# Drop NaN ratios so every plotted point has a defined density value.
h = Us['B1S10'].dropna().sort_values()
# Density of a normal distribution with h's own mean and standard deviation,
# evaluated at each (sorted) ratio.
fit = stats.norm.pdf(h, np.mean(h), np.std(h))
plt.title('Distribution of Unicorn Stocks\nAs Percentage Growth')
plt.plot(h, fit, '-o')
plt.show()

In [None]:
# Compare the Unicorns' B1S10 ratios (h, built in the previous cell) with the
# B1S10 ratios of every row in ddf.
# NaN ratios are dropped first: ttest_ind propagates NaN by default, which
# would turn both t and p into NaN.
h_valid = h.dropna()
g = ddf['B1S10'].dropna().sort_values()

# equal_var=False selects Welch's t-test (no equal-variance assumption).
t, p = stats.ttest_ind(h_valid, g, equal_var=False)
print(f't = {t:.3f}, p = {p:.3g}')   # previously computed but never shown

# Normal curves using each sample's own mean and standard deviation.
fit = stats.norm.pdf(h_valid, np.mean(h_valid), np.std(h_valid))
gfit = stats.norm.pdf(g, np.mean(g), np.std(g))
plt.title('Distribution of Unicorns\nAgainst All Stocks')
plt.plot(h_valid, fit, '-o')
plt.plot(g, gfit, 'r')
plt.axvline(x = 1, c = 'y', alpha = 0.8)   # ratio 1: sell value equals buy value
plt.show()

Just to recap, we isolated a buying pattern that shows a history of 10X typical market performance.  Because there were only 45 out of 1.3M records, we called them Unicorns.  To define a Unicorn, we filtered the worst performance of the day for each trading day, and then qualified those worst performers by a ratio of the dip compared to the month's volatility.  We followed up identifying the Unicorns by graphing each of their performances over about two months.  Finally, we used some stats graphing to show how the distribution of Unicorns differs from all stocks.  It can be seen that while some Unicorns perform under market value, most of them have a positive ROI and a special few outliers help drive up the multiplier. 

If you stumble upon this data set and want to carry it forward, here are some ideas: choose a different buying pattern for the same stocks; expand the volatility threshold (to get more stocks); filter the worst five or ten performers of the day (to get more stocks); or look for a different historical event (here I chose the worst performer of the day, but you might try the highest performer of the day, or stocks with four declining days in a row).  Whatever you try, make sure that you assess your subset's performance and compare it to typical market performance.  

Disclosure:  I'm not advocating that you buy any particular stock, nor am I providing any assurance as to the performance of any particular stock.  