# <a class="anchor" id="toc"></a> Table of Contents:
* [Data Generation](#data_gen)
* [Adding Outliers](#add_outliers)
* [Outlier Detection](#outlier_detection)
    * [Historical Mean and Standard Deviation](#hist_std)
    * [Expanding Mean and Standard Deviation](#exp_std)
    * [Rolling Mean and Standard Deviation](#roll_std)
        * [Simple Moving Average and Standard Deviation](#sma_std)
        * [Exponential Moving Average and Standard Deviation](#ema_std)
* [Robust Outlier Detection](#robust_outlier_detection)
    * [Median Absolute Deviation (MAD)](#mad)
        * [Simple Median and MAD](#simple_mad)
        * [Expanding Median and MAD](#exp_mad)
        * [Rolling Median and MAD](#rolling_mad)
    * [Interquartile Range (IQR)](#iqr)
        * [Simple IQR](#simple_iqr)
        * [Expanding IQR](#exp_iqr)
        * [Rolling IQR](#rolling_iqr)

In [20]:
import pandas as pd
import numpy as np
import plotly
import plotly.offline as py
import plotly.graph_objs as go
import datetime
import pandas_datareader.data as web

In [21]:
# Plotly credentials are taken from the environment rather than being embedded
# in the notebook, so no API key is ever committed alongside the code.
# NOTE(review): any key that was previously hard-coded here should be treated
# as leaked and revoked.
import os

_plotly_username = os.environ.get('PLOTLY_USERNAME')
_plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if _plotly_username and _plotly_api_key:
    plotly.tools.set_credentials_file(username=_plotly_username, api_key=_plotly_api_key)

# Every figure in this notebook is rendered through plotly.offline (`py`).
py.init_notebook_mode(connected=True)

# Data Generation <a class="anchor" id="data_gen"></a>

[Back to Table of Contents](#toc)

Let's generate 200 random points and add a drift term to them:

In [47]:
# Number of observations in the synthetic series.
num = 200
# Linear drift rising from -0.05 to 0.2 across the series.
t = np.linspace(-0.05, .2, num) #drift
# Drift plus Gaussian noise; dividing by 20 scales the standard-normal draws.
x = t + np.random.normal(size=num)/20.
# Month-end dates ending on 2018-01-31.  The MonthEnd offset object is used
# instead of the 'M' string alias, which newer pandas releases deprecate, so
# the same dates are produced regardless of the installed pandas version.
df = pd.DataFrame({'Date':pd.date_range(end='1/31/2018', periods = num, freq=pd.offsets.MonthEnd()), 'Value':x})

Let's see some descriptive statistics that summarize the central tendency, dispersion and shape of our dataset:

In [48]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Value
count,200.0
mean,0.070807
std,0.082884
min,-0.1181
25%,0.012035
50%,0.064337
75%,0.129345
max,0.266385


Let's also plot our dataset:

In [49]:
# One line trace of the generated series against its dates.
data = [
    go.Scatter(x=df.Date, y=df.Value, name='Returns data'),
]
layout = go.Layout(
    title='Returns data',
    xaxis={'title': 'Date'},
    # '.0%' renders the y tick labels as whole-number percentages.
    yaxis={'title': 'Return', 'tickformat': '.0%'},
)

In [50]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

# Adding Outliers<a class="anchor" id="add_outliers"></a>

[Back to Table of Contents](#toc)

Our dataset is generated using a Gaussian distribution and does not seem to have many outliers. We can modify the dataset and add some outliers to it:

In [51]:
def create_outliers(df, interval, value_col, min_intensity, max_intensity):
    """Return a copy of ``df`` with artificial outliers injected into ``value_col``.

    Rows are processed in consecutive windows of ``interval`` rows.  In every
    processed window the largest value is shifted by a random amount drawn
    with ``np.random.uniform(0, max_intensity)`` and the smallest value by a
    random amount drawn with ``np.random.uniform(min_intensity, 0)``.

    A window is only processed when at least one row follows it, so a trailing
    partial window -- and a final full window ending exactly on the last row --
    is left untouched.

    Args:
        df: Input frame; it is not modified.
        interval: Number of rows per window.
        value_col: Name of the column to perturb.
        min_intensity: Lower bound of the shift applied to window minima.
        max_intensity: Upper bound of the shift applied to window maxima.

    Returns:
        The copied frame with two extra columns: ``Outlier`` (``True`` on
        shifted rows, ``False`` elsewhere) and ``Intensity`` (the applied
        shift, ``None`` on untouched rows).
    """
    tagged = df.copy()
    tagged['Outlier'] = False
    tagged['Intensity'] = None

    def _shift(label, amount):
        # Move one row by `amount` and record that it was tampered with.
        tagged.loc[label, value_col] = amount + tagged.loc[label, value_col]
        tagged.loc[label, 'Outlier'] = True
        tagged.loc[label, 'Intensity'] = amount

    n_rows = tagged.shape[0]
    # Stopping at n_rows - interval keeps only windows that end before the
    # last row of the frame.
    for start in range(0, n_rows - interval, interval):
        window = tagged.iloc[start:start + interval][value_col]
        # Both extremes are located before either of them is modified.
        peak, trough = window.idxmax(), window.idxmin()
        _shift(peak, np.random.uniform(0., max_intensity))
        _shift(trough, np.random.uniform(min_intensity, 0))
    return tagged

In [52]:
# Window length (in rows) within which one high and one low outlier are injected.
interval = 36
value_col = 'Value'
# Bounds of the random shifts applied to window minima and maxima respectively.
min_intensity = -0.1
max_intensity = 0.5
df_w_outliers = create_outliers(
    df,
    interval=interval,
    value_col=value_col,
    min_intensity=min_intensity,
    max_intensity=max_intensity,
)

Let's see the summary statistics of our new dataset:

In [53]:
# Summary statistics of the numeric columns after the outliers were injected.
df_w_outliers.describe()

Unnamed: 0,Value
count,200.0
mean,0.078941
std,0.109198
min,-0.15427
25%,0.012035
50%,0.065527
75%,0.131512
max,0.640521


In [54]:
# Report how far the injected outliers moved the mean versus the median.
# print() is called as a function: the Python 2 print statement is a
# SyntaxError on Python 3, while a single parenthesised argument prints the
# same text on both.
print('The mean of the dataset has changed by {mean:.2} but the median has only changed by {median:.2}'.format(
    mean=abs(df_w_outliers.Value.mean()-df.Value.mean()),
    median=abs(df_w_outliers.Value.median()-df.Value.median())))

The mean of the dataset has changed by 0.0081 but the median has only changed by 0.0012


The mean has been impacted more by anomalies than the median. Let's plot our new dataset:

In [55]:
# Full series as a line, with the injected outliers overlaid as markers.
dataset = go.Scatter(x=df_w_outliers.Date, y=df_w_outliers.Value, name='Returns')
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_w_outliers[df_w_outliers.Outlier].Date,
                      y=df_w_outliers[df_w_outliers.Outlier].Value,
                      mode='markers',
                      name='Outliers')
data = [dataset, ourliers]
layout = go.Layout(
    title='Returns data with outliers',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [56]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

# Outlier Detection<a class="anchor" id="outlier_detection"></a>

## Historical Mean and Standard Deviation<a class="anchor" id="hist_std"></a>

[Back to Table of Contents](#toc)

One way we are currently identifying outliers is by tagging data points that are n standard deviations away from the overall mean of the dataset:

In [57]:
def generate_std_envelope(df, z, value_col):
    """Attach a constant mean +/- ``z`` standard-deviation band to a copy of ``df``.

    The mean and standard deviation are computed once over the whole of
    ``value_col``, so every row receives the same three values.

    Args:
        df: Input frame; it is not modified.
        z: Band half-width, in standard deviations.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Mean``, ``Hi`` and ``Low`` columns added.
    """
    series = df[value_col]
    center = series.mean()
    half_width = z * series.std()

    enveloped = df.copy()
    enveloped['Mean'] = center
    enveloped['Hi'] = center + half_width
    enveloped['Low'] = center - half_width
    return enveloped

In [58]:
# Band half-width in standard deviations, and the column to analyse.
z = 4
value_col = 'Value'
df_hist = generate_std_envelope(df_w_outliers, z=z, value_col=value_col)

In [59]:
dataset = go.Scatter(x=df_hist.Date, y=df_hist.Value, name='Returns')
hi = go.Scatter(x=df_hist.Date, y=df_hist.Hi, name='Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
mean = go.Scatter(x=df_hist.Date, y=df_hist.Mean, name='Average', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_hist.Date, y=df_hist.Low, name='Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_hist[df_hist.Outlier].Date,
                      y=df_hist[df_hist.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, mean]
layout = go.Layout(
    title='Historical Mean and Standard Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [60]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

This technique only captures a small subset of the anomalies. There are at least two problems with it:
 - This technique ignores the trend and therefore creates very wide bands around the data, so many anomalies fall inside the bands and go undetected.
 - Since the mean is not robust to anomalies, it fails to detect many of them, as the anomalies are averaged into the mean.

## Expanding Mean and Standard Deviation<a class="anchor" id="exp_std"></a>

[Back to Table of Contents](#toc)

A second way we are currently identifying outliers is by tagging data points that are n standard deviations away from the _expanding_ mean of the dataset:

In [61]:
def generate_expanding_std_envelope(df, z, value_col):
    """Attach an expanding-window mean +/- ``z`` standard-deviation band.

    Each row's statistics are computed from ``value_col`` over all rows up to
    and including that row.

    Args:
        df: Input frame; it is not modified.
        z: Band half-width, in standard deviations.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Mean``, ``Hi`` and ``Low`` columns added.
    """
    running = df[value_col].expanding()
    center = running.mean()
    half_width = z * running.std()

    enveloped = df.copy()
    enveloped['Mean'] = center
    enveloped['Hi'] = center + half_width
    enveloped['Low'] = center - half_width
    return enveloped

In [62]:
# Band half-width in standard deviations, and the column to analyse.
z = 4
value_col = 'Value'
df_expanding = generate_expanding_std_envelope(df_w_outliers, z=z, value_col=value_col)

In [63]:
dataset = go.Scatter(x=df_expanding.Date, y=df_expanding.Value, name='Returns')
hi = go.Scatter(x=df_expanding.Date, y=df_expanding.Hi, name='Expanding Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
mean = go.Scatter(x=df_expanding.Date, y=df_expanding.Mean, name='Expanding Average', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_expanding.Date, y=df_expanding.Low, name='Expanding Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_expanding[df_expanding.Outlier].Date,
                      y=df_expanding[df_expanding.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, mean]
layout = go.Layout(
    title='Expanding Mean and Standard Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [64]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

This technique tries to capture the trend but it still has the following problems:
 - Since the trend has a positive slope and this technique puts equal weights on all data points, the average is held back by the earlier data points and fails to fully capture the trend.
 - The average is still impacted by the anomalies

## Rolling Mean and Standard Deviation<a class="anchor" id="roll_std"></a>

[Back to Table of Contents](#toc)

To address the first issue with the expanding standard deviation, we can try using only a subset of more recent observations to estimate the trend (e.g. simple moving average) or putting more emphasis on the more recent observations (e.g. exponential moving average).

### Simple Moving Average and Standard Deviation<a class="anchor" id="sma_std"></a>

[Back to Table of Contents](#toc)

In [65]:
def generate_simple_rolling_std_envelope(df, w, z, value_col):
    """Attach a rolling-window mean +/- ``z`` standard-deviation band.

    Each row's statistics are computed from ``value_col`` with
    ``Series.rolling(w)``.

    Args:
        df: Input frame; it is not modified.
        w: Rolling window passed to ``Series.rolling``.
        z: Band half-width, in standard deviations.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Mean``, ``Hi`` and ``Low`` columns added.
    """
    trailing = df[value_col].rolling(w)
    center = trailing.mean()
    half_width = z * trailing.std()

    enveloped = df.copy()
    enveloped['Mean'] = center
    enveloped['Hi'] = center + half_width
    enveloped['Low'] = center - half_width
    return enveloped

In [66]:
# Rolling window, band half-width in standard deviations, and the column to analyse.
w = 24
z = 4
value_col = 'Value'
df_simple_rolling = generate_simple_rolling_std_envelope(df_w_outliers, w=w, z=z, value_col=value_col)

In [67]:
dataset = go.Scatter(x=df_simple_rolling.Date, y=df_simple_rolling.Value, name='Returns')
hi = go.Scatter(x=df_simple_rolling.Date, y=df_simple_rolling.Hi, name='Rolling Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
mean = go.Scatter(x=df_simple_rolling.Date, y=df_simple_rolling.Mean, name='Rolling Average', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_simple_rolling.Date, y=df_simple_rolling.Low, name='Rolling Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_simple_rolling[df_simple_rolling.Outlier].Date,
                      y=df_simple_rolling[df_simple_rolling.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, mean]
layout = go.Layout(
    title='Simple Rolling Mean and Standard Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [68]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

### Exponential Moving Average and Standard Deviation<a class="anchor" id="ema_std"></a>

[Back to Table of Contents](#toc)

In [69]:
def generate_exponential_rolling_std_envelope(df, alpha, z, value_col):
    """Attach an exponentially weighted mean +/- ``z`` standard-deviation band.

    Each row's statistics are computed from ``value_col`` with
    ``Series.ewm(alpha=alpha)``.

    Args:
        df: Input frame; it is not modified.
        alpha: Smoothing factor passed to ``Series.ewm``.
        z: Band half-width, in standard deviations.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Mean``, ``Hi`` and ``Low`` columns added.
    """
    weighted = df[value_col].ewm(alpha=alpha)
    center = weighted.mean()
    half_width = z * weighted.std()

    enveloped = df.copy()
    enveloped['Mean'] = center
    enveloped['Hi'] = center + half_width
    enveloped['Low'] = center - half_width
    return enveloped

In [70]:
# Smoothing factor, band half-width in standard deviations, and the column to analyse.
alpha = 0.1
z = 4
value_col = 'Value'
df_exp_rolling = generate_exponential_rolling_std_envelope(df_w_outliers, alpha=alpha, z=z, value_col=value_col)

In [71]:
dataset = go.Scatter(x=df_exp_rolling.Date, y=df_exp_rolling.Value, name='Returns')
hi = go.Scatter(x=df_exp_rolling.Date, y=df_exp_rolling.Hi, name='Rolling Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
mean = go.Scatter(x=df_exp_rolling.Date, y=df_exp_rolling.Mean, name='Rolling Average', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_exp_rolling.Date, y=df_exp_rolling.Low, name='Rolling Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_exp_rolling[df_exp_rolling.Outlier].Date,
                      y=df_exp_rolling[df_exp_rolling.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, mean]
layout = go.Layout(
    title='Exponential Rolling Mean and Standard Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [72]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

Both moving average techniques capture the trend better than before. However, they both still suffer from the issue of being greatly influenced by outliers. The reason is that the conventional measures of location and scale, such as the sample mean, variance, or standard deviation, are non-robust. To overcome this issue, we can leverage robust measures of location and scale to capture the trend and identify outliers.

# Robust Outlier Detection<a class="anchor" id="robust_outlier_detection"></a>

## Median Absolute Deviation (MAD)<a class="anchor" id="mad"></a>

[Back to Table of Contents](#toc)

Instead of using the mean and standard deviations, we can use the median and MAD of the dataset to tag data points that are n MAD away from the median:

### Simple Median and MAD<a class="anchor" id="simple_mad"></a>

[Back to Table of Contents](#toc)

In [73]:
def generate_mad_envelope(df, z, value_col):
    """Attach a constant median +/- ``z`` MAD band to a copy of ``df``.

    The median and the median absolute deviation (MAD) are computed once over
    the whole of ``value_col``, so every row receives the same three values.

    The MAD is computed explicitly as the median of the absolute deviations
    from the median.  ``Series.mad()`` is deliberately not used: it returns
    the *mean* absolute deviation around the mean, which is not robust to
    outliers, and it is no longer available in recent pandas releases.

    Args:
        df: Input frame; it is not modified.
        z: Band half-width, in MADs.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Median``, ``Hi`` and ``Low`` columns added.
    """
    df = df.copy()
    center = df[value_col].median()
    mad = (df[value_col] - center).abs().median()
    df['Median'] = center
    df['Hi'] = center + z * mad
    df['Low'] = center - z * mad
    return df

In [74]:
# Band half-width in MADs, and the column to analyse.
z = 4
value_col = 'Value'
df_mad = generate_mad_envelope(df_w_outliers, z=z, value_col=value_col)

In [75]:
dataset = go.Scatter(x=df_mad.Date, y=df_mad.Value, name='Returns')
hi = go.Scatter(x=df_mad.Date, y=df_mad.Hi, name='Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
median = go.Scatter(x=df_mad.Date, y=df_mad.Median, name='Median', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_mad.Date, y=df_mad.Low, name='Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_mad[df_mad.Outlier].Date,
                      y=df_mad[df_mad.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, median]
layout = go.Layout(
    title='Historical Median and Median Absolute Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [76]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

Since the median and median absolute deviation are not influenced by the outliers, we immediately see some improvements over the historical standard deviation technique used previously.

### Expanding Median and MAD<a class="anchor" id="exp_mad"></a>

[Back to Table of Contents](#toc)

In [77]:
def generate_expanding_mad_envelope(df, z, value_col):
    """Attach an expanding-window median +/- ``z`` MAD band to a copy of ``df``.

    Each row's median and median absolute deviation are computed from
    ``value_col`` over all rows up to and including that row.

    Args:
        df: Input frame; it is not modified.
        z: Band half-width, in MADs.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Median``, ``Hi`` and ``Low`` columns added.
    """
    def _median_abs_dev(values):
        # Median of the absolute deviations from the window's own median.
        return np.median(np.fabs(values - np.median(values)))

    running = df[value_col].expanding()
    center = running.median()
    half_width = z * running.apply(_median_abs_dev, raw=False)

    enveloped = df.copy()
    enveloped['Median'] = center
    enveloped['Hi'] = center + half_width
    enveloped['Low'] = center - half_width
    return enveloped

In [78]:
# Band half-width in MADs, and the column to analyse.
z = 4
value_col = 'Value'
df_exp_mad = generate_expanding_mad_envelope(df_w_outliers, z=z, value_col=value_col)

In [79]:
# Every trace takes its x values from df_exp_mad, so this figure does not
# depend on the frame built for a different section.
dataset = go.Scatter(x=df_exp_mad.Date, y=df_exp_mad.Value, name='Returns')
hi = go.Scatter(x=df_exp_mad.Date, y=df_exp_mad.Hi, name='Expanding Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
median = go.Scatter(x=df_exp_mad.Date, y=df_exp_mad.Median, name='Expanding Median', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_exp_mad.Date, y=df_exp_mad.Low, name='Expanding Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_exp_mad[df_exp_mad.Outlier].Date,
                      y=df_exp_mad[df_exp_mad.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, median]
layout = go.Layout(
    title='Expanding Median and Median Absolute Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [80]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

This looks much better, but we still see the band widening towards the more recent data points. Also, we see that the trend line (in this case the median) is starting to be held back by the earlier data points. We can fix this by using a rolling function instead of the expanding function.

### Rolling Median and MAD<a class="anchor" id="rolling_mad"></a>

[Back to Table of Contents](#toc)

In [81]:
def generate_simple_rolling_mad_envelope(df, w, z, value_col):
    """Attach a rolling-window median +/- ``z`` MAD band to a copy of ``df``.

    Each row's median and median absolute deviation are computed from
    ``value_col`` with ``Series.rolling(w)``.

    Args:
        df: Input frame; it is not modified.
        w: Rolling window passed to ``Series.rolling``.
        z: Band half-width, in MADs.
        value_col: Name of the column the statistics are computed from.

    Returns:
        A copy of ``df`` with ``Median``, ``Hi`` and ``Low`` columns added.
    """
    def _median_abs_dev(values):
        # Median of the absolute deviations from the window's own median.
        return np.median(np.fabs(values - np.median(values)))

    trailing = df[value_col].rolling(w)
    center = trailing.median()
    half_width = z * trailing.apply(_median_abs_dev, raw=False)

    enveloped = df.copy()
    enveloped['Median'] = center
    enveloped['Hi'] = center + half_width
    enveloped['Low'] = center - half_width
    return enveloped

In [82]:
# Rolling window, band half-width in MADs, and the column to analyse.
w = 24
z = 4
value_col = 'Value'
df_simple_rolling_mad = generate_simple_rolling_mad_envelope(df_w_outliers, w=w, z=z, value_col=value_col)

In [83]:
dataset = go.Scatter(x=df_simple_rolling_mad.Date, y=df_simple_rolling_mad.Value, name='Returns')
hi = go.Scatter(x=df_simple_rolling_mad.Date, y=df_simple_rolling_mad.Hi, name='Rolling Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
median = go.Scatter(x=df_simple_rolling_mad.Date, y=df_simple_rolling_mad.Median, name='Rolling Median', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_simple_rolling_mad.Date, y=df_simple_rolling_mad.Low, name='Rolling Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_simple_rolling_mad[df_simple_rolling_mad.Outlier].Date,
                      y=df_simple_rolling_mad[df_simple_rolling_mad.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, median]
layout = go.Layout(
    title='Simple Rolling Median and Median Absolute Deviation',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [84]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

## Interquartile Range (IQR)<a class="anchor" id="iqr"></a>

[Back to Table of Contents](#toc)

IQR is another measure of statistical dispersion. It is equal to the difference between 75th and 25th percentiles, or between upper and lower quartiles.

### Simple IQR<a class="anchor" id="simple_iqr"></a>

[Back to Table of Contents](#toc)

We can tag any observation that lies more than 1.5 × IQR above the 75th percentile or more than 1.5 × IQR below the 25th percentile as an outlier.

In [87]:
def generate_simple_box_plot(df, n, value_col, top=.75, bottom=.25):
    """Attach constant box-plot style fences to a copy of ``df``.

    The quantiles are computed once over the whole of ``value_col``.  The
    upper fence is the ``top`` quantile plus ``n`` times the inter-quantile
    range; the lower fence is the ``bottom`` quantile minus the same amount.

    Args:
        df: Input frame; it is not modified.
        n: Multiplier applied to the inter-quantile range.
        value_col: Name of the column the statistics are computed from.
        top: Upper quantile (default 0.75).
        bottom: Lower quantile (default 0.25).

    Returns:
        A copy of ``df`` with ``Median``, ``Hi`` and ``Low`` columns added.
    """
    series = df[value_col]
    upper_q = series.quantile(top)
    lower_q = series.quantile(bottom)
    whisker = n * (upper_q - lower_q)

    fenced = df.copy()
    fenced['Median'] = series.median()
    fenced['Hi'] = upper_q + whisker
    fenced['Low'] = lower_q - whisker
    return fenced

In [88]:
# IQR multiplier for the fences, and the column to analyse.
n = 1.5
value_col = 'Value'
df_simple_box_plot = generate_simple_box_plot(df_w_outliers, n=n, value_col=value_col)

In [89]:
dataset = go.Scatter(x=df_simple_box_plot.Date, y=df_simple_box_plot.Value, name='Returns')
hi = go.Scatter(x=df_simple_box_plot.Date, y=df_simple_box_plot.Hi, name='Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
median = go.Scatter(x=df_simple_box_plot.Date, y=df_simple_box_plot.Median, name='Median', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_simple_box_plot.Date, y=df_simple_box_plot.Low, name='Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_simple_box_plot[df_simple_box_plot.Outlier].Date,
                      y=df_simple_box_plot[df_simple_box_plot.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, median]
layout = go.Layout(
    title='Simple IQR',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [90]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

### Expanding IQR<a class="anchor" id="exp_iqr"></a>

[Back to Table of Contents](#toc)

In [91]:
def generate_expanding_box_plot(df, n, value_col, top=.75, bottom=.25):
    """Attach expanding-window box-plot style fences to a copy of ``df``.

    Each row's quantiles are computed from ``value_col`` over all rows up to
    and including that row.  The upper fence is the ``top`` quantile plus
    ``n`` times the inter-quantile range; the lower fence is the ``bottom``
    quantile minus the same amount.

    Args:
        df: Input frame; it is not modified.
        n: Multiplier applied to the inter-quantile range.
        value_col: Name of the column the statistics are computed from.
        top: Upper quantile (default 0.75).
        bottom: Lower quantile (default 0.25).

    Returns:
        A copy of ``df`` with ``Median``, ``Hi`` and ``Low`` columns added.
    """
    running = df[value_col].expanding()
    upper_q = running.quantile(top)
    lower_q = running.quantile(bottom)
    whisker = n * (upper_q - lower_q)

    fenced = df.copy()
    fenced['Median'] = running.median()
    fenced['Hi'] = upper_q + whisker
    fenced['Low'] = lower_q - whisker
    return fenced

In [92]:
# IQR multiplier for the fences, and the column to analyse.
n = 1.5
value_col = 'Value'
df_exp_box_plot = generate_expanding_box_plot(df_w_outliers, n=n, value_col=value_col)

In [112]:
dataset = go.Scatter(x=df_exp_box_plot.Date, y=df_exp_box_plot.Value, name='Returns')
hi = go.Scatter(x=df_exp_box_plot.Date, y=df_exp_box_plot.Hi, name='Expanding Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
median = go.Scatter(x=df_exp_box_plot.Date, y=df_exp_box_plot.Median, name='Expanding Median', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_exp_box_plot.Date, y=df_exp_box_plot.Low, name='Expanding Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_exp_box_plot[df_exp_box_plot.Outlier].Date,
                      y=df_exp_box_plot[df_exp_box_plot.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, median]
layout = go.Layout(
    title='Expanding IQR',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [113]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

### Rolling IQR<a class="anchor" id="rolling_iqr"></a>

[Back to Table of Contents](#toc)

In [95]:
def generate_simple_rolling_box_plot(df, w, n, value_col, top=.75, bottom=.25):
    """Attach rolling-window box-plot style fences to a copy of ``df``.

    Each row's quantiles are computed from ``value_col`` with
    ``Series.rolling(w)``.  The upper fence is the ``top`` quantile plus
    ``n`` times the inter-quantile range; the lower fence is the ``bottom``
    quantile minus the same amount.

    Args:
        df: Input frame; it is not modified.
        w: Rolling window passed to ``Series.rolling``.
        n: Multiplier applied to the inter-quantile range.
        value_col: Name of the column the statistics are computed from.
        top: Upper quantile (default 0.75).
        bottom: Lower quantile (default 0.25).

    Returns:
        A copy of ``df`` with ``Median``, ``Hi`` and ``Low`` columns added.
    """
    trailing = df[value_col].rolling(w)
    upper_q = trailing.quantile(top)
    lower_q = trailing.quantile(bottom)
    whisker = n * (upper_q - lower_q)

    fenced = df.copy()
    fenced['Median'] = trailing.median()
    fenced['Hi'] = upper_q + whisker
    fenced['Low'] = lower_q - whisker
    return fenced

In [96]:
# Rolling window, IQR multiplier for the fences, and the column to analyse.
w = 24
n = 1.5
value_col = 'Value'
df_simple_rolling_box_plot = generate_simple_rolling_box_plot(df_w_outliers, w=w, n=n, value_col=value_col)

In [114]:
dataset = go.Scatter(x=df_simple_rolling_box_plot.Date, y=df_simple_rolling_box_plot.Value, name='Returns')
hi = go.Scatter(x=df_simple_rolling_box_plot.Date, y=df_simple_rolling_box_plot.Hi, name='Rolling Upper Band', fill='tonexty', line=dict(color='rgb(220,220,220)'))
median = go.Scatter(x=df_simple_rolling_box_plot.Date, y=df_simple_rolling_box_plot.Median, name='Rolling Median', line=dict(color='rgb(150, 0, 0)'))
low = go.Scatter(x=df_simple_rolling_box_plot.Date, y=df_simple_rolling_box_plot.Low, name='Rolling Lower Band', fill=None, line=dict(color='rgb(220,220,220)'))
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_simple_rolling_box_plot[df_simple_rolling_box_plot.Outlier].Date,
                      y=df_simple_rolling_box_plot[df_simple_rolling_box_plot.Outlier].Value,
                      mode='markers',
                      name='Outliers')
# `low` sits directly before `hi` so that hi's fill='tonexty' shades the area
# between the two bands.
data = [dataset, ourliers, low, hi, median]
layout = go.Layout(
    title='Rolling IQR',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [115]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

# Other Techniques (Ignore)

In [110]:
# First differences of the series, with the differences at the injected
# outlier rows overlaid as markers.
dataset = go.Scatter(x=df_w_outliers.Date, y=df_w_outliers.Value.diff(), name='Returns')
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_w_outliers[df_w_outliers.Outlier].Date,
                      y=df_w_outliers[['Value']].diff()[df_w_outliers.Outlier].Value,
                      mode='markers',
                      name='Outliers')
data = [dataset, ourliers]
layout = go.Layout(
    title='Returns data with outliers',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [111]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [106]:
# Full series as a line, with the injected outliers overlaid as markers.
dataset = go.Scatter(x=df_w_outliers.Date, y=df_w_outliers.Value, name='Returns')
# Legend label spelled 'Outliers' (it previously read 'Ourliers').
ourliers = go.Scatter(x=df_w_outliers[df_w_outliers.Outlier].Date,
                      y=df_w_outliers[df_w_outliers.Outlier].Value,
                      mode='markers',
                      name='Outliers')
data = [dataset, ourliers]
layout = go.Layout(
    title='Returns data with outliers',
    xaxis=dict(title='Date'),
    yaxis=dict(title='Return', tickformat='.0%')
)

In [107]:
# Combine the current `data` traces and `layout` into a figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)