In [1]:
import datetime as dt
import numpy as np
import pandas as pd
#from plotly import __version__ as plotly_version
from plotly.offline import init_notebook_mode, iplot
from statsmodels.tsa.arima_process import arma_generate_sample
#from scipy.signal import savgol_filter
# Put plotly into offline notebook mode so the iplot() calls below render inline.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead of
# embedding it in the notebook — confirm against the installed plotly version.
init_notebook_mode(connected=True)

In [2]:
# Generate two synthetic series and collect them in a dated DataFrame.
num_points = 1500

# Lag-polynomial coefficients handed to arma_generate_sample as (ar, ma).
# Both draws use the same process; only the seed and the final scaling differ.
ar_coefs = [1, -1, 0, 0, 0]
ma_coefs = [1, -1, -1, -0.8, -0.3]

# NOTE(review): newer statsmodels releases appear to name the `sigma` keyword
# `scale` — confirm against the installed version before upgrading.
np.random.seed(13)   # fixed seed so the base series is reproducible
xs = arma_generate_sample(ar_coefs, ma_coefs, nsample=num_points, sigma=0.5, burnin=100)

np.random.seed(10)   # different seed for the disturbance added to series B
# NOTE(review): `os` would shadow the stdlib module of the same name if it were
# ever imported in this notebook; the name is kept so later cells are unaffected.
os = arma_generate_sample(ar_coefs, ma_coefs, nsample=num_points, sigma=0.5, burnin=100) * 0.5

# Size the date axis from num_points (not a second hard-coded 1500) so it cannot
# drift out of sync with the sample length when num_points is changed.
df = pd.date_range(start=pd.to_datetime('2014-01-01'), periods=num_points, name='Date').to_frame(index=False)
df['A'] = xs + 20        # base series shifted up by 20
df['B'] = xs + os + 20   # base series plus the half-scaled disturbance, same shift

In [3]:
# Axis limits used by the layouts and guide lines of the scatter plots.
min_x, max_x = 0, 60
min_y, max_y = 0, 80

# Title and axis labels for the first figure (A against B).
plot_title, x_title, y_title = 'Scatter plot of A vs B', 'Series A', 'Series B'

In [4]:
# Figure layout: titled plot, labelled axes pinned to the fixed ranges, no legend.
layout = dict(
    title=plot_title,
    xaxis=dict(title=x_title, range=[min_x, max_x]),
    yaxis=dict(title=y_title, range=[min_y, max_y]),
    showlegend=False,
)

In [5]:
# Raw observations: series A on the x-axis, series B on the y-axis.
series_scatter_points = dict(
    x=df['A'],
    y=df['B'],
    mode='markers',
    marker=dict(size=5),
)

# Mean and standard deviation of each series.
A_mean, B_mean = df['A'].mean(), df['B'].mean()
A_std, B_std = df['A'].std(), df['B'].std()

# A single large black marker at the point of means.
pt_color_black = "rgb(0,0,0)"
balance_point = dict(
    x=[A_mean],
    y=[B_mean],
    mode='markers',
    marker=dict(size=12, color=pt_color_black),
)

# Guide lines sit two standard deviations either side of each mean.
lower_range_x, upper_range_x = A_mean - 2*A_std, A_mean + 2*A_std
lower_range_y, upper_range_y = B_mean - 2*B_std, B_mean + 2*B_std

# One dashed gray line style shared by all four guides.
gray = "rgb(162, 162, 162)"
guide_line_style = dict(color=gray, dash='dash')

# Vertical guides run from min_y to max_y at a fixed x.
lower_vertical = dict(x=[lower_range_x] * 2, y=[min_y, max_y], mode='lines', line=guide_line_style)
upper_vertical = dict(x=[upper_range_x] * 2, y=[min_y, max_y], mode='lines', line=guide_line_style)

# Horizontal guides run from min_x to max_x at a fixed y.
lower_horizontal = dict(x=[min_x, max_x], y=[lower_range_y] * 2, mode='lines', line=guide_line_style)
upper_horizontal = dict(x=[min_x, max_x], y=[upper_range_y] * 2, mode='lines', line=guide_line_style)

In [6]:
# Assemble the figure: data points first, then the mean marker, then the four guides.
figure = dict(
    data=[
        series_scatter_points,
        balance_point,
        lower_vertical,
        upper_vertical,
        lower_horizontal,
        upper_horizontal,
    ],
    layout=layout,
)

# Render the figure inline in the notebook.
iplot(figure)

In [7]:
# Work on a copy so the original df is left unchanged.
df_lagged = df.copy()
# A_lagged holds the previous row's value of A.
df_lagged['A_lagged'] = df_lagged['A'].shift(1)
# The first row has no predecessor, so its lag is undefined: drop it. The label is
# looked up on df_lagged itself rather than on df, so this no longer relies on the
# two frames sharing an index.
df_lagged = df_lagged.drop(df_lagged.index[0])

In [8]:
# Show the first five rows to check that A_lagged trails A by one row.
df_lagged.head(n=5)

Unnamed: 0,Date,A,B,A_lagged
1,2014-01-02,13.135447,8.013742,12.333677
2,2014-01-03,13.885421,9.309362,13.135447
3,2014-01-04,14.853525,11.225588,13.885421
4,2014-01-05,15.735236,12.557178,14.853525
5,2014-01-06,16.571813,13.540015,15.735236


In [9]:
# Show the last five rows of the original frame.
# NOTE(review): this inspects df rather than df_lagged — confirm that is intended.
df.tail(n=5)

Unnamed: 0,Date,A,B
1495,2018-02-04,35.372468,28.984679
1496,2018-02-05,37.148504,31.260215
1497,2018-02-06,37.445087,31.602043
1498,2018-02-07,38.42058,32.876002
1499,2018-02-08,38.848142,33.327224


In [10]:
# The lag is undefined for the first observation, so df_lagged has one row fewer
# than df (1499 values). The mean and standard deviation of A therefore shift
# slightly and are recomputed here, together with those of the lagged series.
A_mean, A_std = df_lagged['A'].mean(), df_lagged['A'].std()
A_lagged_mean, A_lagged_std = df_lagged['A_lagged'].mean(), df_lagged['A_lagged'].std()

# Guide-line positions: two standard deviations either side of each mean
# (x-axis: A, y-axis: lagged A).
lower_range_x, upper_range_x = A_mean - 2*A_std, A_mean + 2*A_std
lower_range_y, upper_range_y = A_lagged_mean - 2*A_lagged_std, A_lagged_mean + 2*A_lagged_std

In [12]:
# Rebuild every trace so that it refers to the lagged data and the recomputed statistics.

# Raw observations: A on the x-axis, lagged A on the y-axis.
series_scatter_points = {
    'x': df_lagged['A'], 'y': df_lagged['A_lagged'],
    'mode': 'markers', 'marker': {'size': 5},
}

# Large black marker at the point of means.
balance_point = {
    'x': [A_mean], 'y': [A_lagged_mean],
    'mode': 'markers', 'marker': {'size': 12, 'color': pt_color_black},
}

# Vertical guides: fixed x position, drawn from min_y to max_y.
lower_vertical, upper_vertical = (
    {'x': [x_pos, x_pos], 'y': [min_y, max_y], 'mode': 'lines', 'line': guide_line_style}
    for x_pos in (lower_range_x, upper_range_x)
)

# Horizontal guides: fixed y position, drawn from min_x to max_x.
lower_horizontal, upper_horizontal = (
    {'x': [min_x, max_x], 'y': [y_pos, y_pos], 'mode': 'lines', 'line': guide_line_style}
    for y_pos in (lower_range_y, upper_range_y)
)

In [None]:
# Titles for the second figure (A against its own lag).
plot_title, x_title, y_title = 'Scatter plot of A vs lagged A', 'Series A', 'Series A lagged'

# Rebuild the layout so it picks up the new titles; the axis ranges are unchanged.
# This has to run before the figure is assembled, otherwise the previous layout
# (with the A-vs-B titles) is reused.
layout = dict(
    title=plot_title,
    xaxis=dict(title=x_title, range=[min_x, max_x]),
    yaxis=dict(title=y_title, range=[min_y, max_y]),
    showlegend=False,
)

In [13]:
# Combine the rebuilt traces with the current layout and render the lagged scatter plot.
figure = dict(
    data=[
        series_scatter_points,
        balance_point,
        lower_vertical,
        upper_vertical,
        lower_horizontal,
        upper_horizontal,
    ],
    layout=layout,
)

iplot(figure)