In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
import plotly.express as px


In [6]:
filepath = "RELIANCE_2010-01-012021-08-30.csv"
data = pd.read_csv(filepath, usecols=[0,4], names=['date', 'close'], header=0)
data = data.sort_values('date')
data['date'] = pd.to_datetime(data['date'])
data.head()

Unnamed: 0,date,close
0,2010-01-04,1075.5
1,2010-01-05,1070.7
2,2010-01-06,1088.0
3,2010-01-07,1106.05
4,2010-01-08,1103.15


In [7]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data['date'], y=data['close'], mode='lines', name='closing price'))

fig.update_xaxes(range=["2009-11-01", "2021-11-01"])
fig.update_yaxes(range=[500, 2500])

In [8]:
def splitData(ts):

    test_set_size = int(np.round(0.2*len(ts)));
    train_set = ts[:-test_set_size]    
    test_set = ts[-test_set_size:]

    return train_set, test_set


In [9]:
def modify_test_data(test_set, mean, variance):
    
    noise = np.random.normal(mean, variance, len(test_set))
    # print("noise", noise[:50])
    modified_test_Set = test_set + noise

    return modified_test_Set


In [10]:
train_set, test_set = splitData(data['close'].values)

In [11]:
X1, X2 = train_set, test_set
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=973.087522, mean2=1718.089983
variance1=28266.246848, variance2=136205.599459


### Compute KL-Divergence and Jensen-Shannon Divergence

#### 1. KL Divergence

In [12]:
def KLD(P,Q,delta_x=1):
    print("P", P)
    print("Q", Q)
    # print( P*np.log(P/Q))
    return np.sum(P* np.log(P/Q))
    # return np.sum( P * np.log( P/Q ) ) * delta_x

In [13]:
def sample_KLD(bins=10):
    values_1, edges_1 = np.histogram(X1,bins=bins,density=True)
    values_2, edges_2 = np.histogram(X2,bins=bins,density=True)
    
    #Consider only first std. deviation of original series i.e. P(x).
    # return KLD(0.68*values_1,values_2, 2/bins)
    return KLD(0.68*values_1,values_2)

In [14]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [15]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

P [4.91359102e-04 1.51627993e-03 1.73633646e-03 1.86595880e-03
 6.33039334e-04 2.26085476e-04 2.32114422e-04 1.53738124e-04
 3.61736762e-05 9.04341905e-05]
Q [8.39278724e-05 2.27804225e-04 1.07907264e-03 9.11216900e-04
 9.95144773e-04 1.55866049e-04 1.67855745e-04 1.41478413e-03
 1.33085626e-03 5.75505411e-04]
 KLD = +5.14e-03


In [16]:
X1, X2 = train_set, test_set[:50]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=973.087522, mean2=1305.453000
variance1=28266.246848, variance2=1714.464541


In [17]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [18]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

P [4.91359102e-04 1.51627993e-03 1.73633646e-03 1.86595880e-03
 6.33039334e-04 2.26085476e-04 2.32114422e-04 1.53738124e-04
 3.61736762e-05 9.04341905e-05]
Q [0.00113122 0.01131222 0.01244344 0.00452489 0.00565611 0.01131222
 0.00452489 0.00113122 0.00113122 0.00339367]
 KLD = -1.22e-02


In [19]:
X1, X2 = train_set, test_set[50:100]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=973.087522, mean2=1231.705000
variance1=28266.246848, variance2=2170.801025


In [20]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [21]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

P [4.91359102e-04 1.51627993e-03 1.73633646e-03 1.86595880e-03
 6.33039334e-04 2.26085476e-04 2.32114422e-04 1.53738124e-04
 3.61736762e-05 9.04341905e-05]
Q [0.00108932 0.00217865 0.00217865 0.0043573  0.0043573  0.00653595
 0.00544662 0.00653595 0.00653595 0.01525054]
 KLD = -6.86e-03


In [22]:
X1, X2 = train_set, test_set[100:200]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=973.087522, mean2=1479.247000
variance1=28266.246848, variance2=6690.764491


In [23]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [24]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

P [4.91359102e-04 1.51627993e-03 1.73633646e-03 1.86595880e-03
 6.33039334e-04 2.26085476e-04 2.32114422e-04 1.53738124e-04
 3.61736762e-05 9.04341905e-05]
Q [0.00211864 0.00060533 0.00151332 0.00151332 0.0033293  0.00514528
 0.00242131 0.00514528 0.00635593 0.00211864]
 KLD = -2.01e-03


In [25]:
X1, X2 = train_set, test_set[200:300]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=973.087522, mean2=1431.919500
variance1=28266.246848, variance2=65014.567495


In [26]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [27]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

P [4.91359102e-04 1.51627993e-03 1.73633646e-03 1.86595880e-03
 6.33039334e-04 2.26085476e-04 2.32114422e-04 1.53738124e-04
 3.61736762e-05 9.04341905e-05]
Q [0.00038061 0.00095152 0.00076122 0.00057091 0.00095152 0.00247395
 0.00142728 0.00047576 0.00095152 0.00057091]
 KLD = +2.79e-03


In [None]:
X1, X2 = train_set, test_set[400:500]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

In [None]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [None]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

In [30]:
X1, X2 = train_set, test_set[400:500]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))

mean1=973.087522, mean2=2002.174500
variance1=28266.246848, variance2=5993.488475


In [32]:
x0 = X1
# Add 1 to shift the mean of the Gaussian distribution
x1 = X2

df =pd.DataFrame(dict(
    series=np.concatenate((["a"]*len(x0), ["b"]*len(x1))), 
    data  =np.concatenate((x0,x1))
))

px.histogram(df, x="data", color="series", barmode="overlay", nbins=10, histnorm='probability density')
# px.histogram(df, x="data", color="series", barmode="overlay")

In [33]:
print(' KLD = %+8.2e' % (sample_KLD(bins=10)))

P [4.91359102e-04 1.51627993e-03 1.73633646e-03 1.86595880e-03
 6.33039334e-04 2.26085476e-04 2.32114422e-04 1.53738124e-04
 3.61736762e-05 9.04341905e-05]
Q [0.00055532 0.00249896 0.00499792 0.00388727 0.00666389 0.00305428
 0.00249896 0.00138831 0.00055532 0.00166597]
 KLD = -7.35e-03


#### 2. Jensen Shannon Divergence

In [None]:
def JSdivergence(P, Q):
    """Compute the Jensen-Shannon divergence between two probability distributions.

    Input
    -----
    P, Q : array-like Probability distributions 
    """

    P = np.array(P)
    Q = np.array(Q)

    M = 0.5 * (P + Q)

    return 0.5 * (KLD(0.68*P, M) +KLD(0.68*Q, M))