In [1]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
plt.style.use("ggplot")

In [2]:
# The sample file is space-separated and has no header row, so column names are supplied here.
path_data = os.path.join("data", "amostras_continuo.txt")
df = pd.read_csv(
    path_data,
    sep=" ",
    header=None,
    names=["X1", "X2", "X3", "T"],
)
print(df.shape)
df.head()

(500, 4)


Unnamed: 0,X1,X2,X3,T
0,-0.0579,0.0,0.0,-1
1,-1.6873,-0.0579,0.0,-1
2,-8.5791,-1.6873,-0.0579,-1
3,-0.0361,-8.5791,-1.6873,1
4,2.2593,-0.0361,-8.5791,1


In [3]:
def gaussian(x: "float | np.ndarray", mu: "float | np.ndarray", sig: float, normalize: bool = False):
    """Evaluate a Gaussian bump centred at ``mu`` with width ``sig`` at ``x``.

    Computes ``exp(-(x - mu)**2 / (2 * sig**2))``. ``x`` and ``mu`` may be
    scalars or array-likes; they are combined with the usual broadcasting of
    ``x - mu``.

    Args:
        x: point(s) at which the kernel is evaluated.
        mu: centre(s) of the kernel.
        sig: kernel width (standard deviation); must be non-zero.
        normalize: when False (default, the original behaviour) the peak value
            is 1. When True the result is divided by ``|sig| * sqrt(2*pi)`` so
            the kernel integrates to 1 and is a proper probability density.

    Returns:
        The kernel value(s), with the shape produced by ``x - mu``.

    Raises:
        ValueError: if ``sig`` is zero (the exponent would be a division by zero).
    """
    if np.any(np.asarray(sig) == 0):
        raise ValueError("sig must be non-zero")
    values = np.exp(-np.power(x - mu, 2) / (2 * np.power(sig, 2.)))
    if normalize:
        values = values / (np.abs(sig) * np.sqrt(2 * np.pi))
    return values

# Summary statistics of X1; mean/min/max are reused by later cells.
stats = df["X1"].describe()
sigma = 2
mean, min_, max_ = stats.at['mean'], stats.at['min'], stats.at['max']

In [4]:
# Sample covariance matrix of the X1/X2 pair.
df[["X1", "X2"]].cov()

Unnamed: 0,X1,X2
X1,5.718951,2.883025
X2,2.883025,5.711084


**Entropia - Parzen window**

$$
\hat{J}_{HS} = - \frac{1}{N_x}\sum_{i=1}^{N_x}\log_2 \left(\frac{1}{N_x}\sum_{j=1}^{N_x}G_{\sigma^2}(x_i - x_j)\right)
$$

In [5]:
sigma = 1
N_x = len(df["X1"])

# For every sample, take log2 of the mean kernel response against all samples.
aux = []
for xi in df["X1"]:
    kernel_total = sum(gaussian(xi, xj, sigma) for xj in df["X1"])
    aux.append(np.log2(kernel_total / N_x))

# The entropy estimate is the negated average of those log-densities.
print((-1 / N_x) * sum(aux))

1.9252367242880766


In [6]:
sigma = 1
N_x = len(df["X1"])

# Same estimate as the explicit loop, expressed as a lazy generator pipeline.
log_densities = (
    np.log2(sum(gaussian(xi, xj, sigma) for xj in df["X1"]) / N_x)
    for xi in df["X1"]
)
j_hs = (-1 / N_x) * sum(log_densities)
j_hs

1.9252367242880766

In [7]:
# Inner sum is evaluated on the whole X1 column at once; the outer pass still visits each sample.
log_densities = [
    np.log2(sum(gaussian(xi, df["X1"], sigma)) / N_x)
    for xi in df["X1"]
]
j_hs = (-1 / N_x) * sum(log_densities)


In [8]:
from stats import get_continuos_entropy

# Column-wise entropy; the extra positional arguments (kernel function, kernel size) are passed as a tuple.
df.apply(get_continuos_entropy, args=(gaussian, 1))

X1    1.925237
X2    1.923050
X3    1.922854
T     0.815314
dtype: float64

In [None]:
import numpy as np

# Quick check of numpy's array reduction.
np.sum(np.array([1, 2, 3, 4, 5]))

In [None]:
def gaussian(x: "float | np.ndarray", mu: "float | np.ndarray", sig: float, normalize: bool = False):
    """Evaluate a Gaussian bump centred at ``mu`` with width ``sig`` at ``x``.

    Computes ``exp(-(x - mu)**2 / (2 * sig**2))``. ``x`` and ``mu`` may be
    scalars or array-likes; they are combined with the usual broadcasting of
    ``x - mu``.

    Args:
        x: point(s) at which the kernel is evaluated.
        mu: centre(s) of the kernel.
        sig: kernel width (standard deviation); must be non-zero.
        normalize: when False (default, the original behaviour) the peak value
            is 1. When True the result is divided by ``|sig| * sqrt(2*pi)`` so
            the kernel integrates to 1 and is a proper probability density.

    Returns:
        The kernel value(s), with the shape produced by ``x - mu``.

    Raises:
        ValueError: if ``sig`` is zero (the exponent would be a division by zero).
    """
    if np.any(np.asarray(sig) == 0):
        raise ValueError("sig must be non-zero")
    values = np.exp(-np.power(x - mu, 2) / (2 * np.power(sig, 2.)))
    if normalize:
        values = values / (np.abs(sig) * np.sqrt(2 * np.pi))
    return values

# The kernel broadcasts over arrays: one value per input element.
gaussian(np.asarray([1, 2, 3]), 2, 2)

**numpy.linspace**
> Return evenly spaced numbers over a specified interval.

In [None]:
# 100 evenly spaced evaluation points spanning the observed X1 range.
x_i = np.linspace(start=min_, stop=max_, num=100)

In [None]:
# Draw the same bump three times, each shifted by an integer offset x_j.
for x_j in range(1, 4):
    y = [gaussian(x - x_j, mean, sigma) for x in x_i]
    plt.plot(x_i, y)

plt.show()

In [None]:
N_x = len(df["X1"])

# Parzen window: one kernel per sample, centred ON the sample x_j, i.e. G(x_i - x_j).
# The previous form gaussian(x - x_j, mean, sigma) centred each kernel at x_j + mean,
# shifting the whole density estimate by the sample mean.
# Broadcasting a (N_x, 1) column of samples against the (1, len(x_i)) grid gives a
# (N_x, len(x_i)) matrix: row j is the kernel of sample j evaluated on the grid.
gaussians = gaussian(x_i[np.newaxis, :], df["X1"].to_numpy()[:, np.newaxis], sigma) / N_x

In [None]:
# (number of samples, number of grid points)
np.shape(gaussians)

In [None]:
# Plot each sample's kernel curve. The loop variable must NOT be called `gaussian`:
# that would rebind the module-level `gaussian` function to an array row and break
# every later call to it.
for kernel_curve in gaussians:
    plt.plot(x_i, kernel_curve)
plt.show()

In [None]:
# Summing the per-sample kernels over the sample axis gives the estimated density on the grid.
plt.plot(x_i, np.sum(gaussians, axis=0))
plt.show()

In [None]:
def gaussian(x: "float | np.ndarray", mu: "float | np.ndarray", sig: float, normalize: bool = False):
    """Evaluate a Gaussian bump centred at ``mu`` with width ``sig`` at ``x``.

    Computes ``exp(-(x - mu)**2 / (2 * sig**2))``. ``x`` and ``mu`` may be
    scalars or array-likes; they are combined with the usual broadcasting of
    ``x - mu``.

    Args:
        x: point(s) at which the kernel is evaluated.
        mu: centre(s) of the kernel.
        sig: kernel width (standard deviation); must be non-zero.
        normalize: when False (default, the original behaviour) the peak value
            is 1. When True the result is divided by ``|sig| * sqrt(2*pi)`` so
            the kernel integrates to 1 and is a proper probability density.

    Returns:
        The kernel value(s), with the shape produced by ``x - mu``.

    Raises:
        ValueError: if ``sig`` is zero (the exponent would be a division by zero).
    """
    if np.any(np.asarray(sig) == 0):
        raise ValueError("sig must be non-zero")
    values = np.exp(-np.power(x - mu, 2) / (2 * np.power(sig, 2.)))
    if normalize:
        values = values / (np.abs(sig) * np.sqrt(2 * np.pi))
    return values


def get_entropy_density(x: pd.Series, kernel="gaussian", kernel_size=0.3, num_xi=100):
    """Average ``-log2`` of a Parzen-window density estimate over a uniform grid.

    A kernel is centred on every sample of ``x``; the kernels are averaged to
    form a density estimate, which is evaluated on ``num_xi`` evenly spaced
    points between ``x.min()`` and ``x.max()``. The function returns the mean
    of ``-log2(density)`` over those grid points.

    NOTE(review): this is an unweighted grid average, not the resubstitution
    estimate that evaluates the density at the samples themselves; confirm
    which quantity is intended.

    Args:
        x: samples of a single variable.
        kernel: either a callable ``kernel(points, centres, kernel_size)`` that
            broadcasts over arrays, or any other value (e.g. the default
            ``"gaussian"``) to use the module-level ``gaussian`` function.
        kernel_size: width passed to the kernel.
        num_xi: number of grid points.

    Returns:
        The grid-averaged ``-log2`` density as a numpy float.

    Raises:
        ValueError: if ``x`` is empty.
    """
    N_x = len(x)
    if N_x == 0:
        raise ValueError("x must contain at least one sample")

    kernel_fn = kernel if callable(kernel) else gaussian
    samples = x.to_numpy(dtype=float)
    x_i = np.linspace(x.min(), x.max(), num_xi)

    # Row j holds the kernel of sample j evaluated on the grid. Each kernel is
    # centred on its own sample (G(x_i - x_j)); the earlier version added the
    # sample mean to every centre, which shifted the estimated density.
    responses = np.asarray(kernel_fn(x_i[np.newaxis, :], samples[:, np.newaxis], kernel_size))
    density = responses.sum(axis=0) / N_x

    return -np.log2(density).mean()

# Grid-based estimate for the first input column.
get_entropy_density(x=df["X1"])

In [None]:
# Grid-based estimate for the second input column.
get_entropy_density(x=df["X2"])

In [None]:
# Grid-based estimate for the third input column.
get_entropy_density(x=df["X3"])

In [None]:
# Sample covariance matrix of the X1/X2 pair (input to the bivariate normal below).
df[["X1", "X2"]].cov()

In [None]:
import numpy as np
from scipy.stats import multivariate_normal

In [None]:
# Parameters of the bivariate normal fitted to (X1, X2).
cov = df[["X1", "X2"]].cov().to_numpy()
mean = df.mean()[["X1", "X2"]].to_numpy()

# Observed range of each variable.
min1, max1 = df["X1"].min(), df["X1"].max()
min2, max2 = df["X2"].min(), df["X2"].max()

# 100 x 100 evaluation mesh over that range, flattened to one (x1, x2) row per grid point.
X1, X2 = np.meshgrid(np.linspace(min1, max1, 100), np.linspace(min2, max2, 100))
grid = np.vstack((X1.ravel(), X2.ravel())).T

In [None]:
# Density of the fitted bivariate normal at every grid point (a plain array of values).
dist = multivariate_normal.pdf(grid, mean=mean, cov=cov)

In [None]:
# One density value per grid point.
np.shape(dist)

In [None]:
def multivariate_gaussian_pdf(x, mu, cov):
    """Calculate the multivariate normal density (pdf) at a single point.

    Args:
        x: sample vector with d entries (shape ``(d,)`` or ``(d, 1)``).
        mu: mean vector with d entries (shape ``(d,)`` or ``(d, 1)``).
        cov: ``d x d`` covariance matrix.

    Returns:
        The density value as a Python float.
    """
    # Flatten so both "d" and "d x 1" inputs give a scalar quadratic form.
    x = np.asarray(x, dtype=float).ravel()
    mu = np.asarray(mu, dtype=float).ravel()
    cov = np.asarray(cov, dtype=float)
    diff = x - mu

    # Normalising constant: 1 / ((2*pi)^(d/2) * sqrt(det(cov))).
    part1 = 1 / (((2 * np.pi) ** (len(mu) / 2)) * (np.linalg.det(cov) ** (1 / 2)))
    # Exponent: -1/2 * diff^T cov^{-1} diff, solved as a linear system instead
    # of forming the explicit inverse.
    part2 = (-1 / 2) * diff.dot(np.linalg.solve(cov, diff))
    return float(part1 * np.exp(part2))

In [None]:
# `dist` is the array of density values on the grid, not a distribution object,
# so it has no `.pdf`; evaluate the fitted normal directly at the point instead.
multivariate_normal.pdf([1, 2], mean=mean, cov=cov)