In [None]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [None]:
N = 100
x = np.random.normal(size=N)
y = .5 * x + 0.2 * (np.random.normal(size=N))

fig = go.Figure(data=[go.Scatter(x=x, y=y, mode='markers', marker=dict(size=8, opacity=0.5), name="data")])

fig.update_layout(xaxis_title="x", yaxis_title="y", yaxis=dict(scaleanchor="x", scaleratio=1))
fig.show()

In [None]:
print("Variance in x =", np.var(x))
print("Variance in y =", np.var(y))

In [None]:
# covariance calculation
def covariance(a, b):
    return ((a - a.mean()) * (b - b.mean())).sum() / (len(a) - 1)


print("Covariance of x & y =", covariance(x, y))
print("Covariance of y & x =", covariance(x, y))
print("Covariance of x with itself =", covariance(x, x), ", variance of x =", np.var(x))
print("Covariance of y with itself =", covariance(y, y), ", variance of x =", np.var(y))

In [None]:
data = np.stack((x, y), axis=1)  # pack the x & y data together in one 2D array
print("data.shape =", data.shape)

cov = np.cov(data.T)  # .T b/c numpy wants variables along rows rather than down columns
print("covariance matrix = \n", cov)

In [None]:
z = -.5 * x + 2 * np.random.uniform(size=N)
data = np.stack((x, y, z)).T
print("data.shape =", data.shape)
cov = np.cov(data.T)
print("Covariance matrix = \n", cov)

import plotly.graph_objects as go

fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z, mode='markers', marker=dict(size=8, opacity=0.5), name="data")])
fig.update_layout(xaxis_title="x", yaxis_title="y", yaxis=dict(scaleanchor="x", scaleratio=1))
fig.show()

In [None]:
from numpy import linalg as LA

lambdas, vs = LA.eig(cov)
lambdas, vs

In [4]:
N = 100
x = np.random.normal(size=N)
y = .5*x + 0.2*(np.random.normal(size=N))

fig = go.Figure(data=[go.Scatter(x=x,y=y,mode='markers',marker=dict(size=8,opacity=0.5), name="data")])

fig.update_layout(xaxis_title="x",yaxis_title="y",yaxis = dict(scaleanchor="x",scaleratio=1))
fig.show()

In [5]:
print("Variance in x =",np.var(x))
print("Variance in y =",np.var(y))

Variance in x = 0.855372393659724
Variance in y = 0.2878284051327864


In [7]:
# covariance calculation
def covariance(a,b):
    return ((a-a.mean())*(b-b.mean()) ).sum()   / (len(a)-1)

print("Covariance of x & y =",covariance(x,y))
print("Covariance of y & x =",covariance(x,y))
print("Covariance of x with itself =",covariance(x,x),", variance of x =",np.var(x))
print("Covariance of y with itself =",covariance(y,y),", variance of x =",np.var(y))

Covariance of x & y = 0.46255334838302653
Covariance of y & x = 0.46255334838302653
Covariance of x with itself = 0.8640125188482061 , variance of x = 0.855372393659724
Covariance of y with itself = 0.2907357627603903 , variance of x = 0.2878284051327864


In [9]:
data = np.stack((x,y),axis=1) # pack the x & y data together in one 2D array
print("data.shape =",data.shape)

cov = np.cov(data.T) # .T b/c numpy wants variables along rows rather than down columns
print("covariance matrix = \n",cov)

data.shape = (100, 2)
covariance matrix = 
 [[0.86401252 0.46255335]
 [0.46255335 0.29073576]]


In [12]:
z = -.5*x + 2*np.random.uniform(size=N)
data = np.stack((x,y,z)).T
print("data.shape =",data.shape)
cov = np.cov(data.T)
print("Covariance matrix = \n",cov)

import plotly.graph_objects as go
fig = go.Figure(data=[go.Scatter3d(x=x,y=y,z=z, mode='markers', marker=dict(size=8,opacity=0.5),name="data" )])
fig.update_layout(xaxis_title="x",yaxis_title="y",yaxis=dict(scaleanchor = "x", scaleratio = 1))
fig.show()

data.shape = (100, 3)
Covariance matrix = 
 [[ 0.86401252  0.46255335 -0.46727612]
 [ 0.46255335  0.29073576 -0.27990296]
 [-0.46727612 -0.27990296  0.57786195]]


In [13]:
from numpy import linalg as LA
lambdas, vs = LA.eig(cov)
lambdas, vs

(array([1.458275  , 0.24221509, 0.03212014]),
 array([[-0.73995093, -0.49285604, -0.4577833 ],
        [-0.41925852, -0.19427288,  0.88683727],
        [ 0.52601799, -0.84814561,  0.06288169]]))