<a href="https://colab.research.google.com/github/olcaykursun/ML/blob/main/cov_corr_iris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_iris

# Load the iris dataset
iris = load_iris()
X = iris.data  # This is a (150, 4) array where each column is a feature

# Calculate covariance matrix
cov_matrix = np.cov(X, rowvar=False)

# Calculate correlation matrix
corr_matrix = np.corrcoef(X, rowvar=False)

# Extract the diagonal of the covariance matrix
diag_cov = np.diag(cov_matrix)

# Compute the correlation using the relationship
computed_corr_matrix = cov_matrix / np.sqrt(np.outer(diag_cov, diag_cov))

print("Original Correlation Matrix:\n", corr_matrix)
print("\nComputed Correlation Matrix from Covariance:\n", computed_corr_matrix)

# Check if the two matrices are close enough
are_close = np.allclose(corr_matrix, computed_corr_matrix, atol=1e-8)

print("\nAre the matrices close enough?", are_close)


Original Correlation Matrix:
 [[ 1.         -0.11756978  0.87175378  0.81794113]
 [-0.11756978  1.         -0.4284401  -0.36612593]
 [ 0.87175378 -0.4284401   1.          0.96286543]
 [ 0.81794113 -0.36612593  0.96286543  1.        ]]

Computed Correlation Matrix from Covariance:
 [[ 1.         -0.11756978  0.87175378  0.81794113]
 [-0.11756978  1.         -0.4284401  -0.36612593]
 [ 0.87175378 -0.4284401   1.          0.96286543]
 [ 0.81794113 -0.36612593  0.96286543  1.        ]]

Are the matrices close enough? True


In [10]:
print(cov_matrix)
print(np.cov(X, rowvar=False))

mycov = np.zeros((4,4))
for i in range(4):
  xi = X[:,i]
  for j in range(4):
    xj = X[:,j]
    mycov[i,j] = np.dot(xi-xi.mean(),xj-xj.mean())
print(mycov / 150)

[[ 0.68569351 -0.042434    1.27431544  0.51627069]
 [-0.042434    0.18997942 -0.32965638 -0.12163937]
 [ 1.27431544 -0.32965638  3.11627785  1.2956094 ]
 [ 0.51627069 -0.12163937  1.2956094   0.58100626]]
[[ 0.68569351 -0.042434    1.27431544  0.51627069]
 [-0.042434    0.18997942 -0.32965638 -0.12163937]
 [ 1.27431544 -0.32965638  3.11627785  1.2956094 ]
 [ 0.51627069 -0.12163937  1.2956094   0.58100626]]
[[ 0.68112222 -0.04215111  1.26582     0.51282889]
 [-0.04215111  0.18871289 -0.32745867 -0.12082844]
 [ 1.26582    -0.32745867  3.09550267  1.286972  ]
 [ 0.51282889 -0.12082844  1.286972    0.57713289]]


In [4]:
# Use the StandardScaler to scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

# Compute the covariance matrix using X^T X divided by (n-1), where n is the number of samples
cov_matrix = (scaled_X.T @ scaled_X) / (scaled_X.shape[0] - 1)

# The diagonal of the covariance matrix for z-score scaled data contains variances (which will be 1 for each feature)
diag_cov = np.diagonal(cov_matrix)

# Compute the correlation matrix using the relationship
computed_corr_matrix = cov_matrix / np.sqrt(np.outer(diag_cov, diag_cov))

print(computed_corr_matrix)


[[ 1.         -0.11756978  0.87175378  0.81794113]
 [-0.11756978  1.         -0.4284401  -0.36612593]
 [ 0.87175378 -0.4284401   1.          0.96286543]
 [ 0.81794113 -0.36612593  0.96286543  1.        ]]
