In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
import itertools
from prettytable import PrettyTable

In [2]:
# Load the Iris dataset through scikit-learn's load_iris; later cells read its
# feature_names, data and target attributes.
iris = load_iris()

In [3]:
# Keep the feature names around: they are used as the DataFrame column labels
# and as the pool of names from which feature pairs are built.
columns = iris.feature_names
print(columns)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [4]:
# Feature matrix: one column per measurement, labelled with the feature names.
X = pd.DataFrame(data=iris.data, columns=columns)

# Target vector wrapped in a single-column frame named "labels".
y = pd.DataFrame(data=iris.target, columns=["labels"])

In [5]:
# Preview the first rows of the feature frame.
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [6]:
# Preview the first rows of the label frame.
y.head()

Unnamed: 0,labels
0,0
1,0
2,0
3,0
4,0


**Hand Implementation**

In [7]:
def pearson_corr(feature1, feature2):
    """
    Compute Pearson's correlation coefficient between two features.

    Input Params : feature1 and feature2, numeric array-likes of identical
                   shape (numpy arrays, pandas Series, lists, ...)
    returns: the coefficient formatted as a string with 5 decimal places
             (e.g. "0.87175"). Returns "nan" when the coefficient is
             undefined, i.e. for empty input or when either feature is
             constant (zero variance).
    raises: ValueError if the two features do not have the same shape.
    """
    feature1 = np.asarray(feature1, dtype=float)
    feature2 = np.asarray(feature2, dtype=float)

    # Without this check numpy would silently broadcast e.g. shape (n,) against
    # shape (1,) and produce a number that is not a correlation of paired samples.
    if feature1.shape != feature2.shape:
        raise ValueError(
            f"feature1 and feature2 must have the same shape, "
            f"got {feature1.shape} and {feature2.shape}"
        )

    # No samples: the coefficient is undefined (and np.mean would warn).
    if feature1.size == 0:
        return "nan"

    centered1 = feature1 - np.mean(feature1)
    centered2 = feature2 - np.mean(feature2)

    # The 1/n factors of the covariance and of the two standard deviations
    # cancel in the ratio, so plain sums are sufficient.
    covariance = np.sum(centered1 * centered2)
    std_feature1 = np.sqrt(np.sum(centered1**2))
    std_feature2 = np.sqrt(np.sum(centered2**2))
    denominator = std_feature1 * std_feature2

    # A constant feature has zero spread; avoid the 0/0 division (and its
    # RuntimeWarning) and report the undefined result explicitly.
    if denominator == 0:
        return "nan"

    pearson_coeff = covariance / denominator
    return f"{pearson_coeff:.5f}"

In [8]:
# Both feature sets refer to the same list of column names.
feature_set1 = columns
feature_set2 = columns

# Every ordered pair of two distinct features, so (a, b) and (b, a) both appear.
feature_set_permutations = [*itertools.permutations(feature_set1, 2)]

print(feature_set_permutations)
print("\nSize of the permutations is", len(feature_set_permutations))

[('sepal length (cm)', 'sepal width (cm)'), ('sepal length (cm)', 'petal length (cm)'), ('sepal length (cm)', 'petal width (cm)'), ('sepal width (cm)', 'sepal length (cm)'), ('sepal width (cm)', 'petal length (cm)'), ('sepal width (cm)', 'petal width (cm)'), ('petal length (cm)', 'sepal length (cm)'), ('petal length (cm)', 'sepal width (cm)'), ('petal length (cm)', 'petal width (cm)'), ('petal width (cm)', 'sepal length (cm)'), ('petal width (cm)', 'sepal width (cm)'), ('petal width (cm)', 'petal length (cm)')]

Size of the permutations is 12


In [9]:
# Pair each (feature, feature) tuple with its formatted correlation coefficient.
coeff_results = [
    ((first_name, second_name), pearson_corr(X[first_name].values, X[second_name].values))
    for first_name, second_name in feature_set_permutations
]

In [10]:
# Two-column text table: the feature pair and its correlation coefficient.
x = PrettyTable()
x.field_names = ["Feature Names", "Pearson's Correlation Coefficient"]
for feature_pair, coefficient in coeff_results:
    x.add_row([feature_pair, coefficient])

In [11]:
# Print the PrettyTable built in the previous cell as plain text.
print(x)

+--------------------------------------------+-----------------------------------+
|               Feature Names                | Pearson's Correlation Coefficient |
+--------------------------------------------+-----------------------------------+
| ('sepal length (cm)', 'sepal width (cm)')  |              -0.11757             |
| ('sepal length (cm)', 'petal length (cm)') |              0.87175              |
| ('sepal length (cm)', 'petal width (cm)')  |              0.81794              |
| ('sepal width (cm)', 'sepal length (cm)')  |              -0.11757             |
| ('sepal width (cm)', 'petal length (cm)')  |              -0.42844             |
|  ('sepal width (cm)', 'petal width (cm)')  |              -0.36613             |
| ('petal length (cm)', 'sepal length (cm)') |              0.87175              |
| ('petal length (cm)', 'sepal width (cm)')  |              -0.42844             |
| ('petal length (cm)', 'petal width (cm)')  |              0.96287              |
| ('