In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston

def pearson_correlation(x, y):
    """Return the Pearson correlation coefficient between *x* and *y*.

    The coefficient is the sum of the products of the paired deviations
    from the mean, divided by the square root of the product of the two
    sums of squared deviations.

    Args:
        x: Numeric sequence (pandas Series, NumPy array, list, ...).
        y: Numeric sequence with the same shape as ``x``; values are
            paired with ``x`` by position.

    Returns:
        The correlation coefficient as a float, or ``nan`` when it is
        undefined (empty input, or either input has zero variance).

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same shape.
    """
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)
    if x_values.shape != y_values.shape:
        raise ValueError(
            f"x and y must have the same shape, got {x_values.shape} and {y_values.shape}"
        )
    if x_values.size == 0:
        # The mean of an empty sample is undefined, so the correlation is too.
        return float("nan")

    x_dev = x_values - x_values.mean()
    y_dev = y_values - y_values.mean()
    numerator = np.sum(x_dev * y_dev)
    # Deviations must be squared (** 2) and the product square-rooted;
    # doubling and halving would not give a correlation.
    denominator = np.sqrt(np.sum(x_dev ** 2) * np.sum(y_dev ** 2))
    if denominator == 0:
        # A constant input has no variance; report NaN instead of dividing by zero.
        return float("nan")
    return float(numerator / denominator)

# Load the Boston Housing dataset: the feature matrix becomes a DataFrame
# with named columns and the target becomes a Series called 'target'.
# NOTE(review): load_boston is no longer available in scikit-learn 1.2+ —
# confirm the pinned scikit-learn version before running.
boston = load_boston()
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)
y = pd.Series(data=boston.target, name='target')

# Pearson correlation of every feature column against the target,
# keyed by column name in the DataFrame's column order.
correlation_coefficients = {
    column: pearson_correlation(X[column], y) for column in X.columns
}

# Draw every feature's correlation with the target as a one-column heatmap
# (one row per feature), with the colour scale fixed to [-1, 1].
correlation_frame = pd.Series(correlation_coefficients, name='Correlation').to_frame()
plt.figure(figsize=(10, 6))
sns.heatmap(correlation_frame, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Feature Correlations with Target Variable')
plt.show()

# Keep only the features whose absolute correlation with the target
# is strictly greater than the threshold.
threshold = 0.5
selected_features = [
    name
    for name, coefficient in correlation_coefficients.items()
    if abs(coefficient) > threshold
]

# Report the feature list before and after selection
print("All features:", list(X.columns), sep="\n")
print("\nSelected features:", selected_features, sep="\n")

# Restrict the feature matrix to the selected columns
X_selected = X.loc[:, selected_features]

# Draw the same one-column heatmap, limited to the rows of the selected
# features (in selection order), with the colour scale fixed to [-1, 1].
selected_correlations = pd.Series(
    correlation_coefficients, index=X.columns, name='Correlation'
).to_frame().loc[selected_features]
plt.figure(figsize=(10, 6))
sns.heatmap(selected_correlations, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Selected Feature Correlations with Target Variable')
plt.show()