# Correlation Matrices and Heatmaps

<!--
Author: Molla Samser
Website: https://rskworld.in/
Email: help@rskworld.in
Phone: +91 93305 39277
Address: Nutanhat, Mongolkote, Purba Burdwan, West Bengal, India, 713147
-->

This notebook demonstrates correlation analysis and heatmap visualizations:
- Correlation matrices
- Heatmaps with annotations
- Cluster maps
- Custom color schemes


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/
# Email: help@rskworld.in
# Phone: +91 93305 39277
# Address: Nutanhat, Mongolkote, Purba Burdwan, West Bengal, India, 713147

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Plot styling: seaborn's "white" style and a 12x8 inch default figure size
sns.set_style("white")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Read the sample dataset (relative path, resolved against the working directory)
data_path = '../data/sample_data.csv'
df = pd.read_csv(data_path)
print("Dataset shape:", df.shape)
# Trailing expression so the notebook renders the first rows as cell output
df.head()


## 1. Basic Correlation Matrix


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/

# Columns that take part in the correlation analysis
numeric_cols = [
    'age',
    'income',
    'education_years',
    'experience',
    'score',
    'sales',
]

# Pairwise correlation of the selected columns (pandas default method)
corr_matrix = df.loc[:, numeric_cols].corr()

# Header line followed by the matrix itself, each on its own line
print("Correlation Matrix:", corr_matrix, sep="\n")


## 2. Basic Heatmap


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/

# Annotated heatmap of the correlation matrix, drawn on an explicit Axes.
# The diverging 'coolwarm' palette is centred on 0.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    ax=ax,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
ax.set_title('Correlation Heatmap', fontsize=16, fontweight='bold', pad=20)
fig.tight_layout()

# Write the image to disk before showing the figure
fig.savefig('../images/correlation_heatmap.png', dpi=300, bbox_inches='tight')
plt.show()


## 3. Heatmap with Custom Color Scheme


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/

# Same correlation matrix rendered with the 'viridis' palette.
# The colour scale is pinned to [-1, 1] via vmin/vmax.
viridis_opts = dict(
    annot=True,
    fmt='.2f',
    cmap='viridis',
    square=True,
    linewidths=2,
    cbar_kws={"shrink": 0.8},
    vmin=-1,
    vmax=1,
)

plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, **viridis_opts)
plt.title('Correlation Heatmap (Viridis)', fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.savefig('../images/heatmap_viridis.png', dpi=300, bbox_inches='tight')
plt.show()


## 4. Masked Heatmap (Upper Triangle)


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/

# Boolean mask that is True on and above the main diagonal.
# seaborn does not draw cells where the mask is True, so only the strictly
# lower triangle of the matrix is shown (the diagonal is hidden as well).
mask = np.triu(np.ones(corr_matrix.shape, dtype=bool))

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    mask=mask,
    ax=ax,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
    center=0,
)
ax.set_title('Correlation Heatmap (Upper Triangle Masked)',
             fontsize=16, fontweight='bold', pad=20)
fig.tight_layout()
fig.savefig('../images/heatmap_masked.png', dpi=300, bbox_inches='tight')
plt.show()


## 5. Clustered Heatmap


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/

# Clustermap - hierarchically clusters rows and columns so that similar
# variables end up next to each other, with dendrograms along the margins.
# NOTE: `square=True` is deliberately not passed here. clustermap ignores it
# (emitting a warning) because a fixed cell aspect ratio is incompatible with
# its multi-axes layout.
g = sns.clustermap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm',
                   figsize=(10, 10), center=0, linewidths=1)

# y > 1 places the title above the column dendrogram
g.fig.suptitle('Clustered Correlation Heatmap', fontsize=16, fontweight='bold', y=1.02)

# Save through the grid object so the clustermap's own figure is written,
# independent of whichever figure pyplot currently considers active.
g.savefig('../images/clustermap.png', dpi=300, bbox_inches='tight')
plt.show()


## 6. Correlation with Specific Threshold


In [None]:
# Author: Molla Samser
# Website: https://rskworld.in/

# Keep only strong correlations: entries whose absolute value is below the
# threshold are replaced by 0 (values exactly at the threshold are kept).
threshold = 0.5
is_weak = corr_matrix.abs() < threshold
strong_corr = corr_matrix.mask(is_weak, 0)

# Diverging palette centred on 0, with the colour scale pinned to [-1, 1]
strong_opts = dict(
    annot=True,
    fmt='.2f',
    cmap='RdYlBu_r',
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
    center=0,
    vmin=-1,
    vmax=1,
)

plt.figure(figsize=(10, 8))
sns.heatmap(strong_corr, **strong_opts)
plt.title(f'Strong Correlations (|r| > {threshold})', fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.savefig('../images/strong_correlation.png', dpi=300, bbox_inches='tight')
plt.show()
