In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
# Load the heart-disease table; the relative path is resolved against the
# notebook's working directory.
DATA_PATH = 'heart.csv'
data = pd.read_csv(DATA_PATH)

# Use the ggplot theme for every figure created from here on
plt.style.use('ggplot')

In [4]:
# a. Histograms
# Objective: Show the distribution of each continuous variable
def plot_histograms():
    """Write a grid of histograms for the continuous columns to 'histograms.png'.

    Reads the module-level ``data`` frame. Five columns are drawn on a 3x2
    grid of panels, 30 bins each. The figure is closed after saving, so the
    function returns nothing and shows nothing inline.
    """
    columns = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
    fig, grid = plt.subplots(3, 2, figsize=(12, 10))
    panels = grid.ravel()

    for panel, column in zip(panels, columns):
        panel.hist(data[column], bins=30, edgecolor='black')
        panel.set(
            title=f'Distribution of {column}',
            xlabel=column,
            ylabel='Frequency',
        )

    # Five columns on six panels: blank the spare panel
    panels[-1].axis('off')
    fig.tight_layout()
    fig.savefig('histograms.png')
    plt.close(fig)

In [5]:
# b. Pie Charts
# Objective: Show the proportion of categorical variables and target variable
def plot_pie_charts():
    """Write one pie chart per categorical column to 'pie_charts.png'.

    Reads the module-level ``data`` frame. Each of the six columns gets its
    own panel on a 2x3 grid; wedges are sized by ``value_counts()`` and
    labelled with the category value plus its percentage share. The figure
    is closed after saving.
    """
    columns = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'target']
    fig, grid = plt.subplots(2, 3, figsize=(15, 8))

    for panel, column in zip(grid.ravel(), columns):
        counts = data[column].value_counts()
        panel.pie(counts, labels=counts.index, autopct='%1.1f%%')
        panel.set_title(f'Proportion of {column}')

    fig.tight_layout()
    fig.savefig('pie_charts.png')
    plt.close(fig)

In [6]:
# c. Box Plots
# Objective: Examine the spread and outliers in continuous variables by target
def plot_box_plots():
    """Write box plots of each continuous column, split by target, to 'box_plots.png'.

    Reads the module-level ``data`` frame. Five columns are drawn on a 3x2
    grid, each grouped by the ``target`` column; the unused sixth panel is
    hidden. The figure is closed after saving, so nothing is returned or
    displayed inline.
    """
    continuous_vars = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
    fig, axes = plt.subplots(3, 2, figsize=(12, 10))
    axes = axes.ravel()

    for ax, var in zip(axes, continuous_vars):
        data.boxplot(column=var, by='target', ax=ax)
        # Override the per-panel title and x label that pandas fills in
        ax.set_title(f'{var} by Heart Disease')
        ax.set_xlabel('Heart Disease (0 = No, 1 = Yes)')
        ax.set_ylabel(var)

    axes[-1].axis('off')  # five columns on six panels: hide the spare one

    # pandas' grouped boxplot adds a figure-level default title. Clear it
    # BEFORE tight_layout: tight_layout reserves room for a suptitle that is
    # present, which would leave an empty band at the top of the saved image.
    fig.suptitle('')
    fig.tight_layout()
    fig.savefig('box_plots.png')
    plt.close(fig)

In [7]:
# d. Scatter Plots
# Objective: Explore relationships between pairs of continuous variables
def plot_scatter_plots():
    """Write four pairwise scatter plots, coloured by target, to 'scatter_plots.png'.

    Reads the module-level ``data`` frame. Each (x, y) column pair gets one
    panel on a 2x2 grid with its own colorbar keyed to the ``target`` column.
    The figure is closed after saving.
    """
    pairs = (
        ('age', 'thalach'),
        ('trestbps', 'chol'),
        ('chol', 'thalach'),
        ('age', 'oldpeak'),
    )
    fig, grid = plt.subplots(2, 2, figsize=(12, 10))

    for panel, (x_name, y_name) in zip(grid.ravel(), pairs):
        points = panel.scatter(
            data[x_name],
            data[y_name],
            c=data['target'],
            cmap='viridis',
        )
        panel.set(
            xlabel=x_name,
            ylabel=y_name,
            title=f'{y_name} vs {x_name}',
        )
        fig.colorbar(points, ax=panel, label='Heart Disease')

    fig.tight_layout()
    fig.savefig('scatter_plots.png')
    plt.close(fig)

In [8]:
# e. Scatter Plot with Box Plots
# Objective: Combine distribution and relationship visualization for key variables
def plot_scatter_with_boxplots():
    """Write an age-vs-thalach scatter with marginal box plots to 'scatter_with_boxplots.png'.

    Reads the module-level ``data`` frame. The central panel scatters
    ``thalach`` against ``age`` coloured by ``target``. A horizontal box plot
    of ``age`` sits directly below it and a vertical box plot of ``thalach``
    directly to its left, each split into target == 0 and target == 1.
    The figure is closed after saving, so nothing is returned or displayed.

    Layout notes:
    - The marginal axes share their data axis with the scatter, so box
      positions line up with the points above / beside them.
    - The colorbar gets its own axes. Attaching it with ``ax=scatter_ax``
      would shrink the scatter and break its alignment with the marginals.
    - Tick and axis labels live on the marginal axes only; on the scatter
      they would be drawn on top of the flush-mounted marginals.
    """
    fig = plt.figure(figsize=(10, 8))

    no_disease = data[data['target'] == 0]
    disease = data[data['target'] == 1]

    # Main scatter plot
    scatter_ax = fig.add_axes([0.2, 0.2, 0.6, 0.6])
    scatter = scatter_ax.scatter(data['age'], data['thalach'],
                                 c=data['target'], cmap='viridis')
    scatter_ax.set_title('Maximum Heart Rate vs Age with Distributions')
    # The marginals sit flush against the bottom and left edges and carry
    # the tick/axis labels, so suppress the scatter's own copies.
    scatter_ax.tick_params(labelbottom=False, labelleft=False)

    # Box plot for the x-axis variable (age), sharing the scatter's x range.
    # NOTE: newer matplotlib prefers orientation='horizontal' over
    # vert=False; vert is kept for compatibility with older releases.
    box_x_ax = fig.add_axes([0.2, 0.1, 0.6, 0.1], sharex=scatter_ax)
    box_x_ax.boxplot([no_disease['age'], disease['age']], vert=False)
    box_x_ax.set_yticks([1, 2])
    box_x_ax.set_yticklabels(['No Disease', 'Disease'])
    box_x_ax.set_xlabel('Age')

    # Box plot for the y-axis variable (thalach), sharing the scatter's y range
    box_y_ax = fig.add_axes([0.1, 0.2, 0.1, 0.6], sharey=scatter_ax)
    box_y_ax.boxplot([no_disease['thalach'], disease['thalach']])
    box_y_ax.set_xticks([1, 2])
    box_y_ax.set_xticklabels(['No Disease', 'Disease'])
    # Put the category labels on top, rotated, so they neither overlap each
    # other in the narrow panel nor collide with the bottom marginal's labels.
    box_y_ax.xaxis.tick_top()
    box_y_ax.tick_params(axis='x', labelrotation=90)
    box_y_ax.set_ylabel('Maximum Heart Rate (thalach)')

    # Dedicated colorbar axes to the right of the scatter
    cbar_ax = fig.add_axes([0.82, 0.2, 0.02, 0.6])
    fig.colorbar(scatter, cax=cbar_ax, label='Heart Disease')

    fig.savefig('scatter_with_boxplots.png')
    plt.close(fig)

In [14]:

# Render every figure in turn; each function saves its own PNG file
for render in (
    plot_histograms,
    plot_pie_charts,
    plot_box_plots,
    plot_scatter_plots,
    plot_scatter_with_boxplots,
):
    render()