# In [12]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load and preprocess the Titanic dataset (example preprocessing)
def load_and_preprocess_data():
    """Build a small Titanic-style sample DataFrame and impute missing values.

    The data is an embedded five-row sample; swap the literal below for a
    real loader (for example ``pd.read_csv('titanic.csv')``) to analyse the
    full dataset.

    Returns:
        A DataFrame with the columns Survived, Pclass, Name, Sex, Age, SibSp,
        Parch, Ticket, Fare, Cabin and Embarked. Missing ``Age`` and ``Fare``
        values are replaced by the column median and missing ``Embarked``
        values by the most frequent port. ``Cabin`` is left untouched, so it
        may still contain missing entries.
    """
    sample_columns = {
        'Survived': [0, 1, 1, 1, 0],
        'Pclass': [3, 1, 3, 1, 3],
        'Name': [
            'Mr. Owen Harris Braund',
            'Mrs. John Bradley Cumings',
            'Miss. Laina Heikkinen',
            'Mrs. Jacques Heath Futrelle',
            'Mr. William Henry Allen',
        ],
        'Sex': ['male', 'female', 'female', 'female', 'male'],
        'Age': [22, 38, 26, 35, 35],
        'SibSp': [1, 1, 0, 1, 0],
        'Parch': [0, 0, 0, 0, 0],
        'Ticket': ['A/5 21171', 'PC 17599', 'STON/O2. 3101282', '113803', '373450'],
        'Fare': [7.25, 71.2833, 7.925, 53.1, 8.05],
        'Cabin': [None, 'C85', None, 'C123', None],
        'Embarked': ['S', 'C', 'S', 'S', 'S'],
    }
    passengers = pd.DataFrame(sample_columns)

    # One replacement value per column: median for the numeric columns,
    # most frequent value for the categorical port of embarkation.
    replacements = {
        'Age': passengers['Age'].median(),
        'Embarked': passengers['Embarked'].mode()[0],
        'Fare': passengers['Fare'].median(),
    }
    for column, replacement in replacements.items():
        passengers[column] = passengers[column].fillna(replacement)

    return passengers

def plot_correlations(df, output_path='correlation_heatmap.png'):
    """Save a heatmap of pairwise correlations between the numeric columns.

    Args:
        df: DataFrame to analyse. Non-numeric columns are ignored.
        output_path: Where the PNG is written. Defaults to
            ``'correlation_heatmap.png'`` in the current working directory.
    """
    # 'number' matches every numeric dtype (int32, float32, uint8, ...),
    # not just float64/int64, so no numeric column is silently left out.
    numeric_df = df.select_dtypes(include='number')

    # DataFrame.corr() with its defaults (Pearson correlation).
    corr_matrix = numeric_df.corr()

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        # center=0 keeps the diverging colormap's midpoint at "no correlation".
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=ax)
        ax.set_title('Features Correlated with Survival')
        fig.savefig(output_path)  # Save instead of showing interactively
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

def survival_analysis(df, output_path='survival_analysis.png'):
    """Save a 2x2 grid of count plots of survival split by passenger features.

    The panels show survival counts by passenger class, sex, binned age and
    port of embarkation. The input DataFrame is not modified: the age bins
    are added to a copy used only for plotting.

    Args:
        df: DataFrame with 'Survived', 'Pclass', 'Sex', 'Age' and 'Embarked'
            columns.
        output_path: Where the PNG is written. Defaults to
            ``'survival_analysis.png'`` in the current working directory.
    """
    # assign() returns a new DataFrame, so the caller's df does not gain an
    # 'AgeBin' column as a side effect. pd.cut uses right-closed intervals
    # by default: (0, 12], (12, 18], (18, 30], (30, 50], (50, 100].
    plot_df = df.assign(
        AgeBin=pd.cut(
            df['Age'],
            bins=[0, 12, 18, 30, 50, 100],
            labels=['Child', 'Teen', 'Young Adult', 'Adult', 'Senior'],
        )
    )

    # (column, title) per panel, in row-major order of the 2x2 grid.
    panels = [
        ('Pclass', 'Survival by Passenger Class'),
        ('Sex', 'Survival by Sex'),
        ('AgeBin', 'Survival by Age Group'),
        ('Embarked', 'Survival by Port of Embarkation'),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    try:
        for ax, (column, title) in zip(axes.flat, panels):
            sns.countplot(x=column, hue='Survived', data=plot_df, ax=ax)
            ax.set_title(title)

        fig.tight_layout()
        fig.savefig(output_path)  # Save instead of showing interactively
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

# Main execution
if __name__ == "__main__":
    # Build the cleaned passenger table once; every report reads from it.
    titanic_clean = load_and_preprocess_data()

    # Render the reports in order: the correlation heatmap first, then the
    # survival count plots.
    for render_report in (plot_correlations, survival_analysis):
        render_report(titanic_clean)