# Data Exploration - Customer Churn Analysis

This notebook provides an exploratory data analysis of the customer churn dataset used in our MLOps pipeline.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw customer churn CSV into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
raw_csv_path = '../data/raw/customer_churn.csv'
df = pd.read_csv(raw_csv_path)

# Report the (rows, columns) shape, then show the first rows as the cell output.
print(f"Dataset shape: {df.shape}")
df.head()

In [None]:
# Basic statistics
print("Dataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()

# Share of rows per churn value (normalize=True yields fractions, not counts).
print("\nChurn Distribution:")
print(df['churn'].value_counts(normalize=True))

In [None]:
# Visualizations: a 2x2 grid of overview plots for the churn dataset
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Churn distribution (row count per churn value)
df['churn'].value_counts().plot(kind='bar', ax=axes[0,0], title='Churn Distribution')

# Age distribution
# Series.hist() forwards unknown keyword arguments to matplotlib's hist(),
# which rejects `title`, so the title is set on the Axes after plotting.
df['age'].hist(bins=20, ax=axes[0,1])
axes[0,1].set_title('Age Distribution')

# Monthly charges by churn
# boxplot(by=...) puts its own column-name title on the Axes, so the
# descriptive title is applied after the call to override it.
df.boxplot(column='monthly_charges', by='churn', ax=axes[1,0])
axes[1,0].set_title('Monthly Charges by Churn')

# Tenure by churn
df.boxplot(column='tenure', by='churn', ax=axes[1,1])
axes[1,1].set_title('Tenure by Churn')

# boxplot(by=...) also adds a figure-level "Boxplot grouped by churn" suptitle,
# which does not describe this mixed grid of plots; clear it.
fig.suptitle('')

plt.tight_layout()
plt.show()