## Cluster Analysis

In [None]:
# Load the preprocessed campaign data and attach one cluster label per row.
df = pd.read_csv('preprocessed_campaign_data.csv')
# Guard against a silent misalignment: the labels must cover every row exactly once.
assert len(cluster_labels) == len(df), (
    f"cluster_labels has {len(cluster_labels)} entries but the data has {len(df)} rows"
)
df['Cluster'] = cluster_labels

# Profile clusters: one row per cluster, one summary statistic per feature.
# NOTE(review): MntWines is summed while every other feature is a per-customer
# median/mean, so that column grows with cluster size — confirm this is intended.
cluster_profiles = df.groupby('Cluster').agg({
    'Income': 'median',
    'Total_Spend': 'median',
    'Recency': 'median',
    'Family_Size': 'median',
    'MntWines': 'sum',
    'NumWebVisitsMonth': 'mean'
}).reset_index()

# Visualize profiles.
# The features live on very different scales, so colouring the raw numbers
# would let the largest column dominate the colour map. Colour each column by
# its own min-max scaled value and annotate the cells with the raw statistics.
profile_values = cluster_profiles.set_index('Cluster')
column_min = profile_values.min()
# A constant column has zero range; divide by 1 instead to avoid NaN colours.
column_range = (profile_values.max() - column_min).replace(0, 1)
profile_scaled = (profile_values - column_min) / column_range

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(profile_scaled,
            annot=profile_values.to_numpy(), cmap='Blues', fmt='.1f',
            cbar_kws={'label': 'Relative value within feature (min-max scaled)'},
            ax=ax)
ax.set_title('Customer Segment Profiles', pad=20)
fig.savefig('cluster_profiles.png', bbox_inches='tight')
plt.close(fig)

## Interactive Visualization

In [None]:
# t-SNE for 2D visualization (seeded so the layout is reproducible across runs).
tsne = TSNE(n_components=2, random_state=42)
embeddings_2d = tsne.fit_transform(embeddings)

# Cluster ids are categories, not magnitudes. Plotly Express maps numeric
# columns to a continuous colour scale, so convert the labels to strings to
# get one discrete colour (and one legend entry) per cluster.
label_series = pd.Series(cluster_labels)
cluster_order = [str(label) for label in sorted(label_series.unique())]

# Create interactive plot.
# Every column is passed as a plain array so rows are paired by position
# rather than by whatever index the source Series happen to carry.
plot_df = pd.DataFrame({
    'x': embeddings_2d[:, 0],
    'y': embeddings_2d[:, 1],
    'Cluster': label_series.astype(str).to_numpy(),
    'Income': df['Income'].to_numpy(),
    'Total_Spend': df['Total_Spend'].to_numpy()
})

fig = px.scatter(plot_df, x='x', y='y', color='Cluster',
                 hover_data=['Income', 'Total_Spend'],
                 # Keep the legend in label order instead of first-appearance order.
                 category_orders={'Cluster': cluster_order},
                 title='Customer Segments (t-SNE)')
fig.write_html('interactive_clusters.html')

## Save Results

In [None]:
# Save the full customer table, including the 'Cluster' column.
df.to_csv('clustered_customers.csv', index=False)

# Generate one short text description per cluster.
# Iterate over the labels actually present in the data rather than
# range(optimal_clusters): that way no cluster is skipped when the labels are
# not exactly 0..k-1, and no empty cluster is reported with n=0 and nan medians.
cluster_descriptions = []
for cluster in sorted(df['Cluster'].unique()):
    subset = df[df['Cluster'] == cluster]
    # The thresholds (300 on median MntWines, 30 on median Recency) are
    # hard-coded heuristics taken from the original analysis.
    desc = f"""
    Cluster {cluster} (n={len(subset)}):
    - Median Income: ${subset['Income'].median():,.0f}
    - Median Spend: ${subset['Total_Spend'].median():,.0f}
    - Key Characteristics: {'High' if subset['MntWines'].median() > 300 else 'Low'} wine spenders,
      {'Frequent' if subset['Recency'].median() < 30 else 'Infrequent'} purchasers
    """
    cluster_descriptions.append(desc)

# Explicit encoding keeps the output identical regardless of the platform default.
with open('cluster_descriptions.txt', 'w', encoding='utf-8') as f:
    f.write("\n".join(cluster_descriptions))

# Final summary of what was produced.
print("Clustering completed!")
print(f"- Silhouette Score: {silhouette_score(embeddings, cluster_labels):.3f}")
print("- Visualizations saved: cluster_metrics.png, cluster_profiles.png")
print("- Interactive plot: interactive_clusters.html")