In [4]:
import numpy as np
import pandas as pd

# set a random seed for reproductibility
np.random.seed(42)

In [5]:
# ======================
# Advanced Visualizations
# ======================

# 1. Enhanced Pairwise Relationships
plt.figure(figsize=(12,8))
sns.pairplot(df[['Age', 'MonthlySales', 'PerformanceScore', 'Department']], 
             hue='Department', 
             palette='rocket',
             corner=True,
             plot_kws={'alpha':0.7, 'edgecolor':'black'})
plt.suptitle('Multivariate Relationships Analysis', y=1.02)
plt.show()

# 2. Correlation Heatmap with Significance
corr_matrix = df[['Age', 'MonthlySales', 'PerformanceScore']].corr()
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

plt.figure(figsize=(10,6))
sns.heatmap(corr_matrix, 
           annot=True, 
           mask=mask,
           cmap='rocket',
           fmt=".2f",
           linewidths=.5,
           annot_kws={"size":12})
plt.title('Correlation Matrix with Significance Stars\n*: p<0.05, **: p<0.01', pad=20)
plt.show()

# 3. Enhanced Department Analysis
plt.figure(figsize=(14,6))
plt.subplot(1,2,1)
sns.boxplot(x='Department', y='PerformanceScore', data=df,
           palette='rocket', 
           showmeans=True,
           meanprops={"marker":"o","markerfacecolor":"white"})
plt.title('Performance Score Distribution by Department', pad=15)
plt.xticks(rotation=45)

plt.subplot(1,2,2)
sns.violinplot(x='Department', y='MonthlySales', data=df,
              palette='rocket',
              inner='quartile')
plt.title('Monthly Sales Distribution by Department', pad=15)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# ======================
# Advanced Statistical Analysis
# ======================

# 4. Age Group Analysis
df['AgeGroup'] = pd.cut(df['Age'], 
                       bins=[20,30,40,50,60],
                       labels=['20-30', '30-40', '40-50', '50-60'])

age_group_analysis = df.groupby('AgeGroup').agg(
    Avg_Performance=('PerformanceScore', 'mean'),
    Avg_Sales=('MonthlySales', 'mean'),
    Count=('EmployeeID', 'count')
).reset_index()

print("Age Group Analysis:")
display(age_group_analysis.style.background_gradient(cmap='rocket'))

# 5. Performance Segmentation
df['PerformanceCategory'] = pd.qcut(df['PerformanceScore'],
                                   q=[0, 0.25, 0.75, 1],
                                   labels=['Low', 'Medium', 'High'])

plt.figure(figsize=(10,6))
sns.scatterplot(x='MonthlySales', y='Age', data=df,
               hue='PerformanceCategory',
               palette='rocket_r',
               size='PerformanceCategory',
               sizes=(20, 200),
               alpha=0.7)
plt.title('Performance Segmentation by Age & Sales', pad=15)
plt.xlabel('Monthly Sales (USD)', labelpad=10)
plt.ylabel('Age', labelpad=10)
plt.legend(title='Performance Tier', bbox_to_anchor=(1.05, 1))
plt.show()

# ======================
# Advanced Insights
# ======================

# 6. Outlier Detection
z_scores = np.abs((df[['MonthlySales', 'PerformanceScore']] - 
                 df[['MonthlySales', 'PerformanceScore']].mean()) / 
                 df[['MonthlySales', 'PerformanceScore']].std())

outliers = df[z_scores > 3].any(axis=1)
print(f"\nPotential Outliers Detected: {outliers.sum()}")

# 7. Department Efficiency Ratio
dept_efficiency = df.groupby('Department').apply(
    lambda x: x['MonthlySales'].mean() / x['PerformanceScore'].mean()
).sort_values(ascending=False)

print("\nDepartment Efficiency Ratio (Sales per Performance Point):")
display(dept_efficiency.to_frame(name='Efficiency').style.bar(color='#F3764E'))

# 8. Interactive 3D Plot (if using Jupyter)
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111, projection='3d')

scatter = ax.scatter(df['Age'], 
                    df['MonthlySales'], 
                    df['PerformanceScore'],
                    c=df['PerformanceScore'],
                    cmap='rocket',
                    s=50,
                    alpha=0.7)

ax.set_xlabel('Age', labelpad=15)
ax.set_ylabel('Monthly Sales', labelpad=15)
ax.set_zlabel('Performance Score', labelpad=15)
plt.title('3D Relationship: Age, Sales & Performance', y=1.02)
fig.colorbar(scatter, pad=0.1)
plt.show()

NameError: name 'plt' is not defined