# 100 Python EDA Coding Samples with Visualizations
## From Simple to Complex Data Visualizations

This notebook covers:
- Basic to Advanced Plots
- Multiple Chart Types
- Various Datasets
- Statistical Visualizations
- Interactive Plots

## Setup: Install Required Libraries

In [None]:
!pip install -q seaborn plotly pandas matplotlib numpy scipy scikit-learn squarify wordcloud missingno kaleido

## Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
import squarify
from wordcloud import WordCloud
import missingno as msno

# Notebook-wide display configuration: seaborn grid theme and default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

# NOTE(review): this silences every warning category for the whole notebook,
# including deprecation notices from the plotting libraries.
warnings.filterwarnings('ignore')

print("All libraries imported successfully!")

## Load Datasets

In [None]:
def _load_csv(label, url):
    """Read a remote CSV into a DataFrame and report its shape."""
    frame = pd.read_csv(url)
    print(f"{label} dataset loaded:", frame.shape)
    return frame


# Dataset 1: Titanic
titanic = _load_csv('Titanic', 'https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

# Dataset 2: Iris
iris = _load_csv('Iris', 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')

# Dataset 3: Tips
tips = _load_csv('Tips', 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv')

# Dataset 4: Diamonds
diamonds = _load_csv('Diamonds', 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv')

# Dataset 5: Flight Passengers
flights = _load_csv('Flights', 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/flights.csv')

# Section 1: Basic Univariate Plots (1-10)

### 1. Simple Histogram

In [None]:
# Histogram of passenger ages (rows with a missing age are dropped).
fig, ax = plt.subplots(figsize=(10, 6))
known_ages = titanic['Age'].dropna()
ax.hist(known_ages, bins=30, edgecolor='black', alpha=0.7)
ax.set(xlabel='Age', ylabel='Frequency', title='Distribution of Passenger Ages')
plt.show()

### 2. Histogram with KDE

In [None]:
# Age histogram with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
known_ages = titanic['Age'].dropna()
sns.histplot(known_ages, bins=30, kde=True, ax=ax)
ax.set_title('Age Distribution with KDE')
plt.show()

### 3. Box Plot

In [None]:
# Single box plot summarising the Age column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=titanic, y='Age', ax=ax)
ax.set_title('Box Plot of Age')
plt.show()

### 4. Violin Plot

In [None]:
# Violin plot: box-plot summary combined with a density outline of Age.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=titanic, y='Age', ax=ax)
ax.set_title('Violin Plot of Age Distribution')
plt.show()

### 5. Bar Chart - Count Plot

In [None]:
# Number of passengers in each ticket class.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=titanic, x='Pclass', ax=ax)
ax.set(title='Passenger Count by Class', xlabel='Passenger Class', ylabel='Count')
plt.show()

### 6. Pie Chart

In [None]:
# Share of each value of the Survived column.
fig, ax = plt.subplots(figsize=(10, 6))
survival_counts = titanic['Survived'].value_counts()
survival_counts.plot.pie(autopct='%1.1f%%', startangle=90, ax=ax)
ax.set_title('Survival Rate Distribution')
ax.set_ylabel('')  # hide the automatic series-name label
plt.show()

### 7. Density Plot

In [None]:
# Kernel density estimate of the ticket fare.
fig, ax = plt.subplots(figsize=(10, 6))
fares = titanic['Fare'].dropna()
fares.plot.density(ax=ax)
ax.set_title('Density Plot of Fare')
ax.set_xlabel('Fare')
plt.show()

### 8. CDF Plot

In [None]:
# Empirical CDF built by hand: sorted values against their cumulative rank fraction.
sorted_age = np.sort(titanic['Age'].dropna())
n_ages = len(sorted_age)
y = np.arange(1, n_ages + 1) / n_ages

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sorted_age, y, marker='.', linestyle='none')
ax.set(xlabel='Age', ylabel='CDF', title='Cumulative Distribution Function of Age')
ax.grid(True)
plt.show()

### 9. Strip Plot

In [None]:
# One jittered point per bill; alpha makes overlapping points visible.
fig, ax = plt.subplots(figsize=(10, 6))
sns.stripplot(data=tips, y='total_bill', alpha=0.5, ax=ax)
ax.set_title('Strip Plot of Total Bill')
plt.show()

### 10. Swarm Plot

In [None]:
# Swarm plot: points are nudged sideways so that none overlap.
fig, ax = plt.subplots(figsize=(10, 6))
sns.swarmplot(data=iris, y='sepal_length', ax=ax)
ax.set_title('Swarm Plot of Sepal Length')
plt.show()

# Section 2: More Univariate Visualizations (11-20)

### 11. Q-Q Plot

In [None]:
# Q-Q plot of Age against a normal distribution.
fig, ax = plt.subplots(figsize=(10, 6))
known_ages = titanic['Age'].dropna()
stats.probplot(known_ages, dist='norm', plot=ax)
ax.set_title('Q-Q Plot for Age')  # replaces probplot's default title
plt.show()

### 12. ECDF Plot

In [None]:
# Empirical CDF of the total bill, computed by seaborn.
fig, ax = plt.subplots(figsize=(10, 6))
sns.ecdfplot(data=tips, x='total_bill', ax=ax)
ax.set_title('ECDF Plot of Total Bill')
plt.show()

### 13. Rug Plot

In [None]:
# Histogram + KDE with a rug of individual observations along the x-axis.
fig, ax = plt.subplots(figsize=(10, 6))
bill = tips['total_bill']
sns.histplot(bill, kde=True, ax=ax)
sns.rugplot(bill, color='red', alpha=0.5, ax=ax)
ax.set_title('Distribution with Rug Plot')
plt.show()

### 14. Stem Plot

In [None]:
# Frequency of the 20 smallest distinct tip amounts, drawn as stems.
tip_counts = tips['tip'].value_counts().sort_index().head(20)

fig, ax = plt.subplots(figsize=(10, 6))
ax.stem(tip_counts.index, tip_counts.values)
ax.set(title='Stem Plot of Tip Values', xlabel='Tip Amount', ylabel='Frequency')
plt.show()

### 15. Stacked Bar Chart

In [None]:
# Stacked bars of survival outcome within each passenger class.
# DataFrame.plot opens its own figure unless it is handed an Axes, so the
# Axes is created here and passed in; otherwise a blank figure is also shown.
fig, ax = plt.subplots(figsize=(10, 6))
class_by_survival = pd.crosstab(titanic['Pclass'], titanic['Survived'])
class_by_survival.plot(kind='bar', stacked=True, ax=ax)
ax.set_title('Survival by Passenger Class (Stacked)')
ax.set_xlabel('Passenger Class')
ax.set_ylabel('Count')
# Crosstab columns are the Survived values 0 and 1, in that order.
ax.legend(['Not Survived', 'Survived'])
plt.show()

### 16. Horizontal Bar Chart

In [None]:
# Passenger count per embarkation port, as horizontal bars.
fig, ax = plt.subplots(figsize=(10, 6))
port_counts = titanic['Embarked'].value_counts()
port_counts.plot.barh(ax=ax)
ax.set(title='Embarkation Port Distribution', xlabel='Count', ylabel='Port')
plt.show()

### 17. Area Plot

In [None]:
# Stacked area chart of the first three calendar months across years.
# pivot() sorts the month columns alphabetically, so they are put back into
# first-appearance order before slicing.
# NOTE(review): assumes the CSV rows are stored chronologically, so that the
# first-appearance order of `month` is the calendar order - confirm.
month_order = flights['month'].unique()
flights_pivot = (
    flights.pivot(index='year', columns='month', values='passengers')
    .reindex(columns=month_order)
)

# Pass an explicit Axes: DataFrame.plot would otherwise open a second figure
# and leave the one created here blank.
fig, ax = plt.subplots(figsize=(10, 6))
flights_pivot.iloc[:, :3].plot(kind='area', alpha=0.5, ax=ax)
ax.set_title('Area Plot of Flight Passengers')
ax.set_xlabel('Year')
ax.set_ylabel('Passengers')
plt.show()

### 18. Step Plot

In [None]:
# Step plot of the 100 lowest fares, in ascending order.
sorted_fare = titanic['Fare'].dropna().sort_values().reset_index(drop=True)
lowest_fares = sorted_fare.iloc[:100]

fig, ax = plt.subplots(figsize=(10, 6))
ax.step(lowest_fares.index, lowest_fares.values, where='mid')
ax.set(title='Step Plot of Fare Distribution', xlabel='Index', ylabel='Fare')
plt.show()

### 19. Donut Chart

In [None]:
# Donut chart: an ordinary pie with a white disc covering the centre.
class_counts = titanic['Pclass'].value_counts()
colors = plt.cm.Set3(range(len(class_counts)))

fig, ax = plt.subplots(figsize=(10, 6))
ax.pie(
    class_counts,
    labels=class_counts.index,
    autopct='%1.1f%%',
    colors=colors,
    startangle=90,
)
ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
ax.set_title('Passenger Class Distribution (Donut Chart)')
plt.show()

### 20. Multiple Histograms

In [None]:
# One histogram per iris measurement on a 2x2 grid.
# Each entry: (column, panel title, extra hist keyword arguments).
panels = [
    ('sepal_length', 'Sepal Length', {}),
    ('sepal_width', 'Sepal Width', {'color': 'orange'}),
    ('petal_length', 'Petal Length', {'color': 'green'}),
    ('petal_width', 'Petal Width', {'color': 'red'}),
]

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
for ax, (column, title, extra) in zip(axes.flat, panels):
    ax.hist(iris[column], bins=20, edgecolor='black', **extra)
    ax.set_title(title)
plt.tight_layout()
plt.show()

# Section 3: Bivariate Plots (21-30)

### 21. Scatter Plot

In [None]:
# Plain scatter of the two sepal measurements.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(iris['sepal_length'], iris['sepal_width'], alpha=0.6)
ax.set(xlabel='Sepal Length', ylabel='Sepal Width', title='Sepal Length vs Sepal Width')
plt.show()

### 22. Scatter with Regression

In [None]:
# Scatter of tip against bill with seaborn's fitted regression line.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(data=tips, x='total_bill', y='tip', ax=ax)
ax.set_title('Total Bill vs Tip with Regression Line')
plt.show()

### 23. Line Plot

In [None]:
# Yearly passenger totals as a line with point markers.
flights_by_year = flights.groupby('year')['passengers'].sum()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(flights_by_year.index, flights_by_year.values, marker='o')
ax.set(xlabel='Year', ylabel='Total Passengers', title='Total Passengers Over Years')
ax.grid(True)
plt.show()

### 24. Box Plot by Category

In [None]:
# Age distribution compared across the passenger classes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=titanic, x='Pclass', y='Age', ax=ax)
ax.set_title('Age Distribution by Passenger Class')
plt.show()

### 25. Violin by Category

In [None]:
# One violin per day of the week for the total bill.
fig, ax = plt.subplots(figsize=(10, 6))
sns.violinplot(data=tips, x='day', y='total_bill', ax=ax)
ax.set_title('Total Bill Distribution by Day')
plt.show()

### 26. Grouped Bar Chart

In [None]:
# Side-by-side bars of survival outcome within each passenger class.
# The Axes is created first and passed to DataFrame.plot; without `ax=` pandas
# opens a new figure and the one sized here is rendered blank.
fig, ax = plt.subplots(figsize=(10, 6))
class_by_survival = pd.crosstab(titanic['Pclass'], titanic['Survived'])
class_by_survival.plot(kind='bar', rot=0, ax=ax)  # rot=0 keeps class labels horizontal
ax.set_title('Survival by Passenger Class')
ax.set_xlabel('Passenger Class')
ax.set_ylabel('Count')
# Crosstab columns are the Survived values 0 and 1, in that order.
ax.legend(['Not Survived', 'Survived'])
plt.show()

### 27. Correlation Heatmap

In [None]:
# Pairwise correlations of the numeric iris columns, colour-centred on zero.
numeric_iris = iris.select_dtypes(include=[np.number])
corr = numeric_iris.corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0, square=True, linewidths=1, ax=ax)
ax.set_title('Correlation Heatmap - Iris Dataset')
plt.show()

### 28. Joint Plot

In [None]:
# Joint scatter of bill and tip with marginal distributions.
scatter_joint = sns.jointplot(
    x='total_bill',
    y='tip',
    data=tips,
    kind='scatter',
    height=8,
)
plt.show()

### 29. Joint Plot KDE

In [None]:
# Same joint view, drawn as density contours instead of points.
kde_joint = sns.jointplot(
    x='total_bill',
    y='tip',
    data=tips,
    kind='kde',
    height=8,
)
plt.show()

### 30. Joint Plot Hex

In [None]:
# Same joint view, with observations binned into hexagons.
hex_joint = sns.jointplot(
    x='total_bill',
    y='tip',
    data=tips,
    kind='hex',
    height=8,
)
plt.show()

# Section 4: More Bivariate Plots (31-40)

### 31. 2D Histogram

In [None]:
# 2D histogram of the sepal measurements; colour encodes the count per bin.
fig, ax = plt.subplots(figsize=(10, 6))
_, _, _, bin_image = ax.hist2d(iris['sepal_length'], iris['sepal_width'], bins=20, cmap='Blues')
fig.colorbar(bin_image, ax=ax, label='Count')
ax.set(xlabel='Sepal Length', ylabel='Sepal Width', title='2D Histogram of Sepal Dimensions')
plt.show()

### 32. Hexbin Plot

In [None]:
# Hexagonal binning of carat vs price for the first 1000 diamonds.
first_diamonds = diamonds.head(1000)

fig, ax = plt.subplots(figsize=(10, 6))
hexes = ax.hexbin(first_diamonds['carat'], first_diamonds['price'], gridsize=30, cmap='YlOrRd')
fig.colorbar(hexes, ax=ax, label='Count')
ax.set(xlabel='Carat', ylabel='Price', title='Hexbin Plot: Carat vs Price')
plt.show()

### 33. Contour Plot

In [None]:
# Filled bivariate KDE contours of the sepal measurements.
fig, ax = plt.subplots(figsize=(10, 6))
sns.kdeplot(
    data=iris,
    x='sepal_length',
    y='sepal_width',
    levels=10,
    fill=True,
    cmap='viridis',
    ax=ax,
)
ax.set_title('Contour Plot of Sepal Dimensions')
plt.show()

### 34. Swarm by Category

In [None]:
# Swarm of bills per day, coloured by meal time.
fig, ax = plt.subplots(figsize=(10, 6))
sns.swarmplot(data=tips, x='day', y='total_bill', hue='time', ax=ax)
ax.set_title('Total Bill Distribution by Day and Time')
plt.show()

### 35. Strip by Category

In [None]:
# Tips per day; dodge=True separates the two hue groups side by side.
fig, ax = plt.subplots(figsize=(10, 6))
sns.stripplot(data=tips, x='day', y='tip', hue='sex', dodge=True, alpha=0.6, ax=ax)
ax.set_title('Tip Distribution by Day and Gender')
plt.show()

### 36. Point Plot

In [None]:
# Point estimates of total bill per day, one line per hue group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.pointplot(data=tips, x='day', y='total_bill', hue='sex', ax=ax)
ax.set_title('Average Total Bill by Day and Gender')
plt.show()

### 37. Bar with Error Bars

In [None]:
# Mean total bill per day with standard-deviation error bars.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=tips, x='day', y='total_bill', errorbar='sd', ax=ax)
ax.set_title('Average Total Bill by Day (with SD)')
plt.show()

### 38. LM Plot

In [None]:
# Figure-level regression plot with one fitted line per meal time.
lm_grid = sns.lmplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='time',
    height=6,
    aspect=1.5,
)
# The grid has a single facet, so its only Axes carries the title.
lm_grid.ax.set_title('Total Bill vs Tip by Time')
plt.show()

### 39. Residual Plot

In [None]:
# Residuals of tip regressed on total bill, with a lowess trend through them.
fig, ax = plt.subplots(figsize=(10, 6))
sns.residplot(data=tips, x='total_bill', y='tip', lowess=True, ax=ax)
ax.set_title('Residual Plot: Total Bill vs Tip')
plt.show()

### 40. Multiple Line Plot

In [None]:
# One passengers-per-year line for each of the first five months encountered.
first_months = flights['month'].unique()[:5]

fig, ax = plt.subplots(figsize=(12, 6))
for month_name in first_months:
    month_rows = flights.loc[flights['month'] == month_name]
    ax.plot(month_rows['year'], month_rows['passengers'], marker='o', label=month_name)
ax.set(xlabel='Year', ylabel='Passengers', title='Passenger Trends by Month')
ax.legend()
ax.grid(True)
plt.show()

# Section 5: Multivariate Plots (41-50)

### 41. Pair Plot

In [None]:
# Scatter matrix of every numeric iris column, coloured by species.
pair_grid = sns.pairplot(data=iris, hue='species', height=3)
# y > 1 places the title above the top row of panels.
plt.suptitle('Pair Plot of Iris Dataset', y=1.02)
plt.show()

### 42. Facet Grid Hist

In [None]:
# Grid of total-bill histograms: rows split by sex, columns by meal time.
g = sns.FacetGrid(data=tips, row='sex', col='time', height=4)
g.map(plt.hist, 'total_bill', bins=15)
g.add_legend()
plt.show()

### 43. Facet Grid Scatter

In [None]:
# One bill-vs-tip scatter panel per day, with points coloured by sex.
g = sns.FacetGrid(data=tips, col='day', hue='sex', height=4)
g.map(plt.scatter, 'total_bill', 'tip', alpha=0.7)
g.add_legend()
plt.show()

### 44. 3D Scatter

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of three iris measurements, one colour per species.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')
# sort=False keeps the species in first-appearance order.
for species_name, species_rows in iris.groupby('species', sort=False):
    ax.scatter(
        species_rows['sepal_length'],
        species_rows['sepal_width'],
        species_rows['petal_length'],
        label=species_name,
        s=50,
    )
ax.set(xlabel='Sepal Length', ylabel='Sepal Width', zlabel='Petal Length', title='3D Scatter Plot')
ax.legend()
plt.show()

### 45. 3D Surface

In [None]:
from mpl_toolkits.mplot3d import Axes3D

# Surface z = sin(sqrt(x^2 + y^2)) sampled on a square grid over [-5, 5).
grid_axis = np.arange(-5, 5, 0.25)
X, Y = np.meshgrid(grid_axis, grid_axis)
Z = np.sin(np.sqrt(X**2 + Y**2))

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(X, Y, Z, cmap='viridis')
fig.colorbar(surf, shrink=0.5)
ax.set_title('3D Surface Plot')
plt.show()

### 46. Parallel Coordinates

In [None]:
from pandas.plotting import parallel_coordinates

# One polyline per flower across the four measurements, coloured by species.
plt.figure(figsize=(12, 6))
coords_ax = parallel_coordinates(iris, 'species', colormap='viridis')
coords_ax.set_title('Parallel Coordinates Plot')
coords_ax.grid(True)
plt.show()

### 47. Andrews Curves

In [None]:
from pandas.plotting import andrews_curves

# Andrews curves of the iris rows, coloured by species.
# Drawn on the implicit current Axes, as in the original call.
plt.figure(figsize=(12, 6))
curves_ax = andrews_curves(iris, 'species', colormap='Set1')
curves_ax.set_title('Andrews Curves')
curves_ax.grid(True)
plt.show()

### 48. Radviz

In [None]:
from pandas.plotting import radviz

# RadViz projection of the iris rows, coloured by species.
# Drawn on the implicit current Axes, as in the original call.
plt.figure(figsize=(10, 10))
radviz_ax = radviz(iris, 'species', colormap='viridis')
radviz_ax.set_title('Radviz Plot')
plt.show()

### 49. Cluster Map

In [None]:
# Hierarchically clustered heatmap of the iris correlation matrix.
iris_numeric = iris.select_dtypes(include=[np.number])
cluster_grid = sns.clustermap(
    iris_numeric.corr(),
    annot=True,
    cmap='coolwarm',
    center=0,
    figsize=(10, 8),
)
# clustermap builds a multi-axes figure (heatmap, dendrograms, colour bar), so
# plt.title would attach to whichever sub-axes is current. Title the figure instead.
cluster_grid.figure.suptitle('Hierarchical Cluster Map', y=1.02)
plt.show()

### 50. Bubble Chart

In [None]:
# Bubble chart on a reproducible 100-row sample: party size drives both
# marker area and colour.
tips_sample = tips.sample(100, random_state=42)
party_size = tips_sample['size']

fig, ax = plt.subplots(figsize=(12, 8))
bubbles = ax.scatter(
    tips_sample['total_bill'],
    tips_sample['tip'],
    s=party_size * 100,
    alpha=0.5,
    c=party_size,
    cmap='viridis',
)
fig.colorbar(bubbles, ax=ax, label='Party Size')
ax.set(xlabel='Total Bill', ylabel='Tip', title='Bubble Chart')
plt.show()

# Section 6: Advanced Visualizations (51-60)

### 51. Treemap

In [None]:
# Treemap whose rectangle areas are proportional to the passengers per class.
class_counts = titanic['Pclass'].value_counts()
tile_colors = ['#ff9999', '#66b3ff', '#99ff99']

fig, ax = plt.subplots(figsize=(12, 8))
squarify.plot(
    sizes=class_counts.values,
    label=class_counts.index,
    alpha=0.8,
    color=tile_colors,
    ax=ax,
)
ax.set_title('Treemap')
ax.axis('off')
plt.show()

### 52. Sunburst Plotly

In [None]:
# Sunburst of total bill: day -> time -> sex, from the centre outwards.
fig = px.sunburst(
    tips,
    path=['day', 'time', 'sex'],
    values='total_bill',
    title='Sunburst Chart',
)
fig.show()

### 53. Sankey Diagram

In [None]:
# Flow of passengers from ticket class (nodes 0-2) to outcome (nodes 3-4).
sankey_data = titanic.groupby(['Pclass', 'Survived']).size().reset_index(name='count')

node_spec = dict(
    pad=15,
    thickness=20,
    label=['Class 1', 'Class 2', 'Class 3', 'Not Survived', 'Survived'],
    color='blue',
)
# The hard-coded source/target lists pair up with the grouped rows in order:
# (class 1, 0), (class 1, 1), (class 2, 0), ...
link_spec = dict(
    source=[0, 0, 1, 1, 2, 2],
    target=[3, 4, 3, 4, 3, 4],
    value=sankey_data['count'].tolist(),
)

fig = go.Figure(data=[go.Sankey(node=node_spec, link=link_spec)])
fig.update_layout(title_text='Sankey Diagram')
fig.show()

### 54. Waterfall Chart

In [None]:
# Waterfall chart of a small illustrative profit breakdown.
categories = ['Start', 'Revenue', 'Costs', 'Profit', 'Taxes', 'Net']
values = [0, 100, -30, 70, -20, 50]
# Without `measure` every bar is treated as a relative change, so 'Profit' and
# 'Net' would be added on top of the running sum (ending at 170). Marking them
# as totals makes them show the running sum, which is 70 and then 50.
measures = ['absolute', 'relative', 'relative', 'total', 'relative', 'total']
fig = go.Figure(
    go.Waterfall(
        x=categories,
        y=values,
        measure=measures,
        textposition='outside',
        connector={'line': {'color': 'rgb(63, 63, 63)'}},
    )
)
fig.update_layout(title='Waterfall Chart')
fig.show()

### 55. Funnel Chart

In [None]:
# Funnel of illustrative conversion counts; each bar shows its value and its
# percentage of the first stage.
stages = ['Visitors', 'Sign-ups', 'Active', 'Paid', 'Premium']
values = [1000, 600, 400, 200, 50]

funnel_trace = go.Funnel(y=stages, x=values, textinfo='value+percent initial')
fig = go.Figure(funnel_trace)
fig.update_layout(title='Funnel Chart')
fig.show()

### 56. Radar Chart

In [None]:
# Radar chart of the mean of each measurement, one filled polygon per species.
measure_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
categories = ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width']
species_avg = iris.groupby('species')[measure_cols].mean()

fig = go.Figure()
for species_name, mean_row in species_avg.iterrows():
    polygon = go.Scatterpolar(
        r=mean_row.values,
        theta=categories,
        fill='toself',
        name=species_name,
    )
    fig.add_trace(polygon)
fig.update_layout(polar=dict(radialaxis=dict(visible=True)), title='Radar Chart')
fig.show()

### 57. Animated Scatter

In [None]:
# Animated scatter with one frame per species.
# The axis ranges are pinned to the extent of the whole dataset (plus a small
# margin); otherwise the axes are sized from the first frame only and points in
# later frames can fall outside the visible area.
axis_margin = 0.5
x_range = [iris['sepal_length'].min() - axis_margin, iris['sepal_length'].max() + axis_margin]
y_range = [iris['sepal_width'].min() - axis_margin, iris['sepal_width'].max() + axis_margin]

fig = px.scatter(
    iris,
    x='sepal_length',
    y='sepal_width',
    animation_frame='species',
    color='species',
    size='petal_length',
    range_x=x_range,
    range_y=y_range,
    title='Animated Scatter',
)
fig.show()

### 58. Choropleth Sample

In [None]:
# Choropleth of a small hand-written country/value table.
geo_records = {
    'country': ['USA', 'Canada', 'Brazil', 'UK', 'France', 'Germany'],
    'value': [100, 80, 60, 90, 85, 95],
}
df_geo = pd.DataFrame(geo_records)

fig = px.choropleth(
    df_geo,
    locations='country',
    locationmode='country names',
    color='value',
    title='Choropleth Map',
)
fig.show()

### 59. Dendrogram

In [None]:
from scipy.cluster.hierarchy import dendrogram, linkage

# Ward-linkage hierarchical clustering of the numeric iris columns.
iris_numeric = iris.select_dtypes(include=[np.number])
linkage_matrix = linkage(iris_numeric, method='ward')

plt.figure(figsize=(12, 8))
dendrogram(linkage_matrix)  # draws on the current Axes
tree_ax = plt.gca()
tree_ax.set(title='Dendrogram', xlabel='Sample Index', ylabel='Distance')
plt.show()

### 60. Word Cloud

In [None]:
# Word cloud built from all passenger names joined into one string.
text = ' '.join(titanic['Name'].astype(str))
cloud_builder = WordCloud(width=800, height=400, background_color='white')
wordcloud = cloud_builder.generate(text)

fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title('Word Cloud')
plt.show()

# Section 7: Statistical Plots (61-70)

### 61. Box with Outliers

In [None]:
# Box plot of Fare next to a scatter that highlights the IQR-rule outliers.
fare = titanic['Fare'].dropna()
Q1 = fare.quantile(0.25)
Q3 = fare.quantile(0.75)
IQR = Q3 - Q1

# Boolean mask aligned with `fare`; an outlier lies more than 1.5 * IQR
# outside the interquartile range.
is_outlier = ((fare < Q1 - 1.5 * IQR) | (fare > Q3 + 1.5 * IQR)).to_numpy()
outliers = fare[is_outlier]

# Every point is plotted at its own position. Looking a position up by fare
# value would send all passengers sharing a fare to the first matching row.
positions = np.arange(len(fare))

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
axes[0].boxplot(fare)
axes[0].set_title('Box Plot of Fare')
axes[1].scatter(positions, fare, alpha=0.5)
axes[1].scatter(positions[is_outlier], outliers, color='red', s=100)
axes[1].set_title('Outliers Highlighted')
plt.tight_layout()
plt.show()

### 62. Multi Distribution

In [None]:
# Overlaid sepal-length densities, one filled KDE per species.
fig, ax = plt.subplots(figsize=(12, 6))
# sort=False keeps the species in first-appearance order.
for species_name, species_rows in iris.groupby('species', sort=False):
    sns.kdeplot(species_rows['sepal_length'], label=species_name, fill=True, alpha=0.5, ax=ax)
ax.set_title('Distribution by Species')
ax.legend()
plt.show()

### 63. Ridge Plot

In [None]:
# Ridge-style layout: one KDE panel per species, stacked and sharing the x-axis.
species_names = iris['species'].unique()
fig, axes = plt.subplots(len(species_names), 1, figsize=(10, 8), sharex=True)
for panel, species_name in zip(axes, species_names):
    species_rows = iris.loc[iris['species'] == species_name]
    sns.kdeplot(species_rows['sepal_length'], ax=panel, fill=True, alpha=0.7)
    panel.set_ylabel(species_name)
    panel.set_yticks([])  # density scale is not informative here
plt.xlabel('Sepal Length')
plt.suptitle('Ridge Plot')
plt.tight_layout()
plt.show()

### 64. Confidence Intervals

In [None]:
# Mean total bill per day with a normal-approximation 95% interval
# (1.96 standard errors either side of the mean).
day_summary = tips.groupby('day')['total_bill'].agg(['mean', 'sem'])
mean_vals = day_summary['mean']
ci = 1.96 * day_summary['sem']
tick_positions = list(range(len(mean_vals)))

fig, ax = plt.subplots(figsize=(10, 6))
ax.errorbar(tick_positions, mean_vals, yerr=ci, fmt='o-', capsize=5)
ax.set_xticks(tick_positions)
ax.set_xticklabels(mean_vals.index)
ax.set_title('Mean with 95% CI')
ax.grid(True)
plt.show()

### 65. Moving Average

In [None]:
# Monthly passenger series with a 3-month moving average.
# `month` holds month names as text, so sorting on it directly orders months
# alphabetically within each year and the rolling mean would span scrambled
# months. Rank the months by first appearance and sort on that rank instead.
# NOTE(review): assumes months first appear in calendar order in the CSV - confirm.
month_rank = {name: position for position, name in enumerate(flights['month'].unique())}
flights_sorted = (
    flights.assign(_month_rank=flights['month'].map(month_rank))
    .sort_values(['year', '_month_rank'])
    .drop(columns='_month_rank')
)
flights_sorted['MA_3'] = flights_sorted['passengers'].rolling(window=3).mean()

time_steps = range(len(flights_sorted))
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(time_steps, flights_sorted['passengers'], alpha=0.5, label='Original')
ax.plot(time_steps, flights_sorted['MA_3'], linewidth=2, label='3-Month MA')
ax.set_title('With Moving Average')
ax.legend()
plt.show()

### 66. Lag Plot

In [None]:
from pandas.plotting import lag_plot

# Each passenger count plotted against the following one (default lag of 1).
plt.figure(figsize=(10, 6))
lag_ax = lag_plot(flights['passengers'])
lag_ax.set_title('Lag Plot')
plt.show()

### 67. Autocorrelation

In [None]:
from pandas.plotting import autocorrelation_plot

# Autocorrelation of the passenger series across all lags.
# Drawn on the implicit current Axes, as in the original call.
plt.figure(figsize=(12, 6))
acf_ax = autocorrelation_plot(flights['passengers'])
acf_ax.set_title('Autocorrelation Plot')
plt.show()

### 68. Bootstrap Plot

In [None]:
from pandas.plotting import bootstrap_plot

# Bootstrap plot of total_bill: 100 resamples of 50 observations each.
# The figure is handed to bootstrap_plot via `fig=`; without it pandas creates
# its own figure and the one sized here is shown blank.
fig = plt.figure(figsize=(10, 6))
bootstrap_plot(tips['total_bill'], fig=fig, size=50, samples=100, color='steelblue')
# The result is a grid of sub-panels, so the title goes on the figure rather
# than on whichever panel happens to be current.
fig.suptitle('Bootstrap Plot')
fig.tight_layout()
plt.show()

### 69. Binned Statistics

In [None]:
from scipy.stats import binned_statistic

# Mean tip within each of 10 equal-width total-bill bins.
binned = binned_statistic(tips['total_bill'], tips['tip'], statistic='mean', bins=10)
statistic, bin_edges = binned[0], binned[1]
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_widths = np.diff(bin_edges)

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(bin_centers, statistic, width=bin_widths, alpha=0.7, edgecolor='black')
ax.set_title('Binned Mean')
plt.show()

### 70. Chi-Square Test

In [None]:
from scipy.stats import chi2_contingency

# Chi-square test of independence between survival and passenger class,
# with the observed and expected count tables shown side by side.
contingency_table = pd.crosstab(titanic['Survived'], titanic['Pclass'])
chi2, p_value, dof, expected = chi2_contingency(contingency_table)

fig, (observed_ax, expected_ax) = plt.subplots(1, 2, figsize=(14, 6))
sns.heatmap(contingency_table, annot=True, fmt='d', cmap='Blues', ax=observed_ax)
observed_ax.set_title('Observed')
sns.heatmap(expected, annot=True, fmt='.1f', cmap='Reds', ax=expected_ax)
expected_ax.set_title(f'Expected (Chi2={chi2:.2f})')
plt.tight_layout()
plt.show()

# Section 8: More Statistical Tests (71-80)

### 71. T-Test Visual

In [None]:
from scipy.stats import ttest_ind

# Two-sample t-test on tips by sex, with the two distributions and a box plot.
male_tips = tips.loc[tips['sex'] == 'Male', 'tip']
female_tips = tips.loc[tips['sex'] == 'Female', 'tip']
t_stat, p_value = ttest_ind(male_tips, female_tips)

fig, (dist_ax, box_ax) = plt.subplots(1, 2, figsize=(14, 6))
for group_label, group_tips in (('Male', male_tips), ('Female', female_tips)):
    sns.histplot(group_tips, kde=True, label=group_label, alpha=0.6, ax=dist_ax)
dist_ax.set_title(f't-test: p={p_value:.4f}')
dist_ax.legend()
sns.boxplot(data=tips, x='sex', y='tip', ax=box_ax)
plt.tight_layout()
plt.show()

### 72. ANOVA Visual

In [None]:
from scipy.stats import f_oneway

# One-way ANOVA of sepal length across species, with box and violin views.
# sort=False keeps the species in first-appearance order.
groups = [
    species_rows['sepal_length'].values
    for _, species_rows in iris.groupby('species', sort=False)
]
f_stat, p_value = f_oneway(*groups)

fig, (box_ax, violin_ax) = plt.subplots(1, 2, figsize=(14, 6))
sns.boxplot(data=iris, x='species', y='sepal_length', ax=box_ax)
box_ax.set_title(f'ANOVA F={f_stat:.3f}, p={p_value:.6f}')
sns.violinplot(data=iris, x='species', y='sepal_length', ax=violin_ax)
plt.tight_layout()
plt.show()

### 73. Normality Tests

In [None]:
from scipy.stats import shapiro

# Shapiro-Wilk test on Age plus three visual normality checks:
# histogram with fitted normal curve, Q-Q plot, and box plot.
ages = titanic['Age'].dropna()
stat, p_value = shapiro(ages)
mu, sigma = ages.mean(), ages.std()
x = np.linspace(ages.min(), ages.max(), 100)

fig, (hist_ax, qq_ax, box_ax) = plt.subplots(1, 3, figsize=(16, 5))
hist_ax.hist(ages, bins=30, density=True, alpha=0.7)
hist_ax.plot(x, stats.norm.pdf(x, mu, sigma), 'r-', linewidth=2)
hist_ax.set_title(f'Shapiro p={p_value:.4f}')
stats.probplot(ages, dist='norm', plot=qq_ax)
box_ax.boxplot(ages)
plt.tight_layout()
plt.show()

### 74. Correlation + P-values

In [None]:
from scipy.stats import pearsonr

# Correlation matrix of the numeric iris columns next to a map of which
# pairwise correlations are significant at the 5% level.
iris_numeric = iris.select_dtypes(include=[np.number])
corr_matrix = iris_numeric.corr()
columns = corr_matrix.columns

# p-value matrix; the diagonal stays at 0 (a column against itself).
p_values = pd.DataFrame(np.zeros(corr_matrix.shape), columns=columns, index=corr_matrix.index)
for i, first_col in enumerate(columns):
    for j, second_col in enumerate(columns):
        if i < j:
            # The test is symmetric, so compute once and mirror the result.
            _, pair_p = pearsonr(iris_numeric[first_col], iris_numeric[second_col])
            p_values.loc[first_col, second_col] = pair_p
            p_values.loc[second_col, first_col] = pair_p

# Cast the boolean mask to 0/1: the annotations are then formatted as plain
# integers, and vmin/vmax keep the colour scale meaningful even when every
# cell has the same value. 1 means p < 0.05.
significant = (p_values < 0.05).astype(int)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=axes[0], square=True)
axes[0].set_title('Correlation')
sns.heatmap(significant, annot=True, fmt='d', cmap='RdYlGn', vmin=0, vmax=1, cbar=False, ax=axes[1], square=True)
axes[1].set_title('Significance')
plt.tight_layout()
plt.show()

### 75. Z-Score Visual

In [None]:
from scipy.stats import zscore

# Age on its original scale and as z-scores, with +/-3 reference lines.
ages = titanic['Age'].dropna()
z_scores = zscore(ages)

fig, (raw_ax, z_ax) = plt.subplots(1, 2, figsize=(14, 6))
raw_ax.hist(ages, bins=30, alpha=0.7)
raw_ax.set_title('Original')
z_ax.hist(z_scores, bins=30, alpha=0.7, color='orange')
for cutoff in (-3, 3):
    z_ax.axvline(cutoff, color='r', linestyle='--')
z_ax.set_title('Z-Scores')
plt.tight_layout()
plt.show()

### 76. Percentile Plot

In [None]:
# Fare at every 5th percentile from 0 to 100, with the area beneath shaded.
percentile_levels = range(0, 101, 5)
percentiles = np.percentile(titanic['Fare'].dropna(), percentile_levels)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(percentile_levels, percentiles, marker='o')
ax.fill_between(percentile_levels, percentiles, alpha=0.3)
ax.set(xlabel='Percentile', ylabel='Fare', title='Percentile Plot')
ax.grid(True)
plt.show()

### 77. Empirical vs Theoretical

In [None]:
# Empirical age density against a normal curve with the same mean and std.
ages = titanic['Age'].dropna()
mu, sigma = ages.mean(), ages.std()
x = np.linspace(ages.min(), ages.max(), 100)
normal_pdf = stats.norm.pdf(x, mu, sigma)

fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(ages, kde=True, stat='density', label='Empirical', alpha=0.6, ax=ax)
ax.plot(x, normal_pdf, 'r-', linewidth=2, label='Theoretical')
ax.set_title('Empirical vs Theoretical')
ax.legend()
plt.show()

### 78. Residual Analysis

In [None]:
# Four diagnostic panels for a simple linear regression of tip on total bill.
X = tips[['total_bill']].values  # 2-D, one column, as scikit-learn expects
y = tips['tip'].values
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)
residuals = y - y_pred
std_residuals = residuals / residuals.std()

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(fitted_ax, qq_ax), (scale_ax, hist_ax) = axes

fitted_ax.scatter(y_pred, residuals, alpha=0.6)
fitted_ax.axhline(0, color='r', linestyle='--')
fitted_ax.set_title('Residuals vs Fitted')

stats.probplot(residuals, dist='norm', plot=qq_ax)

scale_ax.scatter(y_pred, np.sqrt(np.abs(std_residuals)), alpha=0.6)
scale_ax.set_title('Scale-Location')

hist_ax.hist(std_residuals, bins=20)
hist_ax.set_title('Residuals Histogram')

plt.tight_layout()
plt.show()

### 79. Influence Plot

In [None]:
# Residuals against leverage for the simple regression of tip on total bill.
X = tips['total_bill'].values.reshape(-1, 1)
y = tips['tip'].values
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
residuals = y - y_pred

# Leverage for simple linear regression with an intercept:
#   h_i = 1/n + (x_i - mean(x))^2 / sum_j (x_j - mean(x))^2
# The 1/n term is contributed by the intercept and must be included.
n_obs = len(y)
x_centered = X.ravel() - X.mean()
leverage = 1.0 / n_obs + x_centered**2 / np.sum(x_centered**2)

plt.figure(figsize=(10, 6))
plt.scatter(leverage, residuals, alpha=0.6)
plt.xlabel('Leverage')
plt.ylabel('Residuals')
plt.title('Influence Plot')
plt.axhline(0, color='r', linestyle='--')
plt.grid(True)
plt.show()

### 80. Cook's Distance

In [None]:
# Cook's distance for each observation in the regression of tip on total bill.
X = tips['total_bill'].values.reshape(-1, 1)
y = tips['tip'].values
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
residuals = y - y_pred

n_obs = len(y)
n_params = 2  # intercept + slope

# Residual variance estimate uses n - p degrees of freedom.
mse = np.sum(residuals**2) / (n_obs - n_params)

# Leverage includes the 1/n intercept term:
#   h_i = 1/n + (x_i - mean(x))^2 / sum_j (x_j - mean(x))^2
x_centered = X.ravel() - X.mean()
leverage = 1.0 / n_obs + x_centered**2 / np.sum(x_centered**2)

# D_i = e_i^2 / (p * MSE) * h_i / (1 - h_i)^2
cooks_d = (residuals**2 / (n_params * mse)) * (leverage / (1 - leverage)**2)

plt.figure(figsize=(10, 6))
plt.stem(range(len(cooks_d)), cooks_d, markerfmt=',')
# 4/n is a common rule-of-thumb cut-off for influential points.
plt.axhline(4/len(cooks_d), color='r', linestyle='--', label='Threshold')
plt.title("Cook's Distance")
plt.legend()
plt.show()

# Section 9: Interactive Plotly Visualizations (81-90)

### 81. Interactive Scatter

In [None]:
# Interactive scatter: colour by species, marker size by petal length,
# petal width added to the hover tooltip.
fig = px.scatter(
    iris,
    x='sepal_length',
    y='sepal_width',
    color='species',
    size='petal_length',
    hover_data=['petal_width'],
    title='Interactive Scatter',
)
fig.show()

### 82. Interactive Line

In [None]:
# Total passengers per month (summed over all years) as an interactive line.
# sort=False keeps months in first-appearance order; the default would sort the
# month names alphabetically and connect them in a meaningless order.
# NOTE(review): assumes months first appear in calendar order in the CSV - confirm.
flights_by_month = flights.groupby('month', sort=False)['passengers'].sum().reset_index()
fig = px.line(flights_by_month, x='month', y='passengers', title='Interactive Line', markers=True)
fig.show()

### 83. Interactive Box

In [None]:
# Interactive box plot of total bill per day, split by meal time.
fig = px.box(
    tips,
    x='day',
    y='total_bill',
    color='time',
    title='Interactive Box Plot',
)
fig.show()

### 84. Interactive Violin

In [None]:
# Interactive violins with an inner box and every observation drawn as a point.
fig = px.violin(
    tips,
    x='day',
    y='total_bill',
    color='sex',
    box=True,
    points='all',
    title='Interactive Violin',
)
fig.show()

### 85. Interactive Histogram

In [None]:
# Age histogram coloured by survival, with a marginal box plot on top.
fig = px.histogram(
    titanic,
    x='Age',
    color='Survived',
    marginal='box',
    title='Interactive Histogram',
)
fig.show()

### 86. Density Heatmap

In [None]:
# 2D density heatmap of bill vs tip with a histogram on each margin.
fig = px.density_heatmap(
    tips,
    x='total_bill',
    y='tip',
    marginal_x='histogram',
    marginal_y='histogram',
    title='Density Heatmap',
)
fig.show()

### 87. Interactive Parallel

In [None]:
# Interactive parallel-coordinates plot of the four iris measurements.
# The line colour of a parallel-coordinates trace must be numeric, so the
# species names are encoded as integer category codes and used for colouring.
iris_coded = iris.assign(species_id=iris['species'].astype('category').cat.codes)
fig = px.parallel_coordinates(
    iris_coded,
    color='species_id',
    dimensions=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
    labels={'species_id': 'Species'},
    title='Parallel Coordinates',
)
fig.show()

### 88. Interactive 3D

In [None]:
# Rotatable 3D scatter: colour by species, marker size by petal width.
fig = px.scatter_3d(
    iris,
    x='sepal_length',
    y='sepal_width',
    z='petal_length',
    color='species',
    size='petal_width',
    title='3D Scatter',
)
fig.show()

### 89. Interactive Bubble

In [None]:
# Interactive bubble chart: marker size from party size, colour from day,
# meal time shown on hover.
fig = px.scatter(
    tips,
    x='total_bill',
    y='tip',
    size='size',
    color='day',
    hover_data=['time'],
    title='Bubble Chart',
)
fig.show()

### 90. Interactive Treemap

In [None]:
# Interactive treemap of total bill nested by day, then time, then sex.
fig = px.treemap(
    tips,
    path=['day', 'time', 'sex'],
    values='total_bill',
    title='Treemap',
)
fig.show()

# Section 10: Final Visualizations (91-100)

### 91. Missing Data Matrix

In [None]:
# Nullity matrix of the Titanic frame, one column per field.
missing_matrix_ax = msno.matrix(df=titanic, figsize=(12, 6))
plt.title('Missing Data Matrix')
plt.show()

### 92. Missing Bar

In [None]:
# Bar chart summarising how complete each Titanic column is.
missing_bar_ax = msno.bar(df=titanic, figsize=(12, 6))
plt.title('Missing Data Counts')
plt.show()

### 93. Missing Heatmap

In [None]:
# Heatmap of how missingness in one column correlates with another.
missing_heatmap_ax = msno.heatmap(df=titanic, figsize=(12, 8))
plt.title('Missing Data Correlation')
plt.show()

### 94. Gauge Chart

In [None]:
# Gauge showing the overall survival percentage, with the delta against 50.
survival_rate = titanic['Survived'].mean() * 100

gauge_spec = {
    'axis': {'range': [None, 100]},
    'bar': {'color': 'darkblue'},
    'steps': [
        {'range': [0, 30], 'color': 'lightgray'},
        {'range': [30, 70], 'color': 'gray'},
    ],
}
indicator = go.Indicator(
    mode='gauge+number+delta',
    value=survival_rate,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': 'Survival Rate (%)'},
    delta={'reference': 50},
    gauge=gauge_spec,
)
fig = go.Figure(indicator)
fig.show()

### 95. Candlestick Chart

In [None]:
# Candlestick chart on 20 days of synthetic prices.
dates = pd.date_range('2023-01-01', periods=20)
np.random.seed(42)  # reproducible synthetic data

# Build internally consistent OHLC values: the high is never below the open or
# close, and the low is never above them. Independent random walks per column
# do not guarantee this and produce impossible candles.
open_price = 100 + np.random.randn(20).cumsum()
close_price = open_price + np.random.randn(20)
high_price = np.maximum(open_price, close_price) + np.abs(np.random.randn(20))
low_price = np.minimum(open_price, close_price) - np.abs(np.random.randn(20))

stock_data = pd.DataFrame({
    'date': dates,
    'open': open_price,
    'high': high_price,
    'low': low_price,
    'close': close_price,
})
fig = go.Figure(data=[go.Candlestick(
    x=stock_data['date'],
    open=stock_data['open'],
    high=stock_data['high'],
    low=stock_data['low'],
    close=stock_data['close'],
)])
fig.update_layout(title='Candlestick')
fig.show()

### 96. Polar Bar

In [None]:
# Polar bar chart of five illustrative categories.
polar_bars = go.Barpolar(
    r=[1, 2, 3, 4, 5],
    theta=['A', 'B', 'C', 'D', 'E'],
)
fig = go.Figure(polar_bars)
fig.update_layout(title='Polar Bar Chart')
fig.show()

### 97. Ternary Plot

In [None]:
# Ternary scatter of three illustrative three-component points.
ternary_records = {
    'a': [1, 2, 3],
    'b': [2, 3, 1],
    'c': [3, 1, 2],
}
df_ternary = pd.DataFrame(ternary_records)
fig = px.scatter_ternary(df_ternary, a='a', b='b', c='c', title='Ternary Plot')
fig.show()

### 98. Multi Y-Axis

In [None]:
# Yearly total (left axis) and yearly mean (right axis) passenger counts.
# Both series and the x values come from one grouped result, so each point is
# guaranteed to sit at its own year. Pairing unique() (first-appearance order)
# with groupby (sorted order) only lines up when the data is already sorted.
yearly = flights.groupby('year')['passengers'].agg(['sum', 'mean'])

fig = make_subplots(specs=[[{'secondary_y': True}]])
fig.add_trace(go.Scatter(x=yearly.index, y=yearly['sum'], name='Passengers'), secondary_y=False)
fig.add_trace(go.Scatter(x=yearly.index, y=yearly['mean'], name='Avg'), secondary_y=True)
fig.update_layout(title='Multi Y-Axis')
# Label each y-axis so the two scales can be told apart.
fig.update_yaxes(title_text='Total passengers', secondary_y=False)
fig.update_yaxes(title_text='Average passengers', secondary_y=True)
fig.show()

### 99. Streamgraph

In [None]:
# Stacked area chart of the first five calendar months across years.
# pivot() orders the month columns alphabetically, so they are put back into
# first-appearance order before the first five are taken.
# NOTE(review): assumes months first appear in calendar order in the CSV - confirm.
month_order = flights['month'].unique()
flights_pivot = (
    flights.pivot(index='year', columns='month', values='passengers')
    .reindex(columns=month_order)
)

fig = go.Figure()
for col in flights_pivot.columns[:5]:
    # A shared stackgroup makes the traces accumulate on top of one another.
    fig.add_trace(go.Scatter(x=flights_pivot.index, y=flights_pivot[col], mode='lines', stackgroup='one', name=col))
fig.update_layout(title='Streamgraph')
fig.show()

### 100. Distribution Summary

In [None]:
# Four views of the Age distribution on one 2x2 figure.
known_ages = titanic['Age'].dropna()

fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(hist_ax, box_ax), (violin_ax, qq_ax) = axes

sns.histplot(known_ages, bins=30, kde=True, ax=hist_ax)
hist_ax.set_title('Histogram + KDE')

sns.boxplot(data=titanic, y='Age', ax=box_ax)
box_ax.set_title('Box Plot')

sns.violinplot(data=titanic, y='Age', ax=violin_ax)
violin_ax.set_title('Violin Plot')

stats.probplot(known_ages, dist='norm', plot=qq_ax)
qq_ax.set_title('Q-Q Plot')  # replaces probplot's default title

plt.suptitle('Distribution Summary: Age', fontsize=16)
plt.tight_layout()
plt.show()