<!-- Basic Statistics – Mean, median, standard deviation, etc.

Value Counts – Frequency of important categories (like bird species).

Data Distribution – Histograms and KDE plots.

Correlation – Find relationships between numeric features.

Trend Analysis – Bird strikes over time. -->

In [1]:
# eda.py
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class BirdStrikeEDA:
    """Exploratory data analysis helpers for a bird-strike dataset.

    Wraps a pandas DataFrame and offers a printed statistical summary
    (``show_basic_stats``) and a 2x2 dashboard of plots (``plot_all``).
    The wrapped DataFrame is only read, never modified.
    """

    def __init__(self, data: pd.DataFrame) -> None:
        """Store the DataFrame that every other method reads from."""
        self.data = data

    def show_basic_stats(self) -> None:
        """Print ``describe(include='all')`` of the dataset to stdout."""
        print("\n--- Basic Statistics ---")
        if self.data.shape[1] == 0:
            # DataFrame.describe() raises ValueError on a frame without columns.
            print("(no columns to describe)")
            return
        print(self.data.describe(include='all'))

    def plot_all(self, date_column, target_column, top_n=10,
                 numeric_column='Altitude'):
        """Plot all key EDA graphs in one 2x2 figure and show it.

        Panels: top value counts of ``target_column``, distribution of
        ``numeric_column``, correlation heatmap of all numeric columns, and
        the yearly count of ``target_column``. A panel whose input is missing
        or empty shows a short explanatory message instead of raising or
        staying blank.

        Args:
            date_column: Column holding the event date. Values that are not
                already datetimes are parsed with ``pd.to_datetime``;
                unparseable entries are ignored.
            target_column: Categorical column to count (e.g. bird species).
            top_n: Number of most frequent categories to show.
            numeric_column: Column whose distribution is plotted.
        """
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle('Bird Strike Data Analysis', fontsize=18)

        self._plot_top_counts(axes[0, 0], target_column, top_n)
        self._plot_distribution(axes[0, 1], numeric_column)
        self._plot_correlation(axes[1, 0])
        self._plot_trend(axes[1, 1], date_column, target_column)

        # Leave the top 4% of the figure free for the suptitle.
        fig.tight_layout(rect=[0, 0, 1, 0.96])
        plt.show()

    # ------------------------------------------------------------------
    # Data preparation (pure pandas, no plotting)
    # ------------------------------------------------------------------

    def _correlation_matrix(self):
        """Return the correlation matrix of every numeric column.

        Uses ``include='number'`` so that int32/float32/etc. columns are not
        silently dropped. The result is empty when there are no numeric
        columns.
        """
        return self.data.select_dtypes(include='number').corr()

    def _yearly_counts(self, date_column, target_column):
        """Return non-null ``target_column`` counts per calendar year.

        Returns an empty Series when either column is missing or no date
        could be parsed.
        """
        columns = self.data.columns
        if date_column not in columns or target_column not in columns:
            return pd.Series(dtype='int64')

        # Dates read from CSV are usually plain strings, on which `.dt`
        # raises; coerce first and let unparseable values become NaT.
        dates = pd.to_datetime(self.data[date_column], errors='coerce')
        counts = self.data[target_column].groupby(dates.dt.year).count()
        # NaT rows turn the year key into float; groupby has already dropped
        # the NaN group, so the cast back to int is safe.
        counts.index = counts.index.astype(int)
        return counts

    # ------------------------------------------------------------------
    # Individual panels
    # ------------------------------------------------------------------

    @staticmethod
    def _placeholder(ax, title, reason):
        """Replace an unusable panel with a centred explanatory message."""
        ax.set_title(title)
        ax.text(0.5, 0.5, reason, ha='center', va='center',
                transform=ax.transAxes)
        ax.set_axis_off()

    def _plot_top_counts(self, ax, target_column, top_n):
        """Draw a horizontal bar chart of the ``top_n`` most frequent values."""
        title = f'Top {top_n} {target_column} Counts'
        if target_column not in self.data.columns:
            self._placeholder(ax, title, f"Column '{target_column}' not found")
            return

        counts = self.data[target_column].value_counts().head(top_n)
        if counts.empty:
            self._placeholder(ax, title, 'No data to count')
            return

        # Bars are drawn directly from the precomputed counts: passing
        # `palette` to countplot without `hue` is deprecated in seaborn 0.13.
        positions = list(range(len(counts)))
        ax.barh(positions, counts.to_numpy(),
                color=sns.color_palette('viridis', n_colors=len(counts)))
        ax.set_yticks(positions)
        ax.set_yticklabels(counts.index.astype(str))
        ax.invert_yaxis()  # most frequent category at the top
        ax.set_xlabel('count')
        ax.set_ylabel(target_column)
        ax.set_title(title)

    def _plot_distribution(self, ax, numeric_column):
        """Draw a histogram with KDE overlay for ``numeric_column``."""
        title = f'{numeric_column} Distribution'
        if numeric_column not in self.data.columns:
            self._placeholder(ax, title, f"Column '{numeric_column}' not found")
            return

        values = self.data[numeric_column].dropna()
        if values.empty:
            self._placeholder(ax, title, 'No data to plot')
            return

        sns.histplot(values, kde=True, color='skyblue', ax=ax)
        ax.set_title(title)

    def _plot_correlation(self, ax):
        """Draw an annotated correlation heatmap of the numeric columns."""
        title = 'Correlation Heatmap'
        corr = self._correlation_matrix()
        if corr.empty:
            # sns.heatmap cannot render an empty matrix.
            self._placeholder(ax, title, 'No numeric columns')
            return

        sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f',
                    linewidths=0.5, ax=ax)
        ax.set_title(title)

    def _plot_trend(self, ax, date_column, target_column):
        """Draw the yearly count of ``target_column`` as a line plot."""
        title = f'{target_column} Over Time'
        trend = self._yearly_counts(date_column, target_column)
        if trend.empty:
            self._placeholder(ax, title, 'No dated records to plot')
            return

        sns.lineplot(x=trend.index, y=trend.values, marker='o', color='blue',
                     ax=ax)
        ax.set_xlabel('Year')
        ax.set_ylabel('Count')
        ax.set_title(title)
