#!/usr/bin/env python3
# Notebook cell export ("In [None]:" prompt marker kept as a comment so the file parses as Python).


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Global plotting defaults: seaborn "whitegrid" theme, then a 10x6 inch
# default figure size (applied after the theme so it is not overridden).
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 6)})

# Location of the input CSV; a relative path, so it is resolved against the
# current working directory at run time.
DATA_PATH = "data/data.csv"

def _print_overview(df):
    """Print head, shape, info, summary statistics and missing-value counts of *df*."""
    print("\n>>> Data Head:")
    print(df.head(), "\n")

    print(">>> Shape:", df.shape)
    print("\n>>> Info:")
    # DataFrame.info() writes to stdout itself and returns None, so it is
    # deliberately not wrapped in print().
    df.info()
    print("\n>>> Summary statistics:")
    print(df.describe(), "\n")

    missing = df.isnull().sum()
    print(">>> Missing values (by column):")
    print(missing[missing > 0], "\n")


def _ensure_numeric_target(df, target):
    """Make sure ``df[target]`` exists and is numeric, coercing it in place if needed.

    Raises:
        KeyError: if *target* is not a column of *df*.
    """
    if target not in df.columns:
        raise KeyError(
            f"Target column {target!r} not found in {DATA_PATH!r}; "
            f"available columns: {list(df.columns)}"
        )
    if pd.api.types.is_numeric_dtype(df[target]):
        return

    # A non-numeric target would be plotted as categorical and would be
    # missing from the correlation matrix, so convert it and report how many
    # values could not be parsed.
    missing_before = df[target].isna().sum()
    df[target] = pd.to_numeric(df[target], errors="coerce")
    unparseable = df[target].isna().sum() - missing_before
    print(
        f">>> Target '{target}' was not numeric; coerced to numeric "
        f"({unparseable} unparseable value(s) set to NaN).\n"
    )


def _plot_target_distribution(df, target):
    """Show a histogram (with KDE) and a boxplot of the target column."""
    plt.figure()
    sns.histplot(df[target], kde=True)
    plt.title("Distribution of Equipment Energy Consumption")
    plt.xlabel(target)
    plt.tight_layout()
    plt.show()

    plt.figure()
    sns.boxplot(x=df[target])
    plt.title("Boxplot of Equipment Energy Consumption")
    plt.tight_layout()
    plt.show()


def _plot_time_trend(df, target, time_col="timestamp"):
    """Plot the target against *time_col*, or print a notice if that column is absent.

    The column is converted to datetime in place on *df*.
    """
    if time_col not in df.columns:
        print(f"No '{time_col}' column found for time-based plots.\n")
        return

    df[time_col] = pd.to_datetime(df[time_col])
    # Plot in chronological order; drawing rows in file order produces a
    # zig-zag line whenever the CSV is not already sorted by time.
    ordered = df.sort_values(time_col)
    plt.figure()
    plt.plot(ordered[time_col], ordered[target])
    plt.title("Energy Consumption Over Time")
    plt.xlabel("Timestamp")
    plt.ylabel("Consumption")
    plt.tight_layout()
    plt.show()


def _plot_correlation_heatmap(df):
    """Show a heatmap of pairwise correlations and return the correlation matrix.

    Only numeric and boolean columns are correlated: calling ``DataFrame.corr()``
    on a frame that still contains string or datetime columns raises on
    pandas >= 2.0, where ``numeric_only`` defaults to False.
    """
    corr = df.select_dtypes(include=["number", "bool"]).corr()
    plt.figure(figsize=(14, 10))
    sns.heatmap(corr, cmap="coolwarm", center=0)
    plt.title("Feature Correlation Heatmap")
    plt.tight_layout()
    plt.show()
    return corr


def main():
    """Run the exploratory data analysis on the CSV at ``DATA_PATH``.

    Prints an overview of the data, then shows the target distribution, the
    target over time (when a ``timestamp`` column exists), a correlation
    heatmap, and the features most positively/negatively correlated with the
    target.

    Raises:
        KeyError: if the target column is missing from the data.
    """
    target = "equipment_energy_consumption"

    # 1-3. Load data, basic overview, missing values
    df = pd.read_csv(DATA_PATH)
    _print_overview(df)

    # 4. Target distribution & outliers
    _ensure_numeric_target(df, target)
    _plot_target_distribution(df, target)

    # 5. Time-based trends
    _plot_time_trend(df, target)

    # 6. Correlation heatmap
    corr = _plot_correlation_heatmap(df)

    # 7. Top correlated features
    # Drop the target's correlation with itself (always 1.0) and NaN entries
    # (e.g. constant columns) so neither pollutes the rankings.
    target_corr = (
        corr[target].drop(labels=[target]).dropna().sort_values(ascending=False)
    )
    print(">>> Top positive correlations with target:")
    print(target_corr.head(10), "\n")
    print(">>> Top negative correlations with target:")
    # Reverse the tail so the most negative correlation is listed first.
    print(target_corr.tail(10).iloc[::-1], "\n")

    # 8. Pairplot for key features (optional)
    # Uncomment and adjust if you want pairplots for the top features
    # (target_corr no longer contains the target itself, so start at index 0).
    # top_feats = target_corr.index[:4].tolist()
    # sns.pairplot(df[top_feats + [target]].dropna())
    # plt.show()

    # 9. Save any observations you note, or continue with preprocessing...
    print("EDA complete. Review plots & printed summaries for insights.")

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
