In [5]:
"""
Weather Data Analysis - Robust fixed version
- Ignores IPython flags like '-f' when deciding CSV path
- Uses non-interactive matplotlib backend (works in Jupyter/headless)
- Saves plots to ./plots and cleaned CSV to ./cleaned_weather_data.csv
Run:
    python weather_analysis_fixed.py [csv_path]
Or in a notebook call:
    from weather_analysis_fixed import main
    main("weather_dataset.csv")
"""

import sys
import traceback
from pathlib import Path
import matplotlib

# Use non-interactive backend early to avoid GUI/display errors
matplotlib.use("Agg")
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

# Column names that must be present in the input CSV; the numeric cleaning
# and date parsing steps below operate on exactly these columns.
REQUIRED_COLS = ["Date", "Temp", "Humidity", "Rainfall"]


def load_dataset(path: Path) -> pd.DataFrame:
    """Read the weather CSV at *path* into a DataFrame.

    The file is first read as UTF-8 with pandas' default parser. If that
    fails because the bytes are not valid UTF-8 or the parser cannot
    tokenize the file, it is re-read with the Python engine and latin-1
    decoding.

    Args:
        path: Location of the CSV file. A plain string is accepted as well
            and converted to a ``Path``.

    Returns:
        The raw, uncleaned DataFrame.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"CSV not found: {path}")
    try:
        return pd.read_csv(path, encoding="utf-8")
    except (UnicodeDecodeError, pd.errors.ParserError):
        # Only decoding/tokenizing problems can be helped by a more permissive
        # re-read; other failures (permissions, empty file, ...) propagate
        # directly instead of being retried and masked.
        return pd.read_csv(path, engine="python", encoding="latin-1")


def ensure_columns(df: pd.DataFrame, required: list) -> None:
    """Verify that *df* carries every column named in *required*.

    The frame's column labels are stripped of surrounding whitespace in
    place before the check, so the caller's DataFrame is modified.

    Args:
        df: Frame to check; its ``columns`` are replaced by stripped labels.
        required: Column names that must all be present.

    Raises:
        KeyError: Listing every required column that is absent.
    """
    stripped_labels = []
    for label in df.columns:
        stripped_labels.append(label.strip())
    df.columns = stripped_labels

    absent = [name for name in required if name not in df.columns]
    if not absent:
        return
    raise KeyError("Missing required column(s): " + ", ".join(absent))


def prepare_weather(df: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw weather frame and add calendar-derived columns.

    Works on a copy of *df*. Column labels are stripped, the required
    columns are verified, ``Date`` is parsed to datetimes and the three
    measurement columns to numbers (unparseable entries become missing),
    and every row lacking any of those four values is dropped. ``Month``,
    ``Year`` and ``Season`` are then derived from ``Date``.

    Returns:
        A frame with columns Date, Year, Month, Season, Temp, Humidity and
        Rainfall, sorted by Date with a fresh 0..n-1 index.

    Raises:
        KeyError: If a required column is missing (raised by ensure_columns).
        ValueError: If no Date value could be parsed, or if no row survives
            cleaning.
    """
    measurement_cols = ["Temp", "Humidity", "Rainfall"]

    # Month number -> season label; months not listed here (9, 10, 11)
    # fall through to "Post-Monsoon".
    season_by_month = {
        12: "Winter", 1: "Winter", 2: "Winter",
        3: "Summer", 4: "Summer", 5: "Summer",
        6: "Monsoon", 7: "Monsoon", 8: "Monsoon",
    }

    def season_for(month: int) -> str:
        return season_by_month.get(month, "Post-Monsoon")

    frame = df.copy()
    frame.columns = [label.strip() for label in frame.columns]
    ensure_columns(frame, REQUIRED_COLS)

    # Rows with nothing at all in the required columns carry no information.
    frame = frame.dropna(subset=REQUIRED_COLS, how="all")

    # Unparseable dates become NaT instead of raising.
    frame["Date"] = pd.to_datetime(frame["Date"], errors="coerce")
    if not frame["Date"].notna().any():
        raise ValueError("All Date values failed to parse. Check the Date column format.")

    # Non-numeric measurements become NaN instead of raising.
    for column in measurement_cols:
        frame[column] = pd.to_numeric(frame[column], errors="coerce")

    # From here on a row must have a valid date and all three measurements.
    frame = frame.dropna(subset=["Date", *measurement_cols])
    if frame.empty:
        raise ValueError("After cleaning, no valid rows remain. Check CSV values for Temp/Humidity/Rainfall.")

    frame["Month"] = frame["Date"].dt.month
    frame["Year"] = frame["Date"].dt.year
    frame["Season"] = frame["Month"].map(season_for)

    output_cols = ["Date", "Year", "Month", "Season", *measurement_cols]
    selected = frame[output_cols]
    ordered = selected.sort_values("Date")
    return ordered.reset_index(drop=True)


def compute_stats(weather: pd.DataFrame) -> dict:
    """Summarise a cleaned weather frame.

    Args:
        weather: Frame with Temp, Humidity, Rainfall, Month, Year and
            Season columns.

    Returns:
        A dict with four floats (``mean_temp``, ``max_temp``,
        ``min_humidity``, ``std_rainfall``), three Series
        (``monthly_mean_temp``, ``monthly_total_rain``, ``yearly_max_temp``)
        and one DataFrame (``seasonal_stats``: per-season means of Temp,
        Rainfall and Humidity).
    """
    return {
        "mean_temp": float(weather["Temp"].mean()),
        "max_temp": float(weather["Temp"].max()),
        "min_humidity": float(weather["Humidity"].min()),
        # ddof=0: population standard deviation (divide by n, not n - 1).
        "std_rainfall": float(weather["Rainfall"].std(ddof=0)),
        "monthly_mean_temp": weather.groupby("Month")["Temp"].mean(),
        "monthly_total_rain": weather.groupby("Month")["Rainfall"].sum(),
        "yearly_max_temp": weather.groupby("Year")["Temp"].max(),
        "seasonal_stats": weather.groupby("Season")[["Temp", "Rainfall", "Humidity"]].mean(),
    }


def _render_figure(out_path: Path, figsize: tuple, draw) -> None:
    """Create one figure, let *draw* populate it, and save it to *out_path*.

    Args:
        out_path: Destination image file.
        figsize: ``(width, height)`` in inches, passed to ``plt.figure``.
        draw: Callable taking the new figure and adding axes/artists to it.

    A failure anywhere in the process is reported on stderr (with traceback)
    instead of being raised, so one broken plot does not prevent the others.
    """
    fig = None
    try:
        fig = plt.figure(figsize=figsize)
        draw(fig)
        fig.tight_layout()
        fig.savefig(out_path)
    except Exception:
        print(f"Warning: failed to create {out_path.name}", file=sys.stderr)
        traceback.print_exc()
    finally:
        # Close on the failure path too; otherwise the figure stays
        # registered with pyplot and is never released.
        if fig is not None:
            plt.close(fig)


def plot_and_save(weather: pd.DataFrame, stats: dict, out_dir: Path) -> None:
    """Write the four analysis plots as PNG files into *out_dir*.

    Files produced: ``daily_temp_trend.png``, ``monthly_rainfall.png``,
    ``humidity_vs_temp.png`` and ``combined_plot.png``.

    Args:
        weather: Cleaned frame with Date, Temp and Humidity columns.
        stats: Mapping that provides ``"monthly_total_rain"`` (a Series
            indexed by month).
        out_dir: Target directory; created (with parents) if missing.

    Each plot is attempted independently; a failing plot only emits a
    warning on stderr.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    def temp_trend(ax) -> None:
        ax.plot(weather["Date"], weather["Temp"])
        ax.set_title("Daily Temperature Trend")
        ax.set_xlabel("Date")
        ax.set_ylabel("Temperature (°C)")

    def humidity_scatter(ax) -> None:
        ax.scatter(weather["Temp"], weather["Humidity"])
        ax.set_title("Humidity vs Temperature")
        ax.set_xlabel("Temperature (°C)")
        ax.set_ylabel("Humidity (%)")

    def monthly_rain(ax) -> None:
        # Draw on an explicit axes rather than relying on pyplot's
        # "current figure" state.
        stats["monthly_total_rain"].sort_index().plot(kind="bar", ax=ax)
        ax.set_title("Monthly Rainfall")
        ax.set_xlabel("Month")
        ax.set_ylabel("Rainfall (mm)")

    def combined(fig) -> None:
        # Same two panels as the standalone plots, side by side, with the
        # same axis labels so the combined image is readable on its own.
        temp_trend(fig.add_subplot(1, 2, 1))
        humidity_scatter(fig.add_subplot(1, 2, 2))

    # A. Daily temperature trend
    _render_figure(out_dir / "daily_temp_trend.png", (10, 4),
                   lambda fig: temp_trend(fig.add_subplot(1, 1, 1)))

    # B. Monthly rainfall totals (bar)
    _render_figure(out_dir / "monthly_rainfall.png", (8, 4),
                   lambda fig: monthly_rain(fig.add_subplot(1, 1, 1)))

    # C. Scatter: Humidity vs Temp
    _render_figure(out_dir / "humidity_vs_temp.png", (6, 4),
                   lambda fig: humidity_scatter(fig.add_subplot(1, 1, 1)))

    # D. Combined
    _render_figure(out_dir / "combined_plot.png", (10, 5), combined)


def export_cleaned(weather: pd.DataFrame, out_path: Path) -> None:
    """Write *weather* to *out_path* as CSV (without the index) and report it.

    Args:
        weather: Frame to export.
        out_path: Destination file; a plain string is accepted too. Missing
            parent directories are created so the export does not fail on a
            fresh output location.
    """
    out_path = Path(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    weather.to_csv(out_path, index=False)
    print(f"Cleaned data exported to: {out_path}")


def main(csv_path: str = "weather_dataset.csv"):
    """Run the whole analysis: load, clean, summarise, plot and export.

    Progress and results are printed to stdout. Plots are written to
    ``./plots`` and the cleaned table to ``./cleaned_weather_data.csv``,
    both relative to the current working directory.

    Args:
        csv_path: Path of the input CSV file.

    Raises:
        SystemExit: With status 1 if any step fails; the error and its full
            traceback are printed to stderr first. Note that this also
            applies when the function is called interactively.
    """
    try:
        p = Path(csv_path)
        print(f"Loading dataset: {p.resolve()}")
        df = load_dataset(p)

        print("\n--- RAW HEAD ---")
        print(df.head())

        print("\n--- RAW INFO ---")
        # DataFrame.info() writes its report itself and returns None, so it
        # must not be wrapped in print() (that would emit a stray "None").
        df.info()

        weather = prepare_weather(df)
        print("\n--- CLEANED (head) ---")
        print(weather.head())

        stats = compute_stats(weather)
        print("\n--- STATS ---")
        print(f"Mean Temp: {stats['mean_temp']}")
        print(f"Max Temp: {stats['max_temp']}")
        print(f"Min Humidity: {stats['min_humidity']}")
        print(f"Std Rainfall: {stats['std_rainfall']}")

        print("\n--- SEASONAL STATS ---")
        print(stats["seasonal_stats"])

        out_dir = Path.cwd() / "plots"
        plot_and_save(weather, stats, out_dir)
        export_cleaned(weather, Path.cwd() / "cleaned_weather_data.csv")
        print(f"\nAll plots saved to: {out_dir}")
    except Exception as e:
        # Top-level boundary: report everything, then signal failure through
        # the exit status.
        print("\nAn error occurred:", file=sys.stderr)
        print(type(e).__name__, ":", e, file=sys.stderr)
        print("\nFull traceback:", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        sys.exit(1)


# --- robust CLI entrypoint (handles IPython -f args) ---

def resolve_csv_arg(argv, default="weather_dataset.csv"):
    """
    Return the CSV path to use, based on a command-line argument vector.

    Only ``argv[1]`` is examined:
    - If it is a string that does not start with '-' and names an existing
      regular file, it is returned (as a string).
    - If it starts with '-' (typical IPython flags such as '-f'), it is
      ignored silently.
    - If it is a non-flag argument that is not an existing file (missing
      path, a directory, or an empty string), a warning is printed to stderr.
    In every case but the first, ``default`` is returned.
    """
    if len(argv) < 2:
        return default

    candidate = argv[1]
    if not isinstance(candidate, str) or candidate.startswith("-"):
        return default

    # is_file() rather than exists(): a directory (including "" which
    # resolves to the current directory) is not a usable CSV path.
    # Later arguments are intentionally not scanned; presumably a flag's
    # value (e.g. the file following '-f') could exist on disk without
    # being the CSV.
    path = Path(candidate)
    if path.is_file():
        return str(path)

    print(f"Warning: provided CSV path is not an existing file: {candidate}. Using default.", file=sys.stderr)
    return default


if __name__ == "__main__":
    # Script entry point: use argv[1] as the CSV path when resolve_csv_arg
    # accepts it, otherwise the default file name, then run the pipeline.
    csv_file = resolve_csv_arg(sys.argv, default="weather_dataset.csv")
    main(csv_file)




Loading dataset: C:\Users\Windows_11\weather_dataset.csv



An error occurred:
FileNotFoundError : CSV not found: weather_dataset.csv

Full traceback:
Traceback (most recent call last):
  File "C:\Users\Windows_11\AppData\Local\Temp\ipykernel_3688\3882762880.py", line 181, in main
    df = load_dataset(p)
  File "C:\Users\Windows_11\AppData\Local\Temp\ipykernel_3688\3882762880.py", line 30, in load_dataset
    raise FileNotFoundError(f"CSV not found: {path}")
FileNotFoundError: CSV not found: weather_dataset.csv


SystemExit: 1