In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, TensorDataset
import optuna
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.inspection import permutation_importance

# Optuna Visualization Tools
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_slice
from optuna.visualization import plot_param_importances

# Plot appearance: seaborn "whitegrid" theme plus a (12, 6) default figure size
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

# Compute device: use CUDA when torch reports it as available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


In [2]:

# ==========================================
# 0. LOAD DATASET
# ==========================================
# NOTE: this is a relative path, so it is resolved against the kernel's current
# working directory; pd.read_parquet raises FileNotFoundError if the file is
# not found there.
DATA_PATH = 'Parquet/XY_v2.parquet'

# Prefix that marks a target column (one target column per crop is assumed).
TARGET_PREFIX = "Y_"

df = pd.read_parquet(DATA_PATH)

# --- LIST AVAILABLE CROPS ---
# Assumes targets start with TARGET_PREFIX.
# Only the leading prefix is stripped: str.replace() would also delete any
# later "Y_" inside a crop name and report a crop with no matching column.
target_columns = [col for col in df.columns if col.startswith(TARGET_PREFIX)]
available_crops = [col[len(TARGET_PREFIX):] for col in target_columns]

print("--- Available Crops found in Dataset ---")
print(available_crops)
print("-" * 40)


# ==========================================
# 1. CHOOSE CROP DYNAMICALLY
# ==========================================
CHOSEN_CROP = "rice"
TARGET_COL = f"{TARGET_PREFIX}{CHOSEN_CROP}"
print("Chosen crop:", CHOSEN_CROP)
print("Target column:", TARGET_COL)

# Safety check: make sure the target exists
if TARGET_COL not in df.columns:
    raise ValueError(f"Target column {TARGET_COL} not found in df.columns")

# ==========================================
# 2. BUILD BASE df_model
# ==========================================
# Work on a copy so the raw frame `df` stays untouched if this cell is re-run.
df_model = df.copy()

# ==========================================
# 3. DROP avg_yield_* FOR OTHER CROPS
#    Keep only avg_yield_<CHOSEN_CROP>_*
# ==========================================
crop_prefix = f"avg_yield_{CHOSEN_CROP}_"

cols_to_drop = [
    c for c in df_model.columns
    if c.startswith("avg_yield_") and not c.startswith(crop_prefix)
]

df_model = df_model.drop(columns=cols_to_drop)
print(f"Dropped {len(cols_to_drop)} avg_yield_* columns not for {CHOSEN_CROP}")

# ==========================================
# 4. DEFINE META + TARGETS + FEATURES
# ==========================================
meta_cols = ["year", "area"]

# Fail early with a readable message instead of a bare KeyError from the
# column reorder at the end of this cell (same exception type as before).
missing_meta = [c for c in meta_cols if c not in df_model.columns]
if missing_meta:
    raise KeyError(f"Meta columns {missing_meta} not found in df_model.columns")

# All target columns (we will exclude them from features)
target_cols = [c for c in df_model.columns if c.startswith(TARGET_PREFIX)]

# Feature columns = everything except meta + all target columns.
# A set gives constant-time membership tests; column order is preserved
# because we still iterate over df_model.columns.
excluded_cols = set(meta_cols) | set(target_cols)
FEATURE_COLS = [c for c in df_model.columns if c not in excluded_cols]

print("Number of features:", len(FEATURE_COLS))
print("Example features:", FEATURE_COLS[:22])

# Reorder df_model for clarity: meta columns, then features, then the single
# chosen target (targets for other crops are dropped here).
df_model = df_model[meta_cols + FEATURE_COLS + [TARGET_COL]].copy()



FileNotFoundError: [Errno 2] No such file or directory: 'Parquet/XY_v2.parquet'