# In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import label_ranking_average_precision_score
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import VotingClassifier


# Load the three CSV inputs in one pass: the labelled training rows, the
# unlabelled test rows, and the sample submission that is filled in and
# re-saved at the end of the script.
train, test, submission = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)


# Turn the fertilizer names into integer class ids. `le_target` is kept at
# module level because the ids are mapped back to names when the submission
# is written.
le_target = LabelEncoder()
encoded_target = le_target.fit_transform(train['Fertilizer Name'])
train['label'] = encoded_target


# Integer-encode each categorical feature. The encoder is fitted on the
# training column only and then reused for the test column, so both frames
# share one mapping; `transform` raises if the test column contains a value
# that was not seen during fitting.
cat_cols = ['Soil Type', 'Crop Type']
for col in cat_cols:
    le = LabelEncoder()
    train_codes = le.fit_transform(train[col])
    train[col] = train_codes
    test[col] = le.transform(test[col])


# Columns fed to every model. The strings are used verbatim as DataFrame
# keys, so their spelling (including 'Temparature') must stay exactly as is.
features = [
    'Temparature',
    'Humidity',
    'Moisture',
    'Nitrogen',
    'Potassium',
    'Phosphorous',
    'Soil Type',
    'Crop Type',
]
X, y = train[features], train['label']
X_test = test[features]


# Hold out 20% of the training rows for validation, stratified on the label
# and seeded so the split is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


# LightGBM multiclass model. The class count comes from the fitted target
# encoder instead of a hard-coded literal, so it cannot drift from the data.
# Up to 1000 boosting rounds are requested; the early-stopping callback
# (patience 50) monitors the validation split, and the evaluation metric is
# logged every 100 rounds.
lgb_model = lgb.LGBMClassifier(
    objective='multiclass',
    num_class=len(le_target.classes_),
    n_estimators=1000,
)
lgb_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    callbacks=[lgb.early_stopping(50), lgb.log_evaluation(100)],
)


# XGBoost multiclass model that outputs per-class probabilities
# ('multi:softprob') and reports multiclass log-loss. The class count comes
# from the fitted target encoder instead of a hard-coded literal, so it
# cannot drift from the data.
# NOTE(review): `use_label_encoder` is kept as in the original call; newer
# XGBoost releases may warn that it is unused - confirm against the
# installed version.
xgb_model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=len(le_target.classes_),
    use_label_encoder=False,
    n_estimators=300,
    eval_metric='mlogloss',
)


# The validation split is passed for per-round metric reporting only; no
# early stopping is configured for this model, so all 300 rounds are run.
xgb_model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    verbose=True,
)





# CatBoost multiclass model: at most 1000 iterations, progress printed every
# 100 iterations, and early stopping with a patience of 50 evaluated on the
# validation split passed to `fit`.
cat_params = {
    'loss_function': 'MultiClass',
    'iterations': 1000,
    'verbose': 100,
    'early_stopping_rounds': 50,
}
cat_model = CatBoostClassifier(**cat_params)
cat_model.fit(X_train, y_train, eval_set=(X_val, y_val))


def _map_at_k(y_true, proba, k=3):
    """Return mean average precision at *k* for single-label targets.

    Each row of *proba* holds one score per class, where column ``j`` is the
    score of class id ``j``. A row contributes ``1 / rank`` when its true
    class id is among the *k* highest-scoring columns (rank counted from 1)
    and ``0`` otherwise; the result is the mean over all rows.
    """
    proba = np.asarray(proba)
    # Column indices of the k best classes per row, best first.
    top_k = np.argsort(proba, axis=1)[:, -k:][:, ::-1]
    hits = top_k == np.asarray(y_true).reshape(-1, 1)
    # Sized from `top_k` so the weights still line up when there are fewer
    # than k classes.
    weights = 1.0 / np.arange(1, top_k.shape[1] + 1)
    return float((hits * weights).sum(axis=1).mean())


# Per-class probabilities of each model on the validation split.
lgb_preds = lgb_model.predict_proba(X_val)
xgb_preds = xgb_model.predict_proba(X_val)
cat_preds = cat_model.predict_proba(X_val)

# Unweighted average of the three probability matrices.
ensemble_preds_val = (lgb_preds + xgb_preds + cat_preds) / 3
# The previous label-ranking average precision also rewarded true labels
# ranked below third place, so it overstated the figure printed as MAP@3.
# Column positions are used as class ids here, the same assumption the
# submission step makes when decoding its top-3 indices.
map3_score = _map_at_k(y_val, ensemble_preds_val, k=3)
print(f"📊 Validation MAP@3 (ensemble): {map3_score:.5f}")


# Per-class probabilities of each model on the test rows, averaged with
# equal weight.
lgb_test_preds = lgb_model.predict_proba(X_test)
xgb_test_preds = xgb_model.predict_proba(X_test)
cat_test_preds = cat_model.predict_proba(X_test)
ensemble_test_preds = (lgb_test_preds + xgb_test_preds + cat_test_preds) / 3

# Column indices of the three most probable classes per row, best first.
top3 = np.argsort(ensemble_test_preds, axis=1)[:, -3:][:, ::-1]
# Decode every index in a single call instead of one call per row.
top3_names = le_target.inverse_transform(top3.ravel()).reshape(top3.shape)

# NOTE(review): sample_submission.csv is not visible here. If it already has
# a column named like the training target, fill that column; otherwise fall
# back to the original 'Fertilizer' name. Writing to a new column while the
# sample's own prediction column exists would leave its placeholder values
# in the saved file - confirm the expected header against the sample file.
pred_col = 'Fertilizer Name' if 'Fertilizer Name' in submission.columns else 'Fertilizer'
submission[pred_col] = [' '.join(row) for row in top3_names]
submission.to_csv('submission_ensemble1.csv', index=False)
print("✅ Submission file saved as 'submission_ensemble1.csv'")
