# In [None]:

# ML MODEL ANALYZING MAIZE VARIETIES AND SOIL TREATMENTS USING IOT SENSOR DATA

# 📦 1. Importing Libraries
import os
import warnings

# Installed before the scikit-learn imports, as in the original ordering.
warnings.filterwarnings('ignore')

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_classif, f_regression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR

# 📥 2. Load Dataset
data_path = "adata/maize_dataset.csv"  # update with your actual filename
# NOTE(review): "adata/" may be a typo for "data/" — confirm the directory name.
df = pd.read_csv(data_path)
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; print it so the preview shows wherever this code runs.
print(df.head())

# 📊 3. Basic Info and Summary
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df.info()
print(df.describe())
print(df.isnull().sum())

# 📈 4. Outlier Detection with Box Plots
# Draw one standalone figure per int64/float64 column so every column is
# plotted on its own scale.
numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns
for column in numeric_columns:
    ax = plt.figure(figsize=(6, 4)).gca()
    df.boxplot(column=[column], ax=ax)
    ax.set_title(f'Box Plot of {column}')
    ax.grid(True)
    plt.show()

# 🔤 5. Label Encoding for Categorical Columns
# A separate encoder is fitted for every object-typed column and stored in
# `label_encoders` (column name -> fitted LabelEncoder). Re-fitting a single
# shared encoder would discard every mapping except the last one, making it
# impossible to translate the integer codes back to the original categories.
# `label_encoder` is still defined for backward compatibility; after the loop
# it refers to the encoder of the last encoded column.
label_encoder = LabelEncoder()
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    label_encoder = LabelEncoder()
    # astype(str) makes mixed-type columns (and missing values) encodable.
    df[col] = label_encoder.fit_transform(df[col].astype(str))
    label_encoders[col] = label_encoder

# 🎯 6. Define X and y
X = df.drop('Total Grain yield (ton/ha)', axis=1)
y = df['Total Grain yield (ton/ha)']

# ⭐ 7. Feature Selection
# The target is fed to regressors (SVR / RandomForestRegressor) further down,
# so features are scored with the regression F-test (f_regression). f_classif
# is an ANOVA test that treats each distinct target value as a class label,
# which is not meaningful for a continuous target.
# k is capped at the number of available columns: asking SelectKBest for more
# features than exist is rejected (or warned about) depending on the
# scikit-learn version.
# NOTE(review): the selector is fitted on the full dataset before the
# train/test split, so held-out rows influence which features are kept.
selector = SelectKBest(score_func=f_regression, k=min(10, X.shape[1]))
X = selector.fit_transform(X, y)

# ✂️ 8. Split the Data
# Hold out 20% of the rows for evaluation; the fixed seed keeps the partition
# the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 📏 9. Standardize the Data
# The scaling statistics are learned from the training rows only and then
# applied unchanged to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 📉 10. Evaluation Function
def calculate_metrics(model_name, y_true, y_pred):
    """Print regression metrics for a model and plot predicted vs. actual.

    Prints MSE, MAE and R² (R² scaled by 100 and shown with a percent sign),
    then shows a scatter plot of predictions against actual values with a
    dashed red diagonal marking perfect predictions.

    Args:
        model_name: Label used in the printed summary and the plot title.
        y_true: Observed target values; any array-like (Series, ndarray,
            list, tuple).
        y_pred: Predicted values aligned element-wise with ``y_true``.

    Returns:
        None. Results are printed and the figure is displayed.
    """
    # Coerce to NumPy arrays so plain lists/tuples are accepted as well; the
    # diagonal below needs .min()/.max(), which sequences do not provide.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred) * 100
    print(f"{model_name} MSE: {mse:.2f}, MAE: {mae:.2f}, R²: {r2:.2f}%")

    # Endpoints of the "prediction == actual" reference line.
    lowest, highest = y_true.min(), y_true.max()

    plt.figure(figsize=(6, 6))
    plt.scatter(y_true, y_pred, color='blue')
    plt.plot([lowest, highest], [lowest, highest], 'r--')
    plt.xlabel("Actual")
    plt.ylabel("Predicted")
    plt.title(f"{model_name} Performance")
    plt.grid(True)
    plt.show()

def _load_or_train(model, model_path, X_fit, y_fit):
    """Return the estimator cached at ``model_path``, else fit and cache ``model``.

    Args:
        model: Unfitted estimator to train when no cached file exists.
        model_path: Location of the joblib file used as the cache.
        X_fit: Training features passed to ``model.fit``.
        y_fit: Training targets passed to ``model.fit``.

    Returns:
        The estimator loaded from disk, or ``model`` after fitting.
    """
    if os.path.exists(model_path):
        # NOTE: a cached model is returned as-is, even if the data, selected
        # features or scaling changed since it was saved — delete the file to
        # force retraining. joblib files are pickles: only load files you
        # created yourself.
        return joblib.load(model_path)

    model.fit(X_fit, y_fit)
    # joblib.dump does not create missing parent directories, so make sure
    # the target folder exists before writing.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(model, model_path)
    return model


# 🤖 11. Train SVR Model
svr_model_path = "models/SVM_model.pkl"
svr = _load_or_train(SVR(kernel='linear'), svr_model_path, X_train, y_train)
svr_preds = svr.predict(X_test)
calculate_metrics("SVR", y_test, svr_preds)

# 🌲 12. Train Random Forest Model
rf_model_path = "models/RandomForest_model.pkl"
# Seeded like the train/test split so a retrained forest is reproducible.
rf = _load_or_train(
    RandomForestRegressor(random_state=42), rf_model_path, X_train, y_train
)
rf_preds = rf.predict(X_test)
calculate_metrics("Random Forest", y_test, rf_preds)
