In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import pickle

# Train and evaluate a Random Forest regressor for the 'EF' column of the N2O sheet.
# Top-level names (df, X, y, grid_search, best_model, y_pred, ...) are left in the
# namespace so later cells can reuse them.

# 1. Load dataset
df = pd.read_excel('./Data/N2O_input.xlsx')  # Replace with your file path

# 2. Rename the Chinese source headers to short English names.
#    The target column 'EF' already carries its final name, so it needs no entry.
df = df.rename(columns={
    '降雨量': 'rain',           # rainfall
    '温度': 'tem',              # temperature
    '土壤容重': 'bulk',         # soil bulk density
    '土壤粘粒含量%': 'clay',    # soil clay content (%)
    '土壤有机碳SOC': 'soc',     # soil organic carbon
    '土壤全氮': 'tn',           # soil total nitrogen
    '土壤pH': 'pH',             # soil pH
    '坡度°': 'slope',           # slope (degrees)
    '灌溉量mm': 'irr',          # irrigation amount (mm)
    '田块类型': 'land_class',   # field / plot type
})

# 3. Encode land type (one-hot; per the original note the classes are paddy/upland).
#    get_dummies() ignores NaN, so a row with a missing land type would become an
#    all-zero dummy row that the later dropna() cannot detect. Drop those rows
#    first so "missing" is handled the same way for every input column.
df = df.dropna(subset=['land_class'])
df = pd.get_dummies(df, columns=['land_class'], prefix='land')

# 4. Select features (numeric predictors + generated land_* dummies) and the target
base_features = ['rain', 'tem', 'bulk', 'clay', 'soc', 'tn', 'pH', 'slope', 'irr']
land_features = [col for col in df.columns if col.startswith('land_')]
all_features = base_features + land_features + ['EF']
df = df[all_features]

# 5. Clean: non-numeric cells become NaN, then every incomplete row is removed
df = df.apply(pd.to_numeric, errors='coerce').dropna()

# 6. Feature/target split
X = df.drop(columns=['EF'])
y = df['EF']

# 7. Train/test split (30% held out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 8. Train Random Forest with Grid Search (5-fold CV on the training part, scored by R²)
param_grid = {
    'n_estimators': [250, 500, 800],
    'max_depth': [10, 20, 30]
}
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(rf, param_grid=param_grid, cv=5, scoring='r2', n_jobs=-1)
grid_search.fit(X_train, y_train)

# 9. Predict on the held-out test set and report the metrics
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Best Parameters:", grid_search.best_params_)
print('R²:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))

# 10. (Optional) Save the model
# NOTE(review): the pickle stores only the estimator; the column order of X is
# not saved with it and must be reproduced by whoever loads the model.
# with open('./Model/N2O_RF_model_simple.pkl', 'wb') as f:
#     pickle.dump(best_model, f)

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import pickle

# Train and evaluate a Random Forest regressor for the 'CSE_Str/M' column of the
# 'M' sheet. Top-level names (df, X, y, grid_search, best_model, y_pred, ...) are
# left in the namespace so later cells can reuse them.

# 1. Load dataset
df = pd.read_excel('./Data/Carbon_input.xlsx', sheet_name='M')  # Replace with your actual path and sheet

# 2. Rename columns to standard English variable names.
#    'pH' already carries its final name, so it needs no entry.
df = df.rename(columns={
    'MAT': 'tem',            # Mean annual temperature
    'MAP': 'rain',           # Mean annual precipitation
    'BD': 'bulk',            # Bulk density
    'SOC': 'soc',            # Soil organic carbon
    'Clay': 'clay',          # Clay content
    'TN': 'tn',              # Total nitrogen
    'N_input': 'n_input',    # Nitrogen input
    'Class': 'input_type',   # Fertilization type: straw or manure
    'CSE_Str/M': 'cse'       # Carbon sequestration effect
})

# 3. One-hot encode fertilization type (straw, manure).
#    get_dummies() ignores NaN, so a row with a missing type would become an
#    all-zero dummy row that the later dropna() cannot detect. Drop those rows
#    first so "missing" is handled the same way for every input column.
df = df.dropna(subset=['input_type'])
df = pd.get_dummies(df, columns=['input_type'], prefix='input')

# 4. Select features (numeric predictors + generated input_* dummies) and target
base_features = ['tem', 'rain', 'bulk', 'soc', 'pH', 'clay', 'tn', 'n_input']
input_type_features = [col for col in df.columns if col.startswith('input_')]
all_features = base_features + input_type_features + ['cse']
df = df[all_features]

# 5. Clean data: non-numeric cells become NaN, then every incomplete row is removed
df = df.apply(pd.to_numeric, errors='coerce').dropna()

# 6. Split into features (X) and target (y)
X = df.drop(columns=['cse'])
y = df['cse']

# 7. Train/test split (30% held out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# 8. Train Random Forest with Grid Search (5-fold CV on the training part, scored by R²)
param_grid = {
    'n_estimators': [250, 500, 800],
    'max_depth': [10, 20, 30]
}
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(rf, param_grid=param_grid, cv=5, scoring='r2', n_jobs=-1)
grid_search.fit(X_train, y_train)

# 9. Predict on the held-out test set and report the metrics
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Best Parameters:", grid_search.best_params_)
print('R²:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))

# 10. (Optional) Save the model
# NOTE(review): the pickle stores only the estimator; the column order of X is
# not saved with it and must be reproduced by whoever loads the model.
# with open('./Model/C_Sequestration_RF_model.pkl', 'wb') as f:
#     pickle.dump(best_model, f)


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import pickle

# Random Forest regression of 'CSE_Root' (renamed to 'cse') on the 'Root' sheet.

# 1. Columns kept for modelling; the final entry, 'cse', is the regression target
features = ['tem', 'rain', 'bulk', 'soc', 'pH', 'clay', 'tn', 'n_input', 'cse']

# 2-4. Load the sheet, rename the headers, keep the modelling columns, coerce
#      every cell to numeric (unparseable values become NaN) and drop any row
#      that is left incomplete
df = (
    pd.read_excel('./Data/Carbon_input.xlsx', sheet_name='Root')  # Replace with correct sheet if needed
    .rename(columns={
        'MAT': 'tem',
        'MAP': 'rain',
        'BD': 'bulk',
        'SOC': 'soc',
        'pH': 'pH',
        'Clay': 'clay',
        'TN': 'tn',
        'N_input': 'n_input',
        'CSE_Root': 'cse',  # target renamed to 'cse' for consistency
    })
    .loc[:, features]
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
)

# 5. Target vector and feature matrix
y = df['cse']
X = df.drop(columns=['cse'])

# 6. Hold out 30% of the rows for testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 7. Grid search over forest size and tree depth, 5-fold CV scored by R²
param_grid = dict(
    n_estimators=[250, 500, 800],
    max_depth=[10, 20, 30],
)
rf = RandomForestRegressor(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# 8. Score the selected model on the held-out rows
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

print("Best Parameters:", grid_search.best_params_)
print('R²:', r2_score(y_test, y_pred))
print('MAE:', mean_absolute_error(y_test, y_pred))
print('RMSE:', np.sqrt(mean_squared_error(y_test, y_pred)))

# 9. (Optional) Persist the fitted model to disk
# with open('./Model/C_Sequestration_Root.pkl', 'wb') as f:
#     pickle.dump(best_model, f)
