In [11]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

# Degree-2 polynomial linear regression on the crop-yield dataset.
# Every preprocessing step that learns from data (scaling, polynomial expansion)
# is fitted on the training rows only, so the hold-out and cross-validation
# scores are not influenced by the rows they are evaluated on.

# Load dataset after uploading
df = pd.read_csv('/content/Cleaned_Crop_Yield_Dataset.csv')
df.columns = df.columns.str.strip()  # header names may carry stray whitespace

# One-hot encode categorical columns
cat_cols = df.select_dtypes(include=['object']).columns.tolist()
if cat_cols:
    df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

# Separate features and target
X = df.drop(columns=['Yield'])
y = df['Yield']

# NOTE(review): the output recorded for this cell showed RMSE 0.0000 / R2 1.0000.
# A perfect fit from a linear model often means a feature column encodes the
# target directly -- TODO confirm that no column in X is derived from 'Yield'.

# Train-test split on the raw features, BEFORE any statistics are learned.
# Same test_size / random_state as before, so the same rows land in each part.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: mean/std come from the training rows only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# All rows under the train-fitted scaler; kept only so the name stays defined.
# Do not use it for evaluation.
X_scaled = scaler.transform(X)

# Polynomial features
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Train model
model = LinearRegression()
model.fit(X_train_poly, y_train)
y_pred = model.predict(X_test_poly)

# Metrics on the hold-out split
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# Cross-validation: the pipeline re-fits scaler and polynomial expansion inside
# every fold, and leaves the `scaler` / `poly` / `model` objects above untouched.
cv_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2, include_bias=False),
    LinearRegression(),
)
cv_scores = cross_val_score(cv_pipeline, X, y, cv=5, scoring='r2')
mean_cv_r2 = np.mean(cv_scores)

print("Linear Regression Performance:")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R2 Score: {r2:.4f}")
print(f"Accuracy: {r2:.4f}")  # the hold-out R2 repeated; not a classification accuracy
print(f"Mean CV R2 (5-fold): {mean_cv_r2:.4f}")


Linear Regression Performance:
RMSE: 0.0000
MAE: 0.0000
R2 Score: 1.0000
Accuracy: 1.0000


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Random forest regression on the crop-yield dataset, scored on a 20% hold-out.

# Read the CSV and strip whitespace from the header names.
df = pd.read_csv('/content/Cleaned_Crop_Yield_Dataset.csv')
df.columns = df.columns.str.strip()

# Object-dtype columns are expanded into dummy indicators (first level dropped).
cat_cols = list(df.select_dtypes(include=['object']).columns)
if cat_cols:
    df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

# Target is the 'Yield' column; every other column is a feature.
y = df['Yield']
X = df.drop(columns=['Yield'])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the forest on the training rows, then predict the held-out rows.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
y_pred = rf_model.fit(X_train, y_train).predict(X_test)

# Hold-out metrics.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# One print call, one item per line; the "Accuracy" line repeats the R2 score.
print(
    "Random Forest Regression Performance:",
    f"RMSE: {rmse:.4f}",
    f"R2 Score: {r2:.4f}",
    f"MAE: {mae:.4f}",
    f"Accuracy: {r2:.4f}",
    sep="\n",
)


Random Forest Regression Performance:
RMSE: 0.0483
R2 Score: 0.9979
MAE: 0.0047
Accuracy: 0.9979


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# XGBoost regression on the crop-yield dataset, scored on a 20% hold-out.

# Read the CSV and strip whitespace from the header names.
df = pd.read_csv('/content/Cleaned_Crop_Yield_Dataset.csv')
df.columns = df.columns.str.strip()

# Object-dtype columns are expanded into dummy indicators (first level dropped).
cat_cols = list(df.select_dtypes(include=['object']).columns)
if cat_cols:
    df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

# Target is the 'Yield' column; every other column is a feature.
y = df['Yield']
X = df.drop(columns=['Yield'])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the booster on the training rows, then predict the held-out rows.
xgb_model = XGBRegressor(random_state=42, objective='reg:squarederror')
y_pred = xgb_model.fit(X_train, y_train).predict(X_test)

# Hold-out metrics.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# One print call, one item per line; the "Accuracy" line repeats the R2 score.
print(
    "XGBoost Regression Performance:",
    f"RMSE: {rmse:.4f}",
    f"R2 Score: {r2:.4f}",
    f"MAE: {mae:.4f}",
    f"Accuracy: {r2:.4f}",
    sep="\n",
)


XGBoost Regression Performance:
RMSE: 0.0134
R2 Score: 0.9998
MAE: 0.0023
Accuracy: 0.9998


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Stacked ensemble (linear regression + random forest, combined by a linear
# meta-model) on the crop-yield dataset, scored on a 20% hold-out.
# NOTE(review): the output recorded for this cell showed RMSE 0.0000 / R2 1.0000;
# a perfect score may indicate a feature that encodes 'Yield' -- TODO confirm.

# Read the CSV and strip whitespace from the header names.
df = pd.read_csv('/content/Cleaned_Crop_Yield_Dataset.csv')
df.columns = df.columns.str.strip()

# Target first, then the feature frame with pandas' default dummy encoding
# applied (first level of each encoded column dropped).
y = df['Yield']
X = pd.get_dummies(df.drop(columns=['Yield']), drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# First-level learners whose predictions feed the meta-model.
base_models = [
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor(n_estimators=200, random_state=42)),
]

# No cv argument is passed, so StackingRegressor uses its own default.
stack_model = StackingRegressor(
    estimators=base_models,
    final_estimator=LinearRegression(),
)

# Fit on the training rows, then predict the held-out rows.
y_pred = stack_model.fit(X_train, y_train).predict(X_test)

# Hold-out metrics.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# One print call, one item per line; the "Accuracy" line repeats the R2 score.
print(
    "Stacking Model Performance(LinearRegression+RandomForestRegressor):",
    f"RMSE: {rmse:.4f}",
    f"R2 Score: {r2:.4f}",
    f"Accuracy: {r2:.4f}",
    sep="\n",
)


Stacking Model Performance(LinearRegression+RandomForestRegressor):
RMSE: 0.0000
R2 Score: 1.0000
Accuracy: 1.0000


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Stacked ensemble (random forest + XGBoost, combined by a linear meta-model)
# on the crop-yield dataset, scored on a 20% hold-out.

# Read the CSV and strip whitespace from the header names.
df = pd.read_csv('/content/Cleaned_Crop_Yield_Dataset.csv')
df.columns = df.columns.str.strip()

# Target first, then the feature frame with pandas' default dummy encoding
# applied (first level of each encoded column dropped).
y = df['Yield']
X = pd.get_dummies(df.drop(columns=['Yield']), drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Meta-model that combines the base regressors' predictions.
final_estimator = LinearRegression()

# First-level learners.
estimators = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('xgb', XGBRegressor(random_state=42, objective='reg:squarederror')),
]

# cv=5 is forwarded to StackingRegressor unchanged.
stacking_regressor = StackingRegressor(
    estimators=estimators,
    final_estimator=final_estimator,
    cv=5,
)

# Fit on the training rows, then predict the held-out rows.
y_pred = stacking_regressor.fit(X_train, y_train).predict(X_test)

# Hold-out metrics.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# One print call, one item per line; the "Accuracy" line repeats the R2 score.
print(
    "Stacking Model Performance(RandomForestRegressor+XGBRegressor):",
    f"RMSE: {rmse:.4f}",
    f"R2 Score: {r2:.4f}",
    f"Accuracy: {r2:.4f}",
    sep="\n",
)


Stacking Model Performance(RandomForestRegressor+XGBRegressor):
RMSE: 0.0194
R2 Score: 0.9997
Accuracy: 0.9997
