In [8]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Configuration: everything a reader might want to tune lives here.
# Paths are relative to the notebook's working directory.
DATA_PATH = '../data/processed/maize_yield_kenya_processed.csv'
MODEL_PATH = '../models/maize_yield_kenya_model.pkl'
FEATURE_COLUMNS = [
    'average_rain_fall_mm_per_year',
    'pesticides_tonnes',
    'avg_temp',
    'rainfall_to_temp_ratio',
]
TARGET_COLUMN = 'hg/ha_yield'
SEED = 42  # shared by the split and the forest so the run is reproducible

# Load processed data
df = pd.read_csv(DATA_PATH)

# Split data into features and target
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Train Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=SEED)
rf_model.fit(X_train, y_train)

# Evaluate model on the held-out rows
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# NOTE(review): the run recorded with this notebook reports R² < 0, i.e. the
# model scores worse on the test split than a constant mean predictor would.
# Investigate (dataset size, features, split) before relying on the saved model.
print(f"MSE: {mse}, R²: {r2}")

# Save model. joblib.dump does not create missing directories, so make sure the
# target folder exists first; otherwise a fresh checkout fails after training.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(rf_model, MODEL_PATH)

MSE: 2776395.1607000013, R²: -0.7676148311801392


['../models/maize_yield_kenya_model.pkl']

In [10]:
import os

# Sanity check: report whether the serialized model is present on disk.
model_path = '../models/maize_yield_kenya_model.pkl'
if not os.path.exists(model_path):
    # Nothing at the expected location.
    print(f"Model file does not exist at: {model_path}")
else:
    print(f"Model file exists at: {model_path}")

Model file exists at: ../models/maize_yield_kenya_model.pkl
