In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [15]:
# Read the dataset CSV (path is relative to the notebook's working directory)
# into a DataFrame used by every later cell.
df = pd.read_csv(filepath_or_buffer='seasonal_coconut_yield_data_3.csv')

In [16]:
# Peek at the first two rows to check the column names and value ranges.
df.head(n=2)

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Coconut Yield (kg)
0,2015-01-31,23.272451,21.906751,35.884381,8,31.659224,71.825956,0.0,0,3899.014246
1,2015-02-28,24.573172,49.151363,54.711984,5,30.612772,75.419344,0.0,0,4257.558815


In [17]:
# Explicit list of model inputs. Selecting columns by name (rather than
# dropping a few) keeps the CSV's saved row index ('Unnamed: 0') out of the
# feature matrix, while still leaving out 'Date' and 'Rainfall (mm)'. The
# resulting columns are exactly the ones supplied when predicting on new data.
feature_columns = [
    'Soil Moisture (10 cm) (%)',
    'Soil Moisture (20 cm) (%)',
    'Soil Moisture (30 cm) (%)',
    'Plant Age (years)',
    'Temperature (°C)',
    'Humidity (%)',
    'Rain Status (0/1)',
]

# Feature matrix and regression target.
X = df[feature_columns]
y = df['Coconut Yield (kg)']

In [19]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the random
# shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Random forest of 100 trees; the seed is pinned so refits are repeatable.
model = RandomForestRegressor(random_state=42, n_estimators=100)

# Fit the forest on the training split only.
model.fit(X=X_train, y=y_train)

In [21]:
# Predictions for the held-out rows, compared against y_test in the next cell.
y_pred = model.predict(X=X_test)

In [22]:
# Score the hold-out predictions: MAE is in the target's units (kg); R2
# compares the model against always predicting the mean (a negative value
# means the model does worse than that baseline).
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2 Score):", r2)

Mean Absolute Error (MAE): 1066.1691642193
R-squared (R2 Score): -0.22181300027802853


In [23]:
import joblib

# Persist the fitted estimator to disk so it can be reloaded for inference
# without retraining. The call returns the list of files written.
joblib.dump(value=model, filename='yield_predictor.pkl')

['yield_predictor.pkl']

In [24]:
# Show the first two rows again as a reference for the column names.
df.head(n=2)

Unnamed: 0,Date,Soil Moisture (10 cm) (%),Soil Moisture (20 cm) (%),Soil Moisture (30 cm) (%),Plant Age (years),Temperature (°C),Humidity (%),Rainfall (mm),Rain Status (0/1),Coconut Yield (kg)
0,2015-01-31,23.272451,21.906751,35.884381,8,31.659224,71.825956,0.0,0,3899.014246
1,2015-02-28,24.573172,49.151363,54.711984,5,30.612772,75.419344,0.0,0,4257.558815


In [26]:
# Reload the persisted estimator from disk.
# NOTE: joblib files are pickles; only load files you created or trust.
loaded_model = joblib.load('yield_predictor.pkl')

# Example new observation as a one-row DataFrame. The column names and their
# order must match the feature columns the model was fitted on; scikit-learn
# validates feature names at predict time.
new_data = pd.DataFrame({
    'Soil Moisture (10 cm) (%)': [23.27],
    'Soil Moisture (20 cm) (%)': [21.90],
    'Soil Moisture (30 cm) (%)': [35.88],
    'Plant Age (years)': [8],
    'Temperature (°C)': [31.659],
    'Humidity (%)': [71.82],
    'Rain Status (0/1)': [0],
})

# Predict the coconut yield for the new observation. The model's target is
# 'Coconut Yield (kg)', so the label says "Coconut", not "Oil".
predicted_yield = loaded_model.predict(new_data)
print("Predicted Coconut Yield (kg):", predicted_yield[0])

Predicted Oil Yield (kg): 3000.464019557043
