In [1]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Read the copra drying dataset from CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory.
DATASET_PATH = 'copra_drying_dataset_with_oil_yield_3.csv'
df = pd.read_csv(DATASET_PATH)

In [3]:
# Show the first two rows of the loaded frame.
df.head(n=2)

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,127,56.60066,38.811475,75.038997,23.8,4.56
1,102,57.450324,38.597286,71.364034,22.18,5.06


In [4]:
# Predictor columns for the drying-time model.
drying_time_features = [
    'Initial Moisture Level (%)',
    'Temperature (°C)',
    'Humidity (%)',
]
X = df[drying_time_features]

# Regression target: the recorded drying time column.
y = df['Drying Time (hrs)']

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Random forest regressor with 100 trees and a fixed seed for reproducibility.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Fit the forest on the training split.
model.fit(X_train, y_train)

In [7]:
# Predict the target for the held-out rows.
y_pred = model.predict(X=X_test)

In [8]:
# Score the held-out predictions against the true values.
# MAE is expressed in the target's units; R^2 is unitless.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2 Score):", r2)

Mean Absolute Error (MAE): 1.1291308000000009
R-squared (R2 Score): 0.9114445762391661


In [9]:
# Persist the fitted drying-time model so it can be reloaded without retraining.
# joblib is imported once in the notebook's top import cell rather than mid-notebook.
# joblib.dump returns the list of filenames it wrote, which the cell displays.
joblib.dump(model, 'drying_time_predictor.pkl')


['drying_time_predictor.pkl']

In [10]:
# Reload the persisted drying-time model from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
loaded_model = joblib.load('drying_time_predictor.pkl')

# A single example observation; the keys match the columns the model was fitted on.
drying_sample = {
    'Initial Moisture Level (%)': 57,
    'Temperature (°C)': 32,
    'Humidity (%)': 55,
}
new_data = pd.DataFrame([drying_sample])

# Predict for the one-row frame and report the value rounded to whole hours.
predicted_drying_time = loaded_model.predict(new_data)
rounded_hours = round(predicted_drying_time[0])
print("Predicted Drying Time (hrs):", rounded_hours)


Predicted Drying Time (hrs): 25


For Oil Yield

In [11]:
# Show the first two rows again before selecting the oil-yield columns.
df.head(n=2)

Unnamed: 0,Maturity Stage (Days),Initial Moisture Level (%),Temperature (°C),Humidity (%),Drying Time (hrs),Estimated Oil Yield (kg)
0,127,56.60066,38.811475,75.038997,23.8,4.56
1,102,57.450324,38.597286,71.364034,22.18,5.06


In [12]:
# Predictor columns for the oil-yield model: the drying-time predictors plus drying time.
# NOTE: this rebinds X and y, replacing the drying-time definitions made earlier.
oil_yield_features = [
    'Initial Moisture Level (%)',
    'Temperature (°C)',
    'Humidity (%)',
    'Drying Time (hrs)',
]
X = df[oil_yield_features]

# Regression target: the estimated oil yield column.
y = df['Estimated Oil Yield (kg)']

In [13]:
# Hold out 20% of the rows for evaluation, using the same fixed seed as the earlier split.
# NOTE: this rebinds the train/test variables that held the drying-time split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Random forest regressor with 100 trees and a fixed seed for reproducibility.
# NOTE: this rebinds `model`; the drying-time forest now lives only in its saved file.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Fit the forest on the oil-yield training split.
model.fit(X_train, y_train)

In [15]:
# Predict the target for the held-out rows.
y_pred = model.predict(X=X_test)

In [16]:
# Score the held-out predictions against the true values.
# MAE is expressed in the target's units; R^2 is unitless.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("Mean Absolute Error (MAE):", mae)
print("R-squared (R2 Score):", r2)

Mean Absolute Error (MAE): 0.21296620000000008
R-squared (R2 Score): 0.2599032051809347


In [17]:
# Persist the fitted oil-yield model so it can be reloaded without retraining.
# joblib is imported once in the notebook's top import cell; the repeated
# mid-notebook import that used to sit here was redundant.
# joblib.dump returns the list of filenames it wrote, which the cell displays.
joblib.dump(model, 'oil_yield_predictor.pkl')


['oil_yield_predictor.pkl']

In [18]:
# Reload the persisted oil-yield model from disk.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
loaded_model = joblib.load('oil_yield_predictor.pkl')

# A single example observation; the keys match the columns the model was fitted on.
oil_sample = {
    'Initial Moisture Level (%)': 58,
    'Temperature (°C)': 30,
    'Humidity (%)': 50,
    'Drying Time (hrs)': 26,
}
new_data = pd.DataFrame([oil_sample])

# Predict for the one-row frame and report the unrounded value.
predicted_oil_yield = loaded_model.predict(new_data)
oil_yield_kg = predicted_oil_yield[0]
print("Predicted Oil Yield (kg):", oil_yield_kg)


Predicted Oil Yield (kg): 5.147400000000002
