In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import joblib

In [None]:
# Load dataset
df = pd.read_csv("crop_yield.csv")

In [None]:
# Feature engineering
df['fert_irrig_score'] = df['Fertilizer'].astype(int)
df['rainfall_squared'] = df['Annual_Rainfall'] ** 2

In [None]:
# Encode categorical variables
label_encoders = {} # Dictionary to store label encoders
for col in ['State', 'Crop']:
    le = LabelEncoder()
    df[col + '_encoded'] = le.fit_transform(df[col])
    label_encoders[col] = le # Store each fitted encoder


In [None]:
# Features and target
features = [
    'Annual_Rainfall', 'Fertilizer', 'Pesticide',
    'State_encoded', 'Crop_encoded',
    'fert_irrig_score', 'rainfall_squared'
]
X = df[features]
y = df['Yield']

In [None]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Train Linear Regression
model = LinearRegression()
model.fit(X_train_scaled, y_train)

In [None]:
# Predict and evaluate
y_pred = model.predict(X_test_scaled)
print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f} tons/ha")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f} tons/ha")
print(f"MAPE: {mean_absolute_percentage_error(y_test, y_pred)*100:.2f}%")

R² Score: 0.0112
MAE: 179.5260 tons/ha
RMSE: 890.0806 tons/ha
MAPE: 278777342277642752.00%


In [None]:
# Save the model and scaler for later use
joblib.dump(model, 'linear_regression_model.pkl')  # save model
joblib.dump(scaler, 'scaler.pkl')                   # save scaler

print("Model and scaler saved as 'linear_regression_model.pkl' and 'scaler.pkl'.")

Model and scaler saved as 'linear_regression_model.pkl' and 'scaler.pkl'.


In [None]:
import zipfile
from google.colab import files

# Create a Zip archive
with zipfile.ZipFile('model_and_scaler.zip', 'w') as zipf:
    zipf.write('linear_regression_model.pkl')
    zipf.write('scaler.pkl')

# Download the Zip archive
files.download('model_and_scaler.zip')

print("Model and scaler saved as 'model_and_scaler.zip'.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Model and scaler saved as 'model_and_scaler.zip'.


In [None]:
import joblib
from google.colab import files

# Combine model, scaler, and label encoders into a single dictionary
full_model_package = {
    'model': model,
    'scaler': scaler,
    'label_encoders': label_encoders
}

# Dynamically create filename based on model name
model_name = type(model).__name__
filename = f"{model_name}_model_package.pkl"

# Save the combined package to a single .pkl file
joblib.dump(full_model_package, filename)

# Download the .pkl file
files.download(filename)

print(f"Full model package (model, scaler, and label encoders) saved and downloaded as '{filename}'.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Full model package (model, scaler, and label encoders) saved and downloaded as 'LinearRegression_model_package.pkl'.


In [None]:
print(df.columns)

Index(['Crop', 'Crop_Year', 'Season', 'State', 'Area', 'Production',
       'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Yield',
       'fert_irrig_score', 'rainfall_squared', 'State_encoded',
       'Crop_encoded'],
      dtype='object')
