CROP YIELD PREDICTION

In [None]:
# Step 1: import the necessary libraries
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score


In [None]:
# Step 2: upload the dataset (only when it is not already on disk)
import csv
import os

# Skipping the prompt when the file exists keeps "Restart & Run All" from
# blocking on a manual upload, and lets the notebook run outside Colab as long
# as crop_yield.csv sits next to it.
if os.path.exists('crop_yield.csv'):
    uploaded = {}  # keep the name defined for any later cell that inspects it
    print("crop_yield.csv already present, skipping upload")
else:
    # Imported lazily: google.colab is only importable inside a Colab runtime.
    from google.colab import files
    uploaded = files.upload()


Saving crop_yield.csv to crop_yield.csv


In [None]:
# Step 3: read the dataset into `df` and collect the categorical value lists
try:
    # Only the read itself is guarded, so unrelated errors (e.g. a missing
    # column) are not mistaken for a missing file.
    df = pd.read_csv('crop_yield.csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): exit() ends the kernel session and hides
    # the cause, whereas an exception stops "Run All" with a readable message.
    raise FileNotFoundError(
        "crop_yield.csv not found in the working directory; "
        "run the upload cell first"
    ) from err

# The raw file pads some categorical values with trailing spaces (df.head()
# prints "Coconut " and "Kharif     "). Strip them so identical categories are
# not split and the UI dropdowns show clean labels.
for column in ("Crop", "Season", "State"):
    df[column] = df[column].str.strip()

# Distinct categorical values, in first-seen order.
Crop = df["Crop"].unique().tolist()
State = df["State"].unique().tolist()
Season = df["Season"].unique().tolist()

print("dataset loaded succesfully")
print("first 5 rows of dataset:")
print(df.head())

dataset loaded succesfully
first 5 rows of dataset:
           Crop  Crop_Year       Season  State     Area  Production  \
0      Arecanut       1997  Whole Year   Assam  73814.0       56708   
1     Arhar/Tur       1997  Kharif       Assam   6637.0        4685   
2   Castor seed       1997  Kharif       Assam    796.0          22   
3      Coconut        1997  Whole Year   Assam  19656.0   126905000   
4  Cotton(lint)       1997  Kharif       Assam   1739.0         794   

   Annual_Rainfall  Fertilizer  Pesticide        Yield  
0           2051.4  7024878.38   22882.34     0.796087  
1           2051.4   631643.29    2057.47     0.710435  
2           2051.4    75755.32     246.76     0.238333  
3           2051.4  1870661.52    6093.36  5238.051739  
4           2051.4   165500.63     539.09     0.420909  


In [None]:
# Select the model inputs/target and one-hot encode the categorical columns
features = [
    'Crop', 'Crop_Year', 'Season', 'State',
    'Area', 'Annual_Rainfall', 'Fertilizer', 'Pesticide',
]
target = 'Yield'

x = df[features]
y = df[target]

categorical_features = ['Crop', 'Season', 'State']
numerical_features = ['Crop_Year', 'Area', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']

# Categorical columns are one-hot encoded; every other selected column is
# passed through unchanged. Categories not seen during fit are ignored at
# transform time rather than raising.
# NOTE(review): the encoder is fitted on the full dataset before the
# train/test split — consider fitting on the training rows only.
categorical_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[('cat', categorical_encoder, categorical_features)],
    remainder='passthrough',
)

X_processed = preprocessor.fit_transform(x)
print(f"\n Data preprocessed.Shape of processed features:{X_processed.shape}")


 Data preprocessed.Shape of processed features:(19689, 96)


In [None]:
# Step 4: split the processed features and target into train and test sets
# (train_test_split is already imported in the imports cell at the top)
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y,
    test_size=0.2,    # hold out 20% of the rows for evaluation
    random_state=42,  # fixed seed so the split is reproducible
)

print("Data split into training and testing sets")
print(f"Training set size:{X_train.shape[0]}samples")
print(f"Testing set size:{X_test.shape[0]}samples")

Data split into training and testing sets
Training set size:15751samples
Testing set size:3938samples


In [None]:
# Step 5: fit the random-forest regressor on the training split
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,  # fixed seed so repeated runs build the same forest
)
model.fit(X_train, y_train)

print("Random Forest Regressor trained successfully")

Random Forest Regressor trained successfully


In [None]:
# Step 6: evaluate the trained model on the held-out test split
y_pred = model.predict(X_test)
print("prediction made on the test set")

# Regression metrics comparing the predictions with the true test targets.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above

print(f"R-squared (R2):{r2:.2f}")
print(f"Mean Absolute Error(MAE):{mae:.2f}")
print(f"Mean Sqared Error(MSE):{mse:.2f}")
print(f"Root Mean Squared Error (RMSE):{rmse:.2f}")

prediction made on the test set
R-squared (R2):0.98
Mean Absolute Error(MAE):9.48
Mean Sqared Error(MSE):15818.54
Root Mean Squared Error (RMSE):125.77


In [None]:
# Step 7: create the Gradio interface
def predict_yield(Crop, Crop_Year, Season, State, Area, Annual_Rainfall, Fertilizer, Pesticide):
    """Predict the yield for a single set of form inputs.

    The arguments are assembled into a one-row DataFrame whose columns match
    the feature columns the preprocessor was fitted on, transformed with the
    fitted ``preprocessor`` and passed to the trained ``model``.

    Args:
        Crop, Season, State: categorical values chosen in the dropdowns.
        Crop_Year, Area, Annual_Rainfall, Fertilizer, Pesticide: numeric
            values entered in the number fields.

    Returns:
        The model's prediction as a Python float rounded to two decimals.

    Raises:
        gr.Error: if any field was left empty, so the UI shows a readable
            message instead of an opaque failure from the preprocessor/model.
    """
    row = {
        'Crop': Crop,
        'Crop_Year': Crop_Year,
        'Season': Season,
        'State': State,
        'Area': Area,
        'Annual_Rainfall': Annual_Rainfall,
        'Fertilizer': Fertilizer,
        'Pesticide': Pesticide,
    }

    # An unselected dropdown or blank number field arrives as None (or an
    # empty string); reject those before they reach the model.
    missing = [name for name, value in row.items() if value is None or value == ""]
    if missing:
        raise gr.Error("Please fill in: " + ", ".join(missing))

    input_data = pd.DataFrame([row])
    input_processed = preprocessor.transform(input_data)
    prediction = model.predict(input_processed)[0]
    # Convert the NumPy scalar to a plain float for the output component.
    return round(float(prediction), 2)
def _dropdown_choices(column):
    """Return the distinct non-null values of ``df[column]``, sorted for easy lookup."""
    return sorted(df[column].dropna().unique().tolist())


# Input widgets, in the same order as predict_yield's parameters.
inputs = [
    gr.Dropdown(choices=_dropdown_choices('Crop'), label="Crop"),
    gr.Number(label="Crop Year", precision=0),  # whole years only
    gr.Dropdown(choices=_dropdown_choices('Season'), label="Season"),
    gr.Dropdown(choices=_dropdown_choices('State'), label="State"),
    gr.Number(label="Area (in hectares)"),
    gr.Number(label="Annual Rainfall (mm)"),
    # In the training data these two columns hold totals for the whole area,
    # not per-hectare rates (df.head() shows Fertilizer ~= 95 x Area and
    # Pesticide ~= 0.31 x Area), so the labels must ask for totals.
    gr.Number(label="Fertilizer (total kg)"),
    gr.Number(label="Pesticide (total kg)")
]

# NOTE(review): in df.head() Yield is on the order of Production / Area, so its
# unit follows the Production column — confirm that "kg/ha" is actually right.
output = gr.Number(label="Predicted Yield (kg/ha)")

app = gr.Interface(
    fn=predict_yield,
    inputs=inputs,
    outputs=output,
    title="Crop Yield Prediction",
    description="Enter crop details and environmental factors to predict yield."
)

app.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ae2714954195b14125.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


