In [2]:
import pandas as pd

# Replace with your actual file path
df = pd.read_csv('/content/crop_yield.csv')
df.head()

Unnamed: 0,Crop,Crop_Year,Season,State,Area,Production,Annual_Rainfall,Fertilizer,Pesticide,Yield
0,Arecanut,1997,Whole Year,Assam,73814.0,56708,2051.4,7024878.38,22882.34,0.796087
1,Arhar/Tur,1997,Kharif,Assam,6637.0,4685,2051.4,631643.29,2057.47,0.710435
2,Castor seed,1997,Kharif,Assam,796.0,22,2051.4,75755.32,246.76,0.238333
3,Coconut,1997,Whole Year,Assam,19656.0,126905000,2051.4,1870661.52,6093.36,5238.051739
4,Cotton(lint),1997,Kharif,Assam,1739.0,794,2051.4,165500.63,539.09,0.420909


In [3]:
print(df.columns)

Index(['Crop', 'Crop_Year', 'Season', 'State', 'Area', 'Production',
       'Annual_Rainfall', 'Fertilizer', 'Pesticide', 'Yield'],
      dtype='object')


In [4]:
# Fill missing values
df.ffill(inplace=True)

# Normalize numerical features
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[['Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']] = scaler.fit_transform(
    df[['Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']]
)

# Encode categorical features
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['Crop'] = le.fit_transform(df['Crop'])
df['Season'] = le.fit_transform(df['Season'])
df['State'] = le.fit_transform(df['State'])

In [5]:
# Features and target
X = df[['Crop', 'Season', 'State', 'Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']]
y = df['Yield']

# Train-test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [7]:
from sklearn.metrics import r2_score, mean_squared_error

y_pred = model.predict(X_test)
print("R¬≤ Score:", r2_score(y_test, y_pred))
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

R¬≤ Score: 0.9040557837467855
Mean Squared Error: 76874.33086944661


In [8]:
import pickle
pickle.dump(model, open('crop_yield_model.pkl', 'wb'))

In [9]:
with open('app.py', 'w') as f:
    f.write(code)

NameError: name 'code' is not defined

In [10]:
with open('requirements.txt', 'w') as f:
    f.write("streamlit\nnumpy\nscikit-learn\npandas")

In [11]:
import pickle
import gzip

with gzip.open('crop_yield_model_compressed.pkl.gz', 'wb') as f:
    pickle.dump(model, f)

In [12]:
import gzip
import pickle

with gzip.open('crop_yield_model_compressed.pkl.gz', 'rb') as f:
    model = pickle.load(f)

In [15]:
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import gdown
import os

# -------------------------------------------------------
# App Setup
# -------------------------------------------------------
st.set_page_config(page_title="üåæ Crop Yield Prediction", layout="centered")
st.title("üåæ Crop Yield Prediction System")
st.write("Predict crop yield based on rainfall, soil type, and other factors.")

# -------------------------------------------------------
# Download Model from Google Drive if not exists
# -------------------------------------------------------
MODEL_FILENAME = "crop_yield_model.pkl"

# üîó REPLACE the below with your actual Google Drive FILE ID
GOOGLE_DRIVE_FILE_ID = "10_I6-gmrgMcELV9hem7SzBt61llgNur9"
DRIVE_URL = f"https://drive.google.com/uc?id={GOOGLE_DRIVE_FILE_ID}"

@st.cache_resource
def load_model():
    if not os.path.exists(MODEL_FILENAME):
        with st.spinner("Downloading model file..."):
            gdown.download(DRIVE_URL, MODEL_FILENAME, quiet=False)
            st.success("Model downloaded successfully ‚úÖ")

    try:
        with open(MODEL_FILENAME, "rb") as f:
            model = pickle.load(f)
        return model
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None

model = load_model()

if model is None:
    st.stop()

# -------------------------------------------------------
# Input Section
# -------------------------------------------------------
st.subheader("üìã Enter Crop Details")

col1, col2 = st.columns(2)
with col1:
    rainfall = st.number_input("Rainfall (in mm)", min_value=0.0, step=0.1)
    temperature = st.number_input("Temperature (¬∞C)", min_value=0.0, step=0.1)
    humidity = st.number_input("Humidity (%)", min_value=0.0, max_value=100.0, step=0.1)

with col2:
    soil_type = st.selectbox("Soil Type", ["Sandy", "Loamy", "Clayey", "Black", "Red"])
    crop_type = st.selectbox("Crop Type", ["Wheat", "Rice", "Maize", "Sugarcane", "Cotton"])
    fertilizer = st.number_input("Fertilizer Used (kg/ha)", min_value=0.0, step=0.1)

# -------------------------------------------------------
# Prepare Input Data
# -------------------------------------------------------
input_data = pd.DataFrame({
    'rainfall': [rainfall],
    'temperature': [temperature],
    'humidity': [humidity],
    'soil_type': [soil_type],
    'crop_type': [crop_type],
    'fertilizer': [fertilizer]
})

st.write("### üßæ Input Summary:")
st.dataframe(input_data)

# -------------------------------------------------------
# Predict Button
# -------------------------------------------------------
if st.button("üå± Predict Crop Yield"):
    try:
        prediction = model.predict(input_data)
        st.success(f"Predicted Crop Yield: **{prediction[0]:.2f} tonnes/ha**")
    except Exception as e:
        st.error(f"Prediction error: {e}")

# -------------------------------------------------------
# Footer
# -------------------------------------------------------
st.markdown("---")
st.caption("Developed with ‚ù§Ô∏è using Streamlit and scikit-learn.")


2025-10-31 11:02:46.978 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-10-31 11:02:47.084 Session state does not function when running a script without `streamlit run`


DeltaGenerator()

In [14]:
!pip install -r requirements.txt

Collecting streamlit (from -r requirements.txt (line 1))
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit->-r requirements.txt (line 1))
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.2/10.2 MB[0m [31m57.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m132.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0
