In [359]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [360]:
# Load dataset
df = pd.read_csv("crp (1).csv")  # Ensure this file is in the same directory


In [361]:
# Display basic information about the dataset
df.info(), df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1266 entries, 0 to 1265
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Crop             1266 non-null   object 
 1   Crop_Year        1266 non-null   int64  
 2   Season           1266 non-null   object 
 3   State            1266 non-null   object 
 4   Area             1266 non-null   int64  
 5   Production       1266 non-null   int64  
 6   Annual_Rainfall  1266 non-null   float64
 7   Fertilizer       1266 non-null   float64
 8   Pesticide        1266 non-null   float64
 9   Yield            1266 non-null   float64
dtypes: float64(4), int64(3), object(3)
memory usage: 99.0+ KB


(None,
         Crop  Crop_Year       Season           State    Area  Production  \
 0   Arecanut       2000  Whole Year   Andhra Pradesh     262         724   
 1  Arhar/Tur       2000  Kharif       Andhra Pradesh  507366      216457   
 2  Arhar/Tur       2000  Rabi         Andhra Pradesh    6098        2701   
 3      Bajra       2000  Kharif       Andhra Pradesh  129475      135964   
 4      Bajra       2000  Rabi         Andhra Pradesh   14229       12524   
 
    Annual_Rainfall   Fertilizer  Pesticide     Yield  
 0            935.6     25720.54      68.12  2.780000  
 1            935.6  49808120.22  131915.16  0.433182  
 2            935.6    598640.66    1585.48  0.423750  
 3            935.6  12710560.75   33663.50  1.001667  
 4            935.6   1396860.93    3699.54  0.883125  )

In [362]:
# Extract unique crop names
unique_crops = df["Crop"].unique().tolist()
unique_crops[:10]  # Display the first 10 unique crops as a preview


['Arecanut',
 'Arhar/Tur',
 'Bajra',
 'Banana',
 'Cashewnut',
 'Castor seed',
 'Coconut ',
 'Coriander',
 'Cotton(lint)',
 'Dry chillies']

In [363]:
# Extract unique season names as well
unique_seasons = df["Season"].unique().tolist()
unique_seasons


['Whole Year ', 'Kharif     ', 'Rabi       ']

In [364]:
# Handle missing values
df = df.dropna()


In [365]:
# Encode categorical variables (Crop & Season)
crop_encoding = {crop: idx for idx, crop in enumerate(df["Crop"].unique())}
season_encoding = {season: idx for idx, season in enumerate(df["Season"].unique())}
df["Crop"] = df["Crop"].map(crop_encoding)
df["Season"] = df["Season"].map(season_encoding)



In [366]:
# Define features (inputs) and target variables (outputs)
X = df[["Crop", "Season", "Area"]]
y = df[["Annual_Rainfall", "Fertilizer", "Pesticide"]]  # Target variables


In [367]:
# Split dataset into training & testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [368]:
# Train the RandomForestRegressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)  # ✅ Model is trained!


In [369]:
# Save trained model
joblib.dump(model, "regression_model.pkl")
print("✅ Model trained & saved as regression_model.pkl")

✅ Model trained & saved as regression_model.pkl


In [370]:
# Function to predict rainfall, fertilizer use, and pesticide use
def predict_rainfall_fertilizer_pesticide(crop, season, area):
    """Predicts Rainfall, Fertilizer Use, and Pesticide Use based on inputs."""
    # Load trained model
    model = joblib.load("regression_model.pkl")
    
    # Convert input to encoded values
    crop_encoded = crop_encoding.get(crop, None)  # If crop not found, return None
    season_encoded = season_encoding.get(season, None)  # If season not found, return None

    # Check if input values are valid
    if crop_encoded is None:
        return {"Error": f"❌ Invalid Crop: {crop}. Please select from {list(crop_encoding.keys())}"}
    if season_encoded is None:
        return {"Error": f"❌ Invalid Season: {season}. Please select from {list(season_encoding.keys())}"}


In [371]:
import pandas as pd
import joblib

# Load trained model
model = joblib.load("regression_model.pkl")

# Encoding mappings (Ensure these are defined)
crop_encoding = {"Wheat": 0, "Rice": 1, "Maize": 2, "Barley": 3, "Sugarcane": 4}  # Replace with actual crop mappings
season_encoding = {"Kharif": 0, "Rabi": 1, "Whole Year": 2}  # Replace with actual season mappings

# Function to predict values
def predict_values(crop, season, area):
    if crop not in crop_encoding or season not in season_encoding:
        return {"Error": "Invalid input. Check crop or season."}

    crop_encoded = crop_encoding[crop]
    season_encoded = season_encoding[season]

    input_data = pd.DataFrame([[crop_encoded, season_encoded, area]], columns=["Crop", "Season", "Area"])
    prediction = model.predict(input_data)

    return {
        "Predicted Rainfall (mm)": round(prediction[0][0], 2),
        "Predicted Fertilizer Use (kg)": round(prediction[0][1], 2),
        "Predicted Pesticide Use (kg)": round(prediction[0][2], 2)
    }

# Example usage (Replace these values as needed)
crop = "Rice"
season = "Kharif"
area = 10.5  # in hectares

result = predict_values(crop, season, area)
print(result)


{'Predicted Rainfall (mm)': np.float64(916.34), 'Predicted Fertilizer Use (kg)': np.float64(1645.56), 'Predicted Pesticide Use (kg)': np.float64(3.58)}
