In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns


crop_data = pd.read_csv('/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv')


print(crop_data.columns)


crop_type_column = 'label'  # Replace with the actual column name

# Encode the target variable (crop type)
le = LabelEncoder()
crop_data[crop_type_column] = le.fit_transform(crop_data[crop_type_column])

# Normalize numerical features
scaler = StandardScaler()
numerical_features = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
crop_data[numerical_features] = scaler.fit_transform(crop_data[numerical_features])

# Define features and target variable
X = crop_data[numerical_features]
y = crop_data[crop_type_column]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Display classification report
print(classification_report(y_test, y_pred, target_names=le.classes_))

# Example input data with feature names
input_data = pd.DataFrame([[90, 42, 43, 20.87, 82.00, 6.5, 202.93]], columns=numerical_features)

# Normalize the input data
input_data = scaler.transform(input_data)

# Predict the crop
predicted_crop = model.predict(input_data)
predicted_crop_name = le.inverse_transform(predicted_crop)
print(f'Recommended Crop: {predicted_crop_name}')


Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')
Accuracy: 0.9931818181818182
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       1.00      1.00      1.00        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        17
      grapes       1.00      1.00      1.00        14
        jute       0.92      1.00      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      1.00      1.00        21
       mango       1.00      1.00      1.00        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon 



In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load your crop nutrient data
data = pd.read_csv('/kaggle/input/crop-nutrient-database/crops.csv')

# Print the column names to identify the correct features
print(data.columns)

# Replace 'feature1', 'feature2', 'feature3' with actual column names from your dataset
# For example, let's use 'AvN%(dry)', 'AvMoisture%', and 'AvYieldUnitWeight(lb)' as features
X = data[['AvN%(dry)', 'AvMoisture%', 'AvYieldUnitWeight(lb)']]  # Replace with your actual feature columns
y = data['AvYieldUnitWeight(lb)']  # Replace with your actual target column

# Clean the data by replacing non-numeric values with NaN and then filling them with the mean of the column
X = X.apply(pd.to_numeric, errors='coerce')
X.fillna(X.mean(), inplace=True)

# Ensure the target column is numeric and handle NaN values
y = pd.to_numeric(y, errors='coerce')
y.fillna(y.mean(), inplace=True)

# Check for any remaining NaN values
print(X.isna().sum())
print(y.isna().sum())

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the model
model = RandomForestRegressor()
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Save the model for future use
import joblib
joblib.dump(model, 'nutrient_management_model.pkl')


Index(['Crop', 'ScientificName', 'Symbol', 'NuContAvailable',
       'PlantPartHarvested', 'CropCategory', 'YieldUnit',
       'AvYieldUnitWeight(lb)', 'AvMoisture%', 'AvN%(dry)',
       ...
       'N%(wet)_M-FF', 'P%(wet)_M-FF', 'gP/100g(wet)_AgH8-9',
       'gP/100g(wet)_AgH8-12', 'gP/100g(wet)_B788', 'P%(wet)_M&L',
       'K%(wet)_M-FF', 'gK/100g(wet)_AgH8-9', 'gK/100g(wet)_AgH8-12',
       'gK/100g(wet)_B788'],
      dtype='object', length=161)
AvN%(dry)                0
AvMoisture%              0
AvYieldUnitWeight(lb)    0
dtype: int64
0
Mean Squared Error: 17.680802753277028


['nutrient_management_model.pkl']

In [3]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Define the directory path and file name
directory_path = '/kaggle/input/crop-yield-prediction-dataset'
file_name = 'yield.csv'
file_path = os.path.join(directory_path, file_name)

# Load the new data
new_data = pd.read_csv(file_path)

# Print the first few rows to understand the structure
print(new_data.head())

# Select the feature columns (adjust column names as needed)
# Using 'Year' and 'Item' as features for simplicity
X_new = new_data[['Year', 'Item']]

# Convert categorical 'Item' column to numeric using one-hot encoding
X_new = pd.get_dummies(X_new, columns=['Item'])

# Clean the new data by replacing non-numeric values with NaN and then filling them with the mean of the column
X_new = X_new.apply(pd.to_numeric, errors='coerce')
X_new.fillna(X_new.mean(), inplace=True)

# Standardize the features
scaler = StandardScaler()
X_new_scaled = scaler.fit_transform(X_new)

# Initialize and train the model (using the same steps as before)
model = RandomForestRegressor()
model.fit(X_new_scaled, new_data['Value'])  # Assuming 'Value' is the target column

# Make predictions
predictions = model.predict(X_new_scaled)

# Print the predictions
print(predictions)

# Evaluate the model (optional, if you have a test set)
mse = mean_squared_error(new_data['Value'], predictions)
print(f'Mean Squared Error: {mse}')


  Domain Code Domain  Area Code         Area  Element Code Element  Item Code  \
0          QC  Crops          2  Afghanistan          5419   Yield         56   
1          QC  Crops          2  Afghanistan          5419   Yield         56   
2          QC  Crops          2  Afghanistan          5419   Yield         56   
3          QC  Crops          2  Afghanistan          5419   Yield         56   
4          QC  Crops          2  Afghanistan          5419   Yield         56   

    Item  Year Code  Year   Unit  Value  
0  Maize       1961  1961  hg/ha  14000  
1  Maize       1962  1962  hg/ha  14000  
2  Maize       1963  1963  hg/ha  14260  
3  Maize       1964  1964  hg/ha  14257  
4  Maize       1965  1965  hg/ha  14400  
[14619.18291952 15035.67381969 15257.70782441 ... 32960.20022852
 34060.07496573 33553.80456013]
Mean Squared Error: 2328859297.9933157


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the dataset
data = pd.read_csv('/kaggle/input/crop-yield-prediction-dataset/yield.csv')

# Inspect the data
print(data.head())

# Select relevant features
# Using 'Year' and 'Item' as features for simplicity
features = ['Year', 'Item']
X = data[features]
y = data['Value']  # Assuming 'Value' is the target column representing yield

# Convert categorical 'Item' column to numeric using one-hot encoding
X = pd.get_dummies(X, columns=['Item'])

# Clean the data by replacing non-numeric values with NaN and then filling them with the mean of the column
X = X.apply(pd.to_numeric, errors='coerce')
X.fillna(X.mean(), inplace=True)

# Ensure the target column is numeric and handle NaN values
y = pd.to_numeric(y, errors='coerce')
y.fillna(y.mean(), inplace=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the model
model = RandomForestRegressor()
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Save the model for future use
import joblib
joblib.dump(model, 'improved_crop_yield_model.pkl')


  Domain Code Domain  Area Code         Area  Element Code Element  Item Code  \
0          QC  Crops          2  Afghanistan          5419   Yield         56   
1          QC  Crops          2  Afghanistan          5419   Yield         56   
2          QC  Crops          2  Afghanistan          5419   Yield         56   
3          QC  Crops          2  Afghanistan          5419   Yield         56   
4          QC  Crops          2  Afghanistan          5419   Yield         56   

    Item  Year Code  Year   Unit  Value  
0  Maize       1961  1961  hg/ha  14000  
1  Maize       1962  1962  hg/ha  14000  
2  Maize       1963  1963  hg/ha  14260  
3  Maize       1964  1964  hg/ha  14257  
4  Maize       1965  1965  hg/ha  14400  
Mean Squared Error: 2409671809.0282454


['improved_crop_yield_model.pkl']