<a href="https://colab.research.google.com/github/sushmithashenoy07/AgriMitra/blob/main/crop_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Input CSV locations. Both are absolute paths at the filesystem root;
# replace them with the actual locations of your files.
crop_data_file, market_data_file = (
    '/Crop_recommendation.csv',
    '/cropmarketprice.csv',
)


In [None]:
# Read the crop file first, then the market file, from the paths configured above.
crop_data, market_data = (
    pd.read_csv(csv_path) for csv_path in (crop_data_file, market_data_file)
)

In [None]:
# Check the column names of both dataframes.
# The merge attempted further down joins crop 'label' to market 'Commodity',
# so this is where to confirm both key columns exist.
print("Crop Data Columns:", crop_data.columns)
print("Market Data Columns:", market_data.columns)


Crop Data Columns: Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')
Market Data Columns: Index(['State', 'District', 'Market', 'Commodity', 'Variety', 'Grade',
       'Arrival_Date', 'Min_x0020_Price', 'Max_x0020_Price',
       'Modal_x0020_Price'],
      dtype='object')


In [None]:
# Inner-join each crop row to the market rows whose 'Commodity' equals the crop's 'label'.
merged_data = crop_data.merge(market_data, left_on='label', right_on='Commodity')


In [None]:
# Model inputs: the seven numeric columns that come from the crop dataset.
X = merged_data.loc[:, ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]

# Model target: the modal price column that comes from the market dataset.
y = merged_data.loc[:, 'Modal_x0020_Price']

In [None]:
# Heading followed by the first rows of the crop dataset.
print("Crop Data Preview:", crop_data.head(), sep="\n")

Crop Data Preview:
    N   P   K  temperature   humidity        ph    rainfall label
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice
1  85  58  41    21.770462  80.319644  7.038096  226.655537  rice
2  60  55  44    23.004459  82.320763  7.840207  263.964248  rice
3  74  35  40    26.491096  80.158363  6.980401  242.864034  rice
4  78  42  42    20.130175  81.604873  7.628473  262.717340  rice


In [None]:
# Heading followed by the first rows of the market price dataset.
print("Market Data Preview:", market_data.head(), sep="\n")

Market Data Preview:
            State       District        Market                Commodity  \
0  Andhra Pradesh  East Godavari        Karapa      Paddy(Dhan)(Common)   
1  Andhra Pradesh  East Godavari   Ravulapelem                   Banana   
2  Andhra Pradesh         Guntur        Tenali                    Lemon   
3  Andhra Pradesh        Kurnool       Nandyal      Paddy(Dhan)(Common)   
4           Assam        Barpeta  Barpeta Road  French Beans (Frasbean)   

                   Variety  Grade Arrival_Date  Min_x0020_Price  \
0                     1001    FAQ   19/09/2024           2183.0   
1        Chakkarakeli(Red)  Large   19/09/2024           2400.0   
2                    Lemon    FAQ   19/09/2024           4200.0   
3             Sona Mahsuri    FAQ   19/09/2024           2290.0   
4  French Beans (Frasbean)  Local   19/09/2024           3800.0   

   Max_x0020_Price  Modal_x0020_Price  
0           2200.0             2183.0  
1           3200.0             2800.0  
2    

In [None]:
# Strip leading/trailing whitespace from the join-key columns on both sides.
# This does not change case or spelling, so e.g. 'rice' still will not equal
# 'Paddy(Dhan)(Common)' after this step.
crop_data['label'] = crop_data['label'].str.strip()
market_data['Commodity'] = market_data['Commodity'].str.strip()

In [None]:
# Repeat the label/Commodity inner join now that both key columns are whitespace-stripped.
merged_data = crop_data.merge(market_data, left_on='label', right_on='Commodity')


In [None]:
# Report the size of the merge result and show its first rows
# (a shape with 0 rows means no join keys matched).
print(
    f"Merged Data Shape: {merged_data.shape}",
    "Merged Data Preview:",
    merged_data.head(),
    sep="\n",
)

Merged Data Shape: (0, 18)
Merged Data Preview:
Empty DataFrame
Columns: [N, P, K, temperature, humidity, ph, rainfall, label, State, District, Market, Commodity, Variety, Grade, Arrival_Date, Min_x0020_Price, Max_x0020_Price, Modal_x0020_Price]
Index: []


In [None]:
# List the distinct join-key values on each side so the naming mismatch can be inspected.
print("Unique crop names in crop data (label column):", crop_data['label'].unique(), sep="\n")

print("\nUnique commodities in market data (Commodity column):", market_data['Commodity'].unique(), sep="\n")


Unique crop names in crop data (label column):
['rice' 'maize' 'chickpea' 'kidneybeans' 'pigeonpeas' 'mothbeans'
 'mungbean' 'blackgram' 'lentil' 'pomegranate' 'banana' 'mango' 'grapes'
 'watermelon' 'muskmelon' 'apple' 'orange' 'papaya' 'coconut' 'cotton'
 'jute' 'coffee']

Unique commodities in market data (Commodity column):
['Paddy(Dhan)(Common)' 'Banana' 'Lemon' 'French Beans (Frasbean)'
 'Bhindi(Ladies Finger)' 'Cabbage' 'Pumpkin' 'Capsicum' 'Onion' 'Potato'
 'Bottle gourd' 'Tomato' 'Soyabean' 'Coriander(Leaves)'
 'Green Gram (Moong)(Whole)' 'Cucumbar(Kheera)' 'Ginger(Green)'
 'Green Chilli' 'Papaya' 'Pomegranate' 'Ridgeguard(Tori)'
 'Bajra(Pearl Millet/Cumbu)' 'Castor Seed' 'Isabgul (Psyllium)' 'Soanf'
 'Suva (Dill Seed)' 'Maize' 'Cotton' 'Wheat' 'Arhar (Tur/Red Gram)(Whole)'
 'Bitter gourd' 'Brinjal' 'Corriander seed' 'Groundnut'
 'Sesamum(Sesame,Gingelly,Til)' 'Mustard' 'Black Gram (Urd Beans)(Whole)'
 'Millets' 'Methi Seeds' 'Bengal Gram(Gram)(Whole)' 'Cummin Seed(Jeera)'
 'K

In [None]:
# Translation table from crop-dataset labels (keys) to the commodity names used in the
# market dataset (values). Each key appears exactly once: a repeated key in a dict
# literal is silently overwritten by the later entry, which hides typos and edits.
crop_to_commodity_mapping = {
    # Labels that occur in the crop dataset's 'label' column
    'rice': 'Paddy(Dhan)(Common)',
    'maize': 'Maize',
    'chickpea': 'Kabuli Chana(Chickpeas-White)',
    'kidneybeans': 'Rajma',
    'pigeonpeas': 'Arhar (Tur/Red Gram)(Whole)',
    'mothbeans': 'Moth Beans',
    'mungbean': 'Green Gram (Moong)(Whole)',
    'blackgram': 'Black Gram (Urd Beans)(Whole)',
    'lentil': 'Lentil (Masur)(Whole)',
    'pomegranate': 'Pomegranate',
    'banana': 'Banana',
    'mango': 'Mango',
    'grapes': 'Grapes',
    'watermelon': 'Water Melon',
    'muskmelon': 'Karbuja(Musk Melon)',
    'apple': 'Apple',
    'orange': 'Orange',
    'papaya': 'Papaya',
    'coconut': 'Coconut',
    'cotton': 'Cotton',
    'jute': 'Jute',
    'coffee': 'Coffee',
    # Extra entries for labels not in the crop dataset's printed label list;
    # they have no effect unless such labels are added to the data.
    'mustard': 'Mustard',
    'soybean': 'Soyabean',
    'groundnut': 'Groundnut',
    'wheat': 'Wheat',
    'millets': 'Millets',
    'sugarcane': 'Sugarcane',
    'potato': 'Potato',
    'onion': 'Onion',
    'tomato': 'Tomato',
    'brinjal': 'Brinjal',
    'cabbage': 'Cabbage',
    'cauliflower': 'Cauliflower',
    'bhindi': 'Bhindi(Ladies Finger)',
    'pumpkin': 'Pumpkin',
    'bottle_gourd': 'Bottle gourd',
    'ridge_gourd': 'Ridgeguard(Tori)',
    'bitter_gourd': 'Bitter gourd',
    'chillies': 'Green Chilli',
    'garlic': 'Garlic',
    'ginger': 'Ginger(Green)',
    'turmeric': 'Turmeric',
    'spinach': 'Spinach',
    'coriander': 'Coriander(Leaves)',
    'mint': 'Mint(Pudina)',
    'beetroot': 'Beetroot',
    'carrot': 'Carrot',
    'peas': 'Green Peas',
    'guava': 'Guava',
    'pineapple': 'Pineapple',
    'lime': 'Lime',
    'mousambi': 'Mousambi(Sweet Lime)',
    'jackfruit': 'Jack Fruit',
    'cashew': 'Cashewnuts',
    'sapota': 'Chikoos(Sapota)',
    'fig': 'Fig(Anjura/Anjeer)',
    'amla': 'Amla(Nelli Kai)',
    'jamun': 'Jamun(Narale Hannu)',
    'custard_apple': 'Custard Apple (Sharifa)',
    # Add additional mappings as needed if any are missing
}

In [None]:
# Add a 'label_mapped' column holding the market 'Commodity' name for each crop label.
# The original 'label' column is left unchanged; labels absent from the mapping become NaN.
crop_data['label_mapped'] = crop_data['label'].map(crop_to_commodity_mapping)


In [None]:
# Rows whose label had no entry in the mapping carry NaN in 'label_mapped';
# collect them and list the affected labels.
unmapped = crop_data.loc[crop_data['label_mapped'].isna()]
print(f"Unmapped crops: {unmapped['label'].unique()}")

Unmapped crops: []


In [None]:
# Drop crop rows that have no commodity mapping (NaN in 'label_mapped').
# This rebinds crop_data to the filtered frame.
crop_data = crop_data.dropna(subset=['label_mapped'])

In [None]:
# Inner-join on the translated names: crop 'label_mapped' against market 'Commodity'.
merged_data = crop_data.merge(market_data, left_on='label_mapped', right_on='Commodity')


In [None]:
# Report the size of the mapped merge and show its first rows.
print(
    f"Merged Data Shape: {merged_data.shape}",
    "Merged Data Preview:",
    merged_data.head(),
    sep="\n",
)

Merged Data Shape: (130700, 19)
Merged Data Preview:
    N   P   K  temperature   humidity        ph    rainfall label  \
0  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
1  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
2  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
3  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   
4  90  42  43    20.879744  82.002744  6.502985  202.935536  rice   

          label_mapped           State       District      Market  \
0  Paddy(Dhan)(Common)  Andhra Pradesh  East Godavari      Karapa   
1  Paddy(Dhan)(Common)  Andhra Pradesh        Kurnool     Nandyal   
2  Paddy(Dhan)(Common)     Chattisgarh       Dhamtari   Belargaon   
3  Paddy(Dhan)(Common)     Chattisgarh       Dhamtari      Boraee   
4  Paddy(Dhan)(Common)     Chattisgarh       Dhamtari  Gattasilli   

             Commodity       Variety Grade Arrival_Date  Min_x0020_Price  \
0  Paddy(Dhan)(Common)          1001   FA

In [None]:
# Feature / target selection for machine learning
# X: the seven numeric columns that originate from the crop dataset
X = merged_data.loc[:, ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
# y: modal market price to be predicted
y = merged_data.loc[:, 'Modal_x0020_Price']


In [None]:
# Split data into training (70%) and testing (30%) sets; random_state pins the shuffle
# so the split is reproducible.
# NOTE(review): the merged frame repeats each crop sample once per matching market row
# (see the identical feature rows in the merge preview above), so this row-level random
# split can put the same feature vector in both train and test. Confirm whether a
# grouped split is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [None]:
# NOTE: both names are already imported in the notebook's first cell; the re-import is harmless.
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
# Random forest with 100 trees; random_state fixes the seed so refits are repeatable.
model = RandomForestRegressor(n_estimators=100, random_state=42)
# Fit on the training split (features X_train, target y_train).
model.fit(X_train, y_train)

In [None]:
# Predict prices on test set (one prediction per row of X_test, from the fitted `model`)
y_pred = model.predict(X_test)

In [None]:
# Calculate error: mean squared error between the held-out prices and the predictions.
# NOTE: mean_absolute_error is imported here but not used in this cell.
from sklearn.metrics import mean_absolute_error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 8536720.819669448


In [None]:
# Score every merged row with the fitted random forest so the ranking below has a
# 'Predicted_Price' column to work with. Without this assignment the column does not
# exist at this point in a fresh top-to-bottom run, and the selection raises KeyError.
merged_data['Predicted_Price'] = model.predict(X)

# Get the top 3 rows (crop name + predicted price) with the highest predicted prices.
# Rows are not de-duplicated here, so the same crop can appear more than once.
top_crops = (
    merged_data[['label_mapped', 'Predicted_Price']]
    .sort_values(by='Predicted_Price', ascending=False)
    .head(3)
)


In [None]:
# Blank line, heading, then the top-3 table.
print("\nTop 3 recommended crops based on market price:", top_crops, sep="\n")


Top 3 recommended crops based on market price:
      label_mapped  Predicted_Price
67660       Grapes     12567.844256
67661       Grapes     12567.844256
67663       Grapes     12567.844256


In [None]:
# Write the top-3 table to a CSV in the current working directory; index=False omits the row index.
top_crops.to_csv('top_crops_recommendation.csv', index=False)

In [None]:
# Show the saved top-3 table once more (same content as printed before the export).
print("\nTop 3 recommended crops based on market price:", top_crops, sep="\n")


Top 3 recommended crops based on market price:
      label_mapped  Predicted_Price
67660       Grapes     12567.844256
67661       Grapes     12567.844256
67663       Grapes     12567.844256


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Compute the three test-set metrics for the random forest first, then report them together.
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # square root of the MSE
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R-squared: {r2}")


Mean Squared Error: 8536720.819669448
Root Mean Squared Error: 2921.7667291673797
R-squared: 0.5145495227787191


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import StackingRegressor


In [None]:
# Reload both datasets for the hybrid (SVR + random forest) run.
# The paths use a single leading slash, matching the paths used when the data was first
# loaded; a leading '//' is not portable across platforms.
crop_data = pd.read_csv('/Crop_recommendation.csv')  # Adjust path if necessary
market_data = pd.read_csv('/cropmarketprice.csv')  # Adjust path if necessary


In [None]:
# Translation table from crop-dataset labels (keys) to the commodity names used in the
# market dataset (values). Each key appears exactly once: a repeated key in a dict
# literal is silently overwritten by the later entry, which hides typos and edits.
crop_to_commodity_mapping = {
    # Labels that occur in the crop dataset's 'label' column
    'rice': 'Paddy(Dhan)(Common)',
    'maize': 'Maize',
    'chickpea': 'Kabuli Chana(Chickpeas-White)',
    'kidneybeans': 'Rajma',
    'pigeonpeas': 'Arhar (Tur/Red Gram)(Whole)',
    'mothbeans': 'Moth Beans',
    'mungbean': 'Green Gram (Moong)(Whole)',
    'blackgram': 'Black Gram (Urd Beans)(Whole)',
    'lentil': 'Lentil (Masur)(Whole)',
    'pomegranate': 'Pomegranate',
    'banana': 'Banana',
    'mango': 'Mango',
    'grapes': 'Grapes',
    'watermelon': 'Water Melon',
    'muskmelon': 'Karbuja(Musk Melon)',
    'apple': 'Apple',
    'orange': 'Orange',
    'papaya': 'Papaya',
    'coconut': 'Coconut',
    'cotton': 'Cotton',
    'jute': 'Jute',
    'coffee': 'Coffee',
    # Extra entries for labels not in the crop dataset's printed label list;
    # they have no effect unless such labels are added to the data.
    'mustard': 'Mustard',
    'soybean': 'Soyabean',
    'groundnut': 'Groundnut',
    'wheat': 'Wheat',
    'millets': 'Millets',
    'sugarcane': 'Sugarcane',
    'potato': 'Potato',
    'onion': 'Onion',
    'tomato': 'Tomato',
    'brinjal': 'Brinjal',
    'cabbage': 'Cabbage',
    'cauliflower': 'Cauliflower',
    'bhindi': 'Bhindi(Ladies Finger)',
    'pumpkin': 'Pumpkin',
    'bottle_gourd': 'Bottle gourd',
    'ridge_gourd': 'Ridgeguard(Tori)',
    'bitter_gourd': 'Bitter gourd',
    'chillies': 'Green Chilli',
    'garlic': 'Garlic',
    'ginger': 'Ginger(Green)',
    'turmeric': 'Turmeric',
    'spinach': 'Spinach',
    'coriander': 'Coriander(Leaves)',
    'mint': 'Mint(Pudina)',
    'beetroot': 'Beetroot',
    'carrot': 'Carrot',
    'peas': 'Green Peas',
    'guava': 'Guava',
    'pineapple': 'Pineapple',
    'lime': 'Lime',
    'mousambi': 'Mousambi(Sweet Lime)',
    'jackfruit': 'Jack Fruit',
    'cashew': 'Cashewnuts',
    'sapota': 'Chikoos(Sapota)',
    'fig': 'Fig(Anjura/Anjeer)',
    'amla': 'Amla(Nelli Kai)',
    'jamun': 'Jamun(Narale Hannu)',
    'custard_apple': 'Custard Apple (Sharifa)',
    # Add additional mappings as needed if any are missing
}

In [None]:
# Add a 'label_mapped' column with the market commodity name for each crop label
# (labels absent from the mapping become NaN).
crop_data['label_mapped'] = crop_data['label'].map(crop_to_commodity_mapping)

In [None]:
# Collect rows left without a commodity name (NaN in 'label_mapped') and list their labels.
unmapped = crop_data.loc[crop_data['label_mapped'].isna()]
print(f"Unmapped crops: {unmapped['label'].unique()}")

Unmapped crops: []


In [None]:
# Keep only the crop rows that received a commodity name; rebinds crop_data to the filtered frame.
crop_data = crop_data.dropna(subset=['label_mapped'])

In [None]:
# Inner-join crop rows to market rows on the translated commodity name.
merged_data = crop_data.merge(market_data, left_on='label_mapped', right_on='Commodity')

In [None]:
# X: the seven numeric columns that originate from the crop dataset
X = merged_data.loc[:, ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
# y: modal market price to be predicted
y = merged_data.loc[:, 'Modal_x0020_Price']

In [None]:
# 70/30 random train/test split with a fixed seed for reproducibility.
# NOTE(review): merged_data is a join of crop rows against market rows, so one crop sample
# can occupy many rows with identical features; a row-level random split may then share
# feature vectors between train and test. Confirm whether that is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [None]:
# Fit the standardizer on the training split only, then apply that same fitted
# transform to both splits so the test split does not influence the scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# The two regressors whose predictions are averaged later: an RBF-kernel SVR (all other
# hyperparameters left at their defaults) and a 100-tree random forest with a fixed seed.
svm_model = SVR(kernel='rbf')
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

In [None]:
# The SVR is fit on the standardized features; the random forest is fit on the raw features.
svm_model.fit(X_train_scaled, y_train)
rf_model.fit(X_train, y_train)

In [None]:
# Test-set predictions from each model, each fed the same feature representation it was trained on.
svm_predictions = svm_model.predict(X_test_scaled)
rf_predictions = rf_model.predict(X_test)


In [None]:
# Hybrid prediction: unweighted element-wise mean of the SVR and random-forest predictions.
combined_predictions = (svm_predictions + rf_predictions) / 2


In [None]:
# Evaluate the averaged predictions against the held-out prices.
mse = mean_squared_error(y_test, combined_predictions)
rmse = mse**0.5  # square root of the MSE
r2 = r2_score(y_test, combined_predictions)


In [None]:

# Report the hybrid (averaged) model's metrics, one per line.
print(
    f"Combined Model Mean Squared Error: {mse}",
    f"Combined Model Root Mean Squared Error: {rmse}",
    f"Combined Model R-squared: {r2}",
    sep="\n",
)

Combined Model Mean Squared Error: 9660490.704963705
Combined Model Root Mean Squared Error: 3108.132993448592
Combined Model R-squared: 0.4506450518903148


In [None]:
# Score every merged row (all of X, not only the test split) with the averaged
# SVR + random-forest prediction and store it as a new 'Predicted_Price' column.
X_scaled = scaler.transform(X)  # Scale the features for SVM
merged_data['Predicted_Price'] = (svm_model.predict(X_scaled) + rf_model.predict(X)) / 2

In [None]:
# Top 3 distinct crops by predicted price.
# Sort first, then de-duplicate: drop_duplicates keeps the first row it sees per crop, so
# sorting in descending order beforehand makes that row the crop's highest predicted
# price. De-duplicating before sorting would instead rank each crop by whichever of its
# rows happens to come first in merged_data.
top_crops_distinct = (
    merged_data[['label_mapped', 'Predicted_Price']]
    .sort_values(by='Predicted_Price', ascending=False)
    .drop_duplicates(subset=['label_mapped'])
    .head(3)
)

In [None]:
# Blank line, heading, then the de-duplicated top-3 table.
print("\nTop 3 distinct recommended crops based on market price:", top_crops_distinct, sep="\n")


Top 3 distinct recommended crops based on market price:
                        label_mapped  Predicted_Price
23000                    Pomegranate      9437.951367
17000  Kabuli Chana(Chickpeas-White)      8757.602517
66800                         Grapes      8622.173187


In [None]:
# Save the hybrid model's top-3 distinct crops. `top_crops` holds the earlier
# random-forest-only ranking, which is not what this file name describes.
top_crops_distinct.to_csv('top_crops_recommendation_hybrid.csv', index=False)