In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
# Load the crop recommendation dataset and preview its first rows.
crop_data_path = 'crop_recommendation.csv'
crop_df = pd.read_csv(crop_data_path)
crop_df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [3]:
# Replace the crop names in 'label' with integer class ids. The fitted encoder
# is kept because a later cell uses it to turn predicted ids back into names.
label_encoder = LabelEncoder().fit(crop_df['label'])
crop_df['label'] = label_encoder.transform(crop_df['label'])

In [4]:
# Hold out 20% of the rows for evaluation: 'label' is the target and every
# remaining column is used as a feature.
crop_features = crop_df.drop(columns=['label'])
crop_target = crop_df['label']
X_train, X_test, y_train, y_test = train_test_split(
    crop_features, crop_target, test_size=0.2, random_state=42
)

In [5]:
# Standardise the crop features. The scaler is fitted on the training split
# only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

In [6]:
# Random forest with 100 trees and a fixed seed, trained on the standardised
# crop training split.
rf_classifier = RandomForestClassifier(random_state=42, n_estimators=100)
rf_classifier.fit(X_train_std, y_train)

RandomForestClassifier(random_state=42)

In [8]:
# Load the fertilizer table; later cells read its 'Crop', 'N', 'P' and 'K' columns.
fertilizer_df = pd.read_csv(filepath_or_buffer='fertilizer.csv')

In [9]:
# Encode the fertilizer crop names with a dedicated encoder. Re-fitting the
# shared `label_encoder` here would replace the class mapping learned from the
# crop recommendation labels, which is still needed later to decode the crop
# predictions back into names.
fert_label_encoder = LabelEncoder()
fert_crop_codes = pd.Series(
    fert_label_encoder.fit_transform(fertilizer_df['Crop']),
    index=fertilizer_df.index,
    name='Crop',
)

# `fertilizer_df` itself is left unmodified, so its 'Crop' column keeps the
# crop names and this cell gives the same result every time it is re-run.
X_train_fert, X_test_fert, y_train_fert, y_test_fert = train_test_split(
    fertilizer_df.drop(columns=['Crop']),
    fert_crop_codes,
    test_size=0.2,
    random_state=42,
)


In [10]:
# Standardise the fertilizer features with their own scaler, fitted on the
# fertilizer training split only.
scaler_fert = StandardScaler().fit(X_train_fert)
X_train_fert_std = scaler_fert.transform(X_train_fert)
X_test_fert_std = scaler_fert.transform(X_test_fert)

In [11]:
# Random forest with 100 trees and a fixed seed, trained on the standardised
# fertilizer training split.
rf_classifier_fert = RandomForestClassifier(random_state=42, n_estimators=100)
rf_classifier_fert.fit(X_train_fert_std, y_train_fert)


RandomForestClassifier(random_state=42)

In [13]:
# Combine the two random forests in a voting ensemble and train it on the
# standardised crop training split.
# NOTE(review): VotingClassifier.fit() trains fresh clones of the listed
# estimators on the data passed here, so both members end up fitted on the
# crop data and the earlier fit of `rf_classifier_fert` on the fertilizer
# split is not reused. Confirm this is the intended ensemble.
ensemble_members = [('rf', rf_classifier), ('rf_fert', rf_classifier_fert)]
ensemble_classifier = VotingClassifier(ensemble_members)
ensemble_classifier.fit(X_train_std, y_train)


VotingClassifier(estimators=[('rf', RandomForestClassifier(random_state=42)),
                             ('rf_fert',
                              RandomForestClassifier(random_state=42))])

In [14]:
# Class ids predicted by the ensemble for the held-out crop samples.
y_pred_ensemble_classifier = ensemble_classifier.predict(X=X_test_std)

In [15]:
# Report the ensemble's accuracy on the held-out crop test split.
# `accuracy_score` is sklearn.metrics.accuracy_score; it has to be imported in
# the notebook's import cell, otherwise this cell raises NameError when the
# notebook is run on a fresh kernel.
print('Ensemble Classifier:')
ensemble_accuracy = accuracy_score(y_test, y_pred_ensemble_classifier)
print('Accuracy:', ensemble_accuracy)
print('\n')

Ensemble Classifier:
Accuracy: 0.9931818181818182




In [17]:
# Soil and weather readings for a single sample. Each value is a one-element
# list so the dict converts directly into a one-row DataFrame.
input_features = {
    'N': [90], 'P': [42], 'K': [20],
    'temperature': [20.879744],
    'humidity': [82.002744],
    'ph': [6.502985],
    'rainfall': [202.935536],
}

# Scale the sample with the scaler fitted on the crop training data, classify
# it with the ensemble, then map the predicted class id back to a crop name.
# Decoding assumes `label_encoder` still holds the mapping fitted on the crop
# labels.
input_df = pd.DataFrame(input_features)
input_std = scaler.transform(input_df)
crop_prediction = ensemble_classifier.predict(input_std)
predicted_crop = label_encoder.inverse_transform(crop_prediction)

print('Predicted Crop:', predicted_crop)

Predicted Crop: ['rice']


In [18]:
import pandas as pd

# Re-read the fertilizer table so that 'Crop' holds the crop names that the
# lookups in the following cells compare against.
fertilizer_df = pd.read_csv(filepath_or_buffer='fertilizer.csv')

In [19]:
def calculate_required_npk(crop, current_n, current_p, current_k, fertilizer_data=None):
    """Return how much N, P and K is missing to reach a crop's reference levels.

    Parameters
    ----------
    crop : str
        Crop name, compared for exact equality against the 'Crop' column.
    current_n, current_p, current_k : number
        Nutrient levels currently available.
    fertilizer_data : pandas.DataFrame, optional
        Table with 'Crop', 'N', 'P' and 'K' columns. When omitted, the
        notebook-level ``fertilizer_df`` is used.

    Returns
    -------
    dict or str
        ``{'Required N': ..., 'Required P': ..., 'Required K': ...}`` where each
        value is the reference level minus the current level, floored at 0 and
        returned as a built-in Python number. If no row matches ``crop``, the
        string ``"Crop not found in the dataset"`` is returned instead.
    """
    if fertilizer_data is None:
        # Looked up at call time so the notebook's current table is used.
        fertilizer_data = fertilizer_df

    crop_rows = fertilizer_data[fertilizer_data['Crop'] == crop]
    if crop_rows.empty:
        return "Crop not found in the dataset"

    current_levels = {'N': current_n, 'P': current_p, 'K': current_k}
    required = {}
    for nutrient, current in current_levels.items():
        # The first matching row supplies the reference value; a surplus
        # (negative difference) is reported as 0.
        shortfall = max(0, crop_rows[nutrient].values[0] - current)
        # Unwrap NumPy scalars so the result prints identically regardless of
        # the installed NumPy version.
        if hasattr(shortfall, 'item'):
            shortfall = shortfall.item()
        required[f'Required {nutrient}'] = shortfall
    return required

In [20]:
# Feed the predicted crop name and the N/P/K readings entered earlier into
# the shortfall calculation and show the result.
crop = predicted_crop[0]
current_n, current_p, current_k = (input_features[key][0] for key in ('N', 'P', 'K'))

required_npk = calculate_required_npk(crop, current_n, current_p, current_k)
print(required_npk)

{'Required N': 0, 'Required P': 0, 'Required K': 20}


In [21]:
import pandas as pd
from sklearn.linear_model import LinearRegression

In [22]:
# Load the district-wise yield records, lower-case the crop names and preview
# the first rows.
yield_df = pd.read_csv('raw_districtwise_yield_data.csv').assign(
    Crop=lambda frame: frame['Crop'].str.lower()
)
yield_df.head()

Unnamed: 0,State_Name,District_Name,Crop_Year,Season,Crop,Area,Production
0,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,arecanut,1254.0,2000.0
1,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,other kharif pulses,2.0,1.0
2,Andaman and Nicobar Islands,NICOBARS,2000,Kharif,rice,102.0,321.0
3,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,banana,176.0,641.0
4,Andaman and Nicobar Islands,NICOBARS,2000,Whole Year,cashewnut,720.0,165.0


In [23]:
# Mean 'Production' for every (crop, state, year) combination; the grouping
# keys stay as regular columns.
production_group_keys = ['Crop', 'State_Name', 'Crop_Year']
production_df = yield_df.groupby(by=production_group_keys, as_index=False)['Production'].mean()
production_df

Unnamed: 0,Crop,State_Name,Crop_Year,Production
0,apple,Tamil Nadu,2002,0.000000
1,apple,Tamil Nadu,2003,0.000000
2,arcanut (processed),Karnataka,2002,9641.550000
3,arecanut,Andaman and Nicobar Islands,2000,3600.000000
4,arecanut,Andaman and Nicobar Islands,2001,3650.000000
...,...,...,...,...
12981,wheat,West Bengal,2012,49773.722222
12982,wheat,West Bengal,2013,51546.500000
12983,wheat,West Bengal,2014,52180.777778
12984,yam,Tamil Nadu,2002,0.000000


In [24]:
# Mean 'Area' for every (crop, state, year) combination; the grouping keys
# stay as regular columns.
area_group_keys = ['Crop', 'State_Name', 'Crop_Year']
area_df = yield_df.groupby(by=area_group_keys, as_index=False)['Area'].mean()
area_df

Unnamed: 0,Crop,State_Name,Crop_Year,Area
0,apple,Tamil Nadu,2002,2.000000
1,apple,Tamil Nadu,2003,2.500000
2,arcanut (processed),Karnataka,2002,7205.800000
3,arecanut,Andaman and Nicobar Islands,2000,2177.000000
4,arecanut,Andaman and Nicobar Islands,2001,2177.000000
...,...,...,...,...
12981,wheat,West Bengal,2012,17865.111111
12982,wheat,West Bengal,2013,18415.611111
12983,wheat,West Bengal,2014,18591.111111
12984,yam,Tamil Nadu,2002,50.555556


In [25]:
# Mean production divided by mean area, matched row by row on the index of the
# two aggregated tables. The resulting series has no name, so the one-column
# frame built from it carries the column label 0.
production_per_area_df = production_df['Production'].div(area_df['Area']).to_frame()
production_per_area_df

Unnamed: 0,0
0,0.000000
1,0.000000
2,1.338026
3,1.653652
4,1.676619
...,...
12981,2.786085
12982,2.799065
12983,2.806760
12984,0.000000


In [26]:
# Place the production-per-area ratio (column 0) next to the aggregated
# production table, aligning the rows on their index. The arguments dropped
# here were all left at pandas' default values.
new_yield_df = pd.concat([production_df, production_per_area_df], axis=1)
new_yield_df

Unnamed: 0,Crop,State_Name,Crop_Year,Production,0
0,apple,Tamil Nadu,2002,0.000000,0.000000
1,apple,Tamil Nadu,2003,0.000000,0.000000
2,arcanut (processed),Karnataka,2002,9641.550000,1.338026
3,arecanut,Andaman and Nicobar Islands,2000,3600.000000,1.653652
4,arecanut,Andaman and Nicobar Islands,2001,3650.000000,1.676619
...,...,...,...,...,...
12981,wheat,West Bengal,2012,49773.722222,2.786085
12982,wheat,West Bengal,2013,51546.500000,2.799065
12983,wheat,West Bengal,2014,52180.777778,2.806760
12984,yam,Tamil Nadu,2002,0.000000,0.000000


In [27]:
def predict_future_production(year, crop_name, state, yield_data=None):
    """Extrapolate a crop's production-per-area ratio in one state to ``year``.

    A straight line is fitted to the historical ratio (column ``0`` of the
    yield table) as a function of 'Crop_Year' and evaluated at ``year``.

    Parameters
    ----------
    year : int
        Year to predict for.
    crop_name : str
        Value matched exactly against the 'Crop' column.
    state : str
        Value matched exactly against the 'State_Name' column.
    yield_data : pandas.DataFrame, optional
        Table with 'Crop', 'State_Name', 'Crop_Year' and ``0`` columns. When
        omitted, the notebook-level ``new_yield_df`` is used.

    Returns
    -------
    float
        The fitted line's value at ``year``.

    Raises
    ------
    ValueError
        If no row with a finite ratio exists for the crop/state combination.
    """
    if yield_data is None:
        # Looked up at call time so the notebook's current table is used.
        yield_data = new_yield_df

    matches = yield_data[(yield_data['Crop'] == crop_name) &
                         (yield_data['State_Name'] == state)]

    # The regression rejects NaN and infinite targets (these can result from
    # the division that builds the ratio), so only finite ratios are kept.
    history = matches[np.isfinite(matches[0].astype(float))]
    if history.empty:
        raise ValueError(
            f"No usable yield history for crop {crop_name!r} in state {state!r}"
        )

    X = history[['Crop_Year']]
    y = history[0]

    model = LinearRegression()
    model.fit(X, y)

    # Predict from a frame with the same column name used for fitting, so the
    # model receives consistently named features.
    future_X = pd.DataFrame({'Crop_Year': [year]})
    future_production = model.predict(future_X)

    return future_production[0]

# Example: extrapolate the value for the crop predicted earlier, in one state.
# NOTE(review): the model is fitted on column 0 (production divided by area),
# so the printed number is a per-area ratio; `area_owned` is assigned here but
# not used by any statement in this cell.
future_year, state = 2023, 'Tamil Nadu'
crop = predicted_crop[0]
area_owned = 5

predicted_production = predict_future_production(future_year, crop, state)
print('Predicted Production:', predicted_production)

Predicted Production: 3.4358514102369995
