In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the raw dataset from disk and preview its first five rows.
DATASET_PATH = '/content/dataset.csv'
df = pd.read_csv(DATASET_PATH)
df.head()

Unnamed: 0,Location,Soil Type,Rainfall,Area_Cultivated,Production,Crop_Type,Price_per_unit,Investment
0,Delhi,Loam,600,100,1500,Wheat,1800,75000
1,Mumbai,Clay,1200,200,1800,Rice,2500,90000
2,Chennai,Sandy,800,150,1200,Millets,1200,60000
3,Kolkata,Silt,1100,180,2000,Jute,3500,80000
4,Bengaluru,Loam,900,120,900,Maize,1400,65000


In [None]:
# Count missing values in every column (isna is the canonical spelling of isnull).
df.isna().sum()

Location           0
Soil Type          0
Rainfall           0
Area_Cultivated    0
Production         0
Crop_Type          0
Price_per_unit     0
Investment         0
dtype: int64

In [None]:
# Profit is gross revenue (production x unit price) minus the amount invested.
gross_revenue = df['Production'].mul(df['Price_per_unit'])
df['Output_Profit'] = gross_revenue.sub(df['Investment'])
df.head()

Unnamed: 0,Location,Soil Type,Rainfall,Area_Cultivated,Production,Crop_Type,Price_per_unit,Investment,Output_Profit
0,Delhi,Loam,600,100,1500,Wheat,1800,75000,2625000
1,Mumbai,Clay,1200,200,1800,Rice,2500,90000,4410000
2,Chennai,Sandy,800,150,1200,Millets,1200,60000,1380000
3,Kolkata,Silt,1100,180,2000,Jute,3500,80000,6920000
4,Bengaluru,Loam,900,120,900,Maize,1400,65000,1195000


In [None]:
# Number of rows per crop label, most frequent first.
crop_counts = df['Crop_Type'].value_counts()
crop_counts

Rice          19
Wheat         17
Cotton        16
Tea           12
Soybean       10
Barley         9
Maize          8
Coconut        6
Apple          6
Millets        6
Sugarcane      5
Groundnut      5
Jute           5
Potato         4
Corn           4
Mango          3
Cashew         2
Orange         2
Grapes         2
Banana         2
Pineapple      2
Rubber         2
Strawberry     2
Onion          1
Pepper         1
Chickpeas      1
Name: Crop_Type, dtype: int64

In [None]:
# Crop labels listed in the order of their integer codes; codes start at 1.
crop_names_in_code_order = [
    'Rice', 'Wheat', 'Cotton', 'Tea', 'Soybean', 'Barley', 'Maize',
    'Coconut', 'Apple', 'Millets', 'Sugarcane', 'Groundnut', 'Jute',
    'Potato', 'Corn', 'Mango', 'Cashew', 'Orange', 'Grapes', 'Banana',
    'Pineapple', 'Rubber', 'Strawberry', 'Onion', 'Pepper', 'Chickpeas',
]

# name -> code lookup: 'Rice' -> 1, 'Wheat' -> 2, ..., 'Chickpeas' -> 26.
crop_dict = {
    name: code
    for code, name in enumerate(crop_names_in_code_order, start=1)
}

# Numeric crop label used as a model target; labels missing from crop_dict
# would come out as NaN.
df['crop_num'] = df['Crop_Type'].map(crop_dict)

In [None]:
# Targets: the numeric crop label and the derived profit.
target_columns = ['crop_num', 'Output_Profit']
# Columns kept out of the features: the targets themselves, the raw crop label,
# and the two columns that Output_Profit is computed from.
excluded_columns = ['Crop_Type', 'Output_Profit', 'Production', 'Price_per_unit', 'crop_num']

X = df.drop(columns=excluded_columns)
y = df[target_columns]

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [None]:
# Show one training row to confirm which feature sits at which column position.
X_train.iloc[:1]

Unnamed: 0,Location,Soil Type,Rainfall,Area_Cultivated,Investment
29,Panaji,Sandy,950,120,85000


In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

# One-hot encode the categorical columns, dropping the first level of each to
# avoid a redundant indicator column.
# handle_unknown='ignore' makes a category that was never seen during fit encode
# as all zeros instead of raising, so transforming the test split or a
# user-supplied location / soil type does not crash. An all-zero row is
# indistinguishable from the dropped first level, and scikit-learn warns when
# that happens.
# NOTE: combining drop= with handle_unknown='ignore' requires scikit-learn >= 1.0.
ohe = OneHotEncoder(drop='first', handle_unknown='ignore')
scale = StandardScaler()

# Columns are addressed by position so the transformer also accepts plain arrays.
# Going by the X_train preview, the positions are:
#   0 = Location, 1 = Soil Type, 2 = Rainfall, 3 = Area_Cultivated, 4 = Investment
preprocesser = ColumnTransformer(
        transformers = [
            ('StandardScale', scale, [2,3,4]),   # numeric features -> standardised
            ('OHE', ohe, [0,1]),                 # categorical features -> one-hot
        ],
        remainder='passthrough'
)

In [None]:
# Fit the preprocessing on the training split only, then apply the fitted
# transformer to the test split. The tuple is evaluated left to right, so the
# fit always happens before the test transform.
X_train_dummy, X_test_dummy = (
    preprocesser.fit_transform(X_train),
    preprocesser.transform(X_test),
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, r2_score
from sklearn.multioutput import MultiOutputClassifier


# clf_crop fits one random forest per target column of y_train
# (crop_num and Output_Profit, in that order).
clf_crop = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=42))
# NOTE(review): clf_profit is a classifier fitted on the same two-column target,
# so it treats each distinct profit value as a class label and predicts the same
# two targets as clf_crop. A regressor trained on Output_Profit alone may be what
# was intended - confirm before changing, because this object is pickled later in
# the notebook.
clf_profit = RandomForestClassifier(n_estimators=100, random_state=42)

clf_crop.fit(X_train_dummy, y_train)
clf_profit.fit(X_train_dummy, y_train)

y_pred_crop = clf_crop.predict(X_test_dummy)
y_pred_profit = clf_profit.predict(X_test_dummy)

# Score the hold-out predictions of the model used for recommendations.
# The prediction columns follow the y_train column order: 0 = crop_num, 1 = Output_Profit.
crop_accuracy = accuracy_score(y_test['crop_num'], y_pred_crop[:, 0])
profit_r2 = r2_score(y_test['Output_Profit'], y_pred_crop[:, 1])
print("Crop accuracy on the test split: {:.3f}".format(crop_accuracy))
print("Profit R^2 on the test split: {:.3f}".format(profit_r2))


In [None]:
def recommendation(Location, Soil_Type, Rainfall, Area_Cultivated, Investment):
    """Predict the crop code and profit for one set of growing conditions.

    The five inputs are assembled into a single feature row in the same column
    order the preprocessing pipeline was fitted with, transformed with the
    module-level ``preprocesser`` and passed to the module-level ``clf_crop``.

    Args:
        Location: Location label (string).
        Soil_Type: Soil type label (string).
        Rainfall: Rainfall value (numeric).
        Area_Cultivated: Cultivated area (numeric).
        Investment: Invested amount (numeric).

    Returns:
        A 2-D array with exactly one row holding ``clf_crop``'s prediction for
        the input. With the targets used at fit time, column 0 is the crop code
        and column 1 the predicted profit.
    """
    # dtype=object keeps the numbers numeric and the labels as str. Without it
    # NumPy coerces the whole mixed row to a string array, and the scaler then
    # has to parse the numbers back out of text.
    features = np.array(
        [[Location, Soil_Type, Rainfall, Area_Cultivated, Investment]],
        dtype=object,
    )
    transformed_features = preprocesser.transform(features)

    # reshape guarantees a single-row 2-D result regardless of the predictor's
    # output shape.
    prediction = clf_crop.predict(transformed_features).reshape(1, -1)

    return prediction

In [None]:
# Example query; these values match the first row of the dataset preview.
location='Delhi'
soil_type='Loam'
rainfall=600
area=100
investment=75000

predict = recommendation(location,soil_type,rainfall,area,investment)

# Invert the name -> code mapping under its own name. Rebinding crop_dict here
# would break the earlier df['Crop_Type'].map(crop_dict) cell on a re-run
# (every crop would map to NaN) and would keep two hand-written copies of the
# same table that have to stay in sync.
crop_names = {code: name for name, code in crop_dict.items()}

# Row 0 is the only prediction: column 0 is the crop code, column 1 the profit.
predicted_code = predict[0][0]
predicted_profit = predict[0][1]

if predicted_code in crop_names:
    crop = crop_names[predicted_code]
    print("{} is a best crop to be cultivated ".format(crop))
else:
    print("Sorry are not able to recommend a proper crop for this environment")
print(str(predicted_profit)+" of profit can be made")

Wheat is a best crop to be cultivated 
2625000 of profit can be made




In [None]:
import pickle

# Persist the fitted models and the preprocessing pipeline. Each file is opened
# in a with-block so the handle is flushed and closed even if pickling fails;
# a bare open(...) passed straight to pickle.dump is never explicitly closed.
for artifact, filename in (
    (clf_crop, 'clf_crop.pkl'),
    (preprocesser, 'preprocesser.pkl'),
    (clf_profit, 'clf_profit.pkl'),
):
    with open(filename, 'wb') as handle:
        pickle.dump(artifact, handle)