In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle

In [2]:
dataset = pd.read_csv("/content/drive/MyDrive/crop.csv")

In [3]:
dataset

Unnamed: 0,temperature,humidity,ph,water availability,season,label
0,20.879744,82.002744,6.502985,202.935536,rainy,rice
1,21.770462,80.319644,7.038096,226.655537,rainy,rice
2,23.004459,82.320763,7.840207,263.964248,rainy,rice
3,26.491096,80.158363,6.980401,242.864034,rainy,rice
4,20.130175,81.604873,7.628473,262.717340,rainy,rice
...,...,...,...,...,...,...
1395,23.874845,86.792613,6.718725,177.514731,rainy,jute
1396,23.928879,88.071123,6.880205,154.660874,rainy,jute
1397,24.814412,81.686889,6.861069,190.788639,rainy,jute
1398,24.447439,82.286484,6.769346,190.968489,rainy,jute


In [4]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1400 entries, 0 to 1399
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   temperature         1400 non-null   float64
 1   humidity            1400 non-null   float64
 2   ph                  1400 non-null   float64
 3   water availability  1400 non-null   float64
 4   season              1400 non-null   object 
 5   label               1400 non-null   object 
dtypes: float64(4), object(2)
memory usage: 65.8+ KB


In [5]:
dataset.isnull().sum()

Unnamed: 0,0
temperature,0
humidity,0
ph,0
water availability,0
season,0
label,0


In [6]:
dataset['season'].unique()

array(['rainy', 'winter', 'spring', 'summer'], dtype=object)

In [7]:
encoder = OrdinalEncoder()

In [8]:
dataset['season'] = encoder.fit_transform(dataset[['season']])

In [9]:
dataset['season'].unique()

array([0., 3., 1., 2.])

In [10]:
Y = dataset['label']

In [11]:
X = dataset[['temperature','humidity','ph','water availability','season']]

In [12]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [13]:
x_test

Unnamed: 0,temperature,humidity,ph,water availability,season
665,27.106068,89.895933,6.698574,37.456806,0.0
624,28.951724,81.670853,6.510841,56.511033,0.0
115,18.254054,55.282204,6.204748,63.723582,0.0
478,29.490967,67.106044,6.471862,153.250451,0.0
233,17.848517,19.091729,8.621663,76.324707,3.0
...,...,...,...,...,...
188,25.009334,67.816568,6.528631,62.913595,0.0
1091,24.543570,84.608083,6.211749,42.006603,2.0
377,15.467893,21.437807,5.824208,88.887961,1.0
351,17.000676,19.907905,5.520880,103.292641,1.0


Data Modelling

Decision Tree Classifier (DTC)

In [14]:
model = DecisionTreeClassifier()

In [15]:
model.fit(x_train, y_train)

In [50]:
with open("dtc_model.pkl", "wb") as file:
    pickle.dump(model, file)

In [17]:
model_pred = model.predict(x_test)

In [18]:
model_pred

array(['mungbean', 'mungbean', 'maize', 'pigeonpeas', 'chickpea',
       'blackgram', 'chickpea', 'lentil', 'maize', 'pigeonpeas', 'maize',
       'maize', 'blackgram', 'blackgram', 'kidneybeans', 'kidneybeans',
       'mothbeans', 'mungbean', 'chickpea', 'jute', 'rice', 'chickpea',
       'chickpea', 'mungbean', 'rice', 'rice', 'rice', 'watermelon',
       'watermelon', 'pigeonpeas', 'blackgram', 'mothbeans', 'lentil',
       'blackgram', 'maize', 'pigeonpeas', 'chickpea', 'blackgram',
       'jute', 'watermelon', 'rice', 'watermelon', 'mothbeans', 'lentil',
       'kidneybeans', 'maize', 'watermelon', 'lentil', 'rice',
       'blackgram', 'blackgram', 'rice', 'mothbeans', 'muskmelon',
       'muskmelon', 'mothbeans', 'kidneybeans', 'lentil', 'chickpea',
       'jute', 'chickpea', 'maize', 'lentil', 'maize', 'pigeonpeas',
       'maize', 'muskmelon', 'chickpea', 'kidneybeans', 'maize',
       'muskmelon', 'mothbeans', 'blackgram', 'lentil', 'chickpea',
       'chickpea', 'maize', 'ric

In [19]:
print(classification_report(y_test, model_pred))

              precision    recall  f1-score   support

   blackgram       1.00      1.00      1.00        22
    chickpea       1.00      1.00      1.00        22
      cotton       1.00      1.00      1.00        14
        jute       0.78      1.00      0.88        18
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        23
       maize       0.95      0.97      0.96        39
   mothbeans       1.00      1.00      1.00        22
    mungbean       1.00      1.00      1.00        21
   muskmelon       1.00      1.00      1.00        20
  pigeonpeas       1.00      0.80      0.89        20
        rice       1.00      0.89      0.94        19
  watermelon       1.00      1.00      1.00        20

    accuracy                           0.97       280
   macro avg       0.98      0.97      0.97       280
weighted avg       0.98      0.97      0.98       280



In [20]:
param = [[29.106068,	59.895933,	3.698574,	45.456806,	2.0]]


In [21]:
model.predict(param)



array(['mothbeans'], dtype=object)

Random Forest Classifier(RFC)

In [22]:
rfc=RandomForestClassifier()

In [23]:
rfc.fit(x_train,y_train)

In [24]:
rfc_pred=rfc.predict(x_test)

In [25]:
param = [[22,	34.895933,	4.698574,	170.456806,	1.0]]

In [26]:
model.predict(param)



array(['pigeonpeas'], dtype=object)

In [27]:
print(classification_report(y_test,rfc_pred))

              precision    recall  f1-score   support

   blackgram       1.00      1.00      1.00        22
    chickpea       1.00      1.00      1.00        22
      cotton       1.00      1.00      1.00        14
        jute       0.90      1.00      0.95        18
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        23
       maize       0.97      1.00      0.99        39
   mothbeans       1.00      1.00      1.00        22
    mungbean       1.00      1.00      1.00        21
   muskmelon       1.00      1.00      1.00        20
  pigeonpeas       1.00      0.95      0.97        20
        rice       1.00      0.89      0.94        19
  watermelon       1.00      1.00      1.00        20

    accuracy                           0.99       280
   macro avg       0.99      0.99      0.99       280
weighted avg       0.99      0.99      0.99       280



Logistic Regression

In [28]:
lreg=LogisticRegression()

In [29]:
lreg.fit(x_train,y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [30]:
lreg_pred=lreg.predict(x_test)

In [31]:
param = [[22,	34.895933,	4.698574,	170.456806,	1.0]]

In [32]:
model.predict(param)



array(['pigeonpeas'], dtype=object)

In [33]:
print(classification_report(y_test,lreg_pred))

              precision    recall  f1-score   support

   blackgram       0.73      0.86      0.79        22
    chickpea       0.96      1.00      0.98        22
      cotton       0.64      0.64      0.64        14
        jute       0.71      0.83      0.77        18
 kidneybeans       0.95      0.95      0.95        20
      lentil       0.74      0.74      0.74        23
       maize       0.75      0.77      0.76        39
   mothbeans       0.72      0.59      0.65        22
    mungbean       0.90      0.86      0.88        21
   muskmelon       1.00      1.00      1.00        20
  pigeonpeas       0.94      0.85      0.89        20
        rice       0.88      0.79      0.83        19
  watermelon       1.00      1.00      1.00        20

    accuracy                           0.84       280
   macro avg       0.84      0.84      0.84       280
weighted avg       0.84      0.84      0.84       280



Random Forest Classification (RFC)

In [34]:
rfc_model=RandomForestClassifier()

In [35]:
rfc_model.fit(x_train,y_train)

In [36]:
rfc_pred=rfc_model.predict(x_test)

In [37]:
print(classification_report(y_test,rfc_pred))

              precision    recall  f1-score   support

   blackgram       1.00      1.00      1.00        22
    chickpea       1.00      1.00      1.00        22
      cotton       1.00      1.00      1.00        14
        jute       0.90      1.00      0.95        18
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        23
       maize       0.97      1.00      0.99        39
   mothbeans       1.00      1.00      1.00        22
    mungbean       1.00      1.00      1.00        21
   muskmelon       1.00      1.00      1.00        20
  pigeonpeas       1.00      0.95      0.97        20
        rice       1.00      0.89      0.94        19
  watermelon       1.00      1.00      1.00        20

    accuracy                           0.99       280
   macro avg       0.99      0.99      0.99       280
weighted avg       0.99      0.99      0.99       280



In [38]:
rfc_model.predict(param)



array(['pigeonpeas'], dtype=object)

SVC (Support Vector Classification)

In [39]:
svc_model=SVC()

In [40]:
svc_model.fit(x_train,y_train)

In [41]:
svc_pred=svc_model.predict(x_train)

In [42]:
print(classification_report(y_train,svc_pred))

              precision    recall  f1-score   support

   blackgram       0.80      0.96      0.87        78
    chickpea       0.72      1.00      0.84        78
      cotton       0.99      0.86      0.92        86
        jute       0.78      1.00      0.88        82
 kidneybeans       0.93      0.62      0.75        80
      lentil       0.85      1.00      0.92        77
       maize       0.90      0.96      0.93       161
   mothbeans       1.00      0.64      0.78        78
    mungbean       1.00      0.18      0.30        79
   muskmelon       1.00      1.00      1.00        80
  pigeonpeas       1.00      0.78      0.87        80
        rice       1.00      0.78      0.88        81
  watermelon       0.53      1.00      0.69        80

    accuracy                           0.84      1120
   macro avg       0.88      0.83      0.82      1120
weighted avg       0.89      0.84      0.83      1120



In [43]:
svc_model.predict(param)



array(['pigeonpeas'], dtype=object)

Naive Bies (Gussian
NB)

In [44]:
nvb_model=GaussianNB()

In [45]:
nvb_model.fit(x_train,y_train)

In [46]:
nvb_pred=nvb_model.predict(x_train)

In [47]:
print(classification_report(y_train,nvb_pred))

              precision    recall  f1-score   support

   blackgram       0.86      1.00      0.92        78
    chickpea       1.00      1.00      1.00        78
      cotton       0.82      1.00      0.90        86
        jute       0.92      0.98      0.95        82
 kidneybeans       1.00      1.00      1.00        80
      lentil       0.90      1.00      0.94        77
       maize       1.00      0.40      0.58       161
   mothbeans       1.00      1.00      1.00        78
    mungbean       1.00      1.00      1.00        79
   muskmelon       1.00      1.00      1.00        80
  pigeonpeas       0.59      1.00      0.74        80
        rice       0.97      0.91      0.94        81
  watermelon       1.00      1.00      1.00        80

    accuracy                           0.91      1120
   macro avg       0.93      0.95      0.92      1120
weighted avg       0.93      0.91      0.90      1120



In [48]:
nvb_model.predict(param)



array(['maize'], dtype='<U11')