In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the dataset
csv_data = pd.read_csv('crop_recommendation.csv')


In [3]:
csv_data

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [4]:
# Extract the features and label columns
X = csv_data.drop('label', axis=1).values
y = csv_data['label'].values

In [5]:
X

array([[ 90.        ,  42.        ,  43.        , ...,  82.00274423,
          6.50298529, 202.9355362 ],
       [ 85.        ,  58.        ,  41.        , ...,  80.31964408,
          7.03809636, 226.6555374 ],
       [ 60.        ,  55.        ,  44.        , ...,  82.3207629 ,
          7.84020714, 263.9642476 ],
       ...,
       [118.        ,  33.        ,  30.        , ...,  67.22512329,
          6.36260785, 173.3228386 ],
       [117.        ,  32.        ,  34.        , ...,  52.12739421,
          6.75879255, 127.1752928 ],
       [104.        ,  18.        ,  30.        , ...,  60.39647474,
          6.77983261, 140.9370415 ]])

In [6]:
y

array(['rice', 'rice', 'rice', ..., 'coffee', 'coffee', 'coffee'],
      dtype=object)

In [7]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
X_train

array([[ 17.        ,  16.        ,  14.        , ...,  92.18151927,
          6.62553865, 102.944161  ],
       [ 37.        ,  79.        ,  19.        , ...,  69.3478631 ,
          7.14394276,  69.40878198],
       [  7.        ,  73.        ,  25.        , ...,  63.13215259,
          7.28805662,  45.20841071],
       ...,
       [ 11.        ,  36.        ,  31.        , ...,  51.77965917,
          6.47544932, 100.2585673 ],
       [ 11.        , 124.        , 204.        , ...,  80.06633966,
          6.36114111,  71.40043037],
       [ 32.        ,  78.        ,  22.        , ...,  62.35557553,
          7.00703752,  53.40906048]])

In [9]:
y_train

array(['orange', 'blackgram', 'lentil', ..., 'mango', 'grapes', 'lentil'],
      dtype=object)

In [10]:
X_test

array([[101.        ,  17.        ,  47.        , ...,  94.72981338,
          6.18505323,  26.30820876],
       [ 98.        ,   8.        ,  51.        , ...,  86.52258079,
          6.25933595,  49.43050977],
       [ 59.        ,  62.        ,  49.        , ...,  93.35191636,
          6.94149681, 114.778071  ],
       ...,
       [121.        ,  47.        ,  16.        , ...,  79.29573149,
          7.72324015,  72.49800885],
       [116.        ,  52.        ,  19.        , ...,  75.37170612,
          6.11452588,  67.08022574],
       [  5.        ,  68.        ,  20.        , ...,  33.10695144,
          6.12166671, 155.3705624 ]])

In [11]:
y_test

array(['muskmelon', 'watermelon', 'papaya', 'papaya', 'apple', 'mango',
       'apple', 'mothbeans', 'mungbean', 'lentil', 'blackgram', 'coconut',
       'pomegranate', 'jute', 'coconut', 'pomegranate', 'apple', 'maize',
       'papaya', 'muskmelon', 'coffee', 'papaya', 'orange', 'papaya',
       'chickpea', 'jute', 'mungbean', 'orange', 'pigeonpeas', 'rice',
       'pomegranate', 'mothbeans', 'jute', 'lentil', 'jute', 'blackgram',
       'jute', 'chickpea', 'chickpea', 'kidneybeans', 'papaya', 'mango',
       'blackgram', 'maize', 'mungbean', 'maize', 'pigeonpeas', 'coconut',
       'muskmelon', 'maize', 'blackgram', 'coffee', 'grapes', 'mungbean',
       'coffee', 'kidneybeans', 'cotton', 'apple', 'banana', 'blackgram',
       'watermelon', 'coconut', 'lentil', 'orange', 'papaya',
       'pigeonpeas', 'orange', 'rice', 'muskmelon', 'pigeonpeas',
       'muskmelon', 'coconut', 'jute', 'banana', 'blackgram', 'papaya',
       'banana', 'cotton', 'watermelon', 'orange', 'coffee', 'chickp

In [12]:

# Scale the features using Min-Max Scaling
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [13]:
X_train_scaled

array([[0.12142857, 0.07857143, 0.045     , ..., 0.9089898 , 0.48532225,
        0.29685161],
       [0.26428571, 0.52857143, 0.07      , ..., 0.64257946, 0.56594073,
        0.17630752],
       [0.05      , 0.48571429, 0.1       , ..., 0.57005802, 0.58835229,
        0.08931844],
       ...,
       [0.07857143, 0.22142857, 0.13      , ..., 0.43760347, 0.46198144,
        0.28719815],
       [0.07857143, 0.85      , 0.995     , ..., 0.76763665, 0.44420505,
        0.18346657],
       [0.22857143, 0.52142857, 0.085     , ..., 0.56099735, 0.54465022,
        0.11879596]])

In [14]:
X_test_scaled

array([[0.72142857, 0.08571429, 0.21      , ..., 0.93872187, 0.41682113,
        0.021381  ],
       [0.7       , 0.02142857, 0.23      , ..., 0.84296447, 0.42837304,
        0.10449492],
       [0.42142857, 0.40714286, 0.22      , ..., 0.92264534, 0.5344578 ,
        0.33938901],
       ...,
       [0.86428571, 0.3       , 0.055     , ..., 0.75864563, 0.6560289 ,
        0.18741185],
       [0.82857143, 0.33571429, 0.07      , ..., 0.71286229, 0.40585322,
        0.16793744],
       [0.03571429, 0.45      , 0.075     , ..., 0.21974075, 0.40696371,
        0.48530014]])

In [15]:

# Train the decision tree classifier
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train_scaled, y_train)



In [16]:
y_train

array(['orange', 'blackgram', 'lentil', ..., 'mango', 'grapes', 'lentil'],
      dtype=object)

In [17]:
# Make predictions on the scaled test set
y_pred_scaled = dtree.predict(X_test_scaled)


In [18]:

# Evaluate the accuracy of the scaled model
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)
print('Accuracy (scaled):', accuracy_scaled)


Accuracy (scaled): 0.9863636363636363


In [19]:
# Example new data for prediction
new_data = [[25, 60, 80, 7.5, 0.2, 6.5, 0.5]]   # Example feature values for crop 2

# Scale the new data using the same scaler object
new_data_scaled = scaler.transform(new_data)

In [20]:
value=dtree.predict(new_data_scaled)
value

array(['muskmelon'], dtype=object)

In [21]:
def predictDtree(N, P, K, temp, hum, ph, rainfall):
    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import MinMaxScaler
    # Load the dataset
    csv_data = pd.read_csv('crop_recommendation.csv')
    # Extract the features and label columns
    X = csv_data.drop('label', axis=1).values
    y = csv_data['label'].values
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20)
    
    # Scale the features using Min-Max Scaling
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Scale the features using Min-Max Scaling
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    new_data = [[N, P, K, temp, hum, ph, rainfall]]   
    new_data_scaled = scaler.transform(new_data)
    value=dtree.predict(new_data_scaled)
    return value

    

    


    
    




In [22]:
value=predictDtree(25, 60, 80, 7.5, 0.2, 6.5, 0.5)
op=predictDtree(90,42,43,20,82,6.5,202)
value
op

array(['rice'], dtype=object)

In [24]:
# Make predictions on the test set
y_pred = dtree.predict(X_test_scaled)

# Calculate accuracy score
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9863636363636363


In [25]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, roc_curve, roc_auc_score

# Calculate precision score
precision = precision_score(y_test, y_pred, average='weighted')


In [26]:

# Calculate recall score
recall = recall_score(y_test, y_pred, average='weighted')




In [27]:
# Calculate F1 score
f1 = f1_score(y_test, y_pred, average='weighted')



In [28]:
# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)



In [31]:
# Calculate ROC curve and AUC
y_probs = dtree.predict_proba(X_test_scaled)[:, 1]


print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.9863636363636363
Precision: 0.9868055555555556
Recall: 0.9863636363636363
F1 Score: 0.9863152866630287
Confusion Matrix:
 [[23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 26  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 17  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 22  0  0  0  0  0  0  0  0  0  0  0  1  0]
 [ 0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0 11  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  1  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 

NameError: name 'auc' is not defined