In [2]:
import numpy as np
import pandas as pd



In [3]:
class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=100, batch_size=32):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.batch_size = batch_size
        self.weights = None
        self.bias = None
        self.classes = None
        
    def fit(self, X, y):
        self.classes = np.unique(y)
        n_samples, n_features = X.shape
        self.weights = np.zeros((len(self.classes), n_features))
        self.bias = np.zeros(len(self.classes))
        for i, c in enumerate(self.classes):
            y_binary = np.where(y == c, 1, -1)
            w = np.zeros(n_features)
            b = 0
            for _ in range(self.n_iters):
                indices = np.random.choice(n_samples, self.batch_size, replace=False)
                X_batch = X[indices]
                y_batch = y_binary[indices]
                scores = np.dot(X_batch, w) - b
                margins = y_batch * scores
                misclassified = margins < 1
                grad_w = self.lambda_param * w - np.dot(X_batch.T, y_batch * misclassified) / self.batch_size
                grad_b = -np.sum(y_batch * misclassified) / self.batch_size
                w -= self.lr * grad_w
                b -= self.lr * grad_b
            self.weights[i] = w
            self.bias[i] = b
            
    def predict(self, X):
        output = np.dot(X, self.weights.T) - self.bias
        return self.classes[np.argmax(output, axis=1)]

In [4]:
# Define the Random Forest model
from sklearn.tree import DecisionTreeClassifier


class RandomForest:
    def __init__(self, n_trees=100, max_depth=5, min_samples_split=2):
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []

    def fit(self, X, y):
        n_samples, n_features = X.shape

        for _ in range(self.n_trees):
            tree = DecisionTreeClassifier(max_depth=self.max_depth,
                                min_samples_split=self.min_samples_split)
            idx = np.random.choice(n_samples, n_samples, replace=True)
            X_subset = X[idx]
            y_subset = y[idx]
            tree.fit(X_subset, y_subset)
            self.trees.append(tree)

    def predict(self, X):
        tree_preds = np.array([tree.predict(X) for tree in self.trees])
        tree_preds = np.swapaxes(tree_preds, 0, 1)
        y_pred = [np.bincount(tree_pred).argmax() for tree_pred in tree_preds]
        return np.array(y_pred)

In [5]:
data=pd.read_csv("Crop_recommendation.csv")


In [6]:
data.head()


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [7]:
data.tail()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.92461,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee
2199,104,18,30,23.603016,60.396475,6.779833,140.937041,coffee


In [8]:
data.shape

(2200, 8)

In [9]:
data.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [10]:
data.duplicated().sum()


0

In [11]:
data.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [12]:
data.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [14]:
data.nunique()

N               137
P               117
K                73
temperature    2200
humidity       2200
ph             2200
rainfall       2200
label            22
dtype: int64

In [15]:
data['label'].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [16]:
data['label'].value_counts()

rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: label, dtype: int64

In [17]:
crop_sum=pd.pivot_table(data,index=['label'],aggfunc="mean")

In [18]:
crop_sum

Unnamed: 0_level_0,K,N,P,humidity,ph,rainfall,temperature
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
apple,199.89,20.8,134.22,92.333383,5.929663,112.654779,22.630942
banana,50.05,100.23,82.01,80.358123,5.983893,104.62698,27.376798
blackgram,19.24,40.02,67.47,65.118426,7.133952,67.884151,29.97334
chickpea,79.92,40.09,67.79,16.860439,7.336957,80.058977,18.872847
coconut,30.59,21.98,16.93,94.844272,5.976562,175.686646,27.409892
coffee,29.94,101.2,28.74,58.869846,6.790308,158.066295,25.540477
cotton,19.56,117.77,46.24,79.843474,6.912675,80.398043,23.988958
grapes,200.11,23.18,132.53,81.875228,6.025937,69.611829,23.849575
jute,39.99,78.4,46.86,79.639864,6.732778,174.792798,24.958376
kidneybeans,20.05,20.75,67.54,21.605357,5.749411,105.919778,20.115085


In [19]:
data


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [20]:
data.dropna()


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [21]:
data.shape

(2200, 8)

In [22]:
 X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [23]:
data.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [24]:
print(X.shape)

(2200, 7)


In [25]:
print(y.shape)

(2200,)


In [26]:
data.shape

(2200, 8)

In [27]:
from sklearn.preprocessing import LabelEncoder

In [28]:
print(X.shape)

(2200, 7)


In [29]:
print(y.shape)

(2200,)


In [30]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
le = LabelEncoder()
y = le.fit_transform(y)

In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [57]:
svm_model = SVM()

In [58]:
svm_model.fit(X_train, y_train)

In [59]:
svm_pred = svm_model.predict([[[90,42,43,20.87974371,82.00274423,6.502985292000001,202.9355362]]])
svm_acc = accuracy_score(y_test, svm_pred)

ValueError: Found input variables with inconsistent numbers of samples: [440, 1]

In [60]:
rf_model = RandomForest()
rf_model.fit(X_train, y_train)

In [68]:
from sklearn import metrics
rf_pred = rf_model.predict([[90,42,43,20.87974371,82.00274423,6.502985292000001,202.9355362]])
print(rf_pred.shape)
print("Accuracy:",metrics.accuracy_score(y_test, rf_pred))

(1,)


ValueError: Found input variables with inconsistent numbers of samples: [440, 1]

In [52]:
print(X_test.shape)
print (y_test.shape)
print(svm_pred.shape)

(440, 7)
(440,)
(440,)


In [38]:
print(svm_acc)

0.5295454545454545


In [39]:
rf_pred = rf_model.predict([[90,42,43,20.87974371,82.00274423,6.502985292000001,202.9355362]])

In [40]:
print(rf_pred)

[8]


In [41]:
le.inverse_transform(rf_pred)

array(['jute'], dtype=object)