In [68]:
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from ydata_profiling import ProfileReport
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, RandomForestRegressor,     GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, r2_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Location of the dataset, relative to the notebook's working directory.
crop_filepath = "./data/new1.csv"

# Read the CSV into a DataFrame, then preview its first rows as the cell output.
data = pd.read_csv(filepath_or_buffer=crop_filepath)
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [69]:
# Swap the string labels in `data` for integer codes. `encoder` is kept around so
# the codes can be turned back into label strings with inverse_transform.
# NOTE: the "label" column is overwritten in place, so re-running this cell
# without reloading `data` refits the encoder on the integer codes themselves.
encoder = LabelEncoder()
data["label"] = encoder.fit_transform(data["label"])

In [70]:
# Predictors: every column of `data` except the encoded label.
features = data.drop(columns=["label"])
# Response: the integer-encoded label column.
target = data["label"]

In [71]:
# Seeded train/test split (default split proportions) so the partition is
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=42,
)

In [72]:
# Display the held-out feature rows produced by the train/test split.
X_test

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
3206,34,33,53,27.528578,72.186749,6.414002,81.094966
2998,104,59,165,21.183971,73.530064,6.207726,50.388060
2642,87,106,80,32.578839,70.982596,5.226143,70.743399
139,76,57,18,18.980273,74.526008,6.092726,94.262494
2649,108,119,73,33.870362,82.413168,5.413098,64.561815
...,...,...,...,...,...,...,...
2535,77,24,41,36.946534,61.999783,6.265900,62.815937
2186,107,38,29,26.650693,57.566957,6.351182,145.105065
857,17,74,17,26.030270,69.558631,7.393211,37.113958
1675,25,21,11,32.237978,90.154068,6.460045,104.705225


In [73]:
# One hand-written sample to score with the fitted models. Each value is wrapped
# in a single-item list so the resulting DataFrame has exactly one row.
new_data = dict(
    N=[50], P=[100], K=[60],
    temperature=[34], humidity=[70], ph=[5.5], rainfall=[100],
)
X = pd.DataFrame(data=new_data)

## Random Forest

In [74]:
# Fit a 10-tree random forest on the complete feature/target set (the train/test
# split is not used for this model).
rf = RandomForestClassifier(n_estimators=10, max_features=3, random_state=0)
rf.fit(features, target)

# Predicted integer label code for the single sample held in X.
rf_pred = rf.predict(X)
print(rf_pred)

# Translate the code back into its original label string.
original = encoder.inverse_transform(rf_pred)
print(original)

[31]
['sweetpotato']


In [75]:
# Decode the integer-encoded target back into the original label strings.
named_target = encoder.inverse_transform(target)

In [76]:
# Class-membership probabilities for every row of X; one column per fitted class.
rf_prob = rf.predict_proba(X)
print(rf_prob)

# Pair each probability with the encoded label of its column and rank the pairs
# from most to least likely. The label is read from rf.classes_ instead of using
# the column position, so the pairing stays correct even if the forest was fitted
# on data that does not contain every encoded label. int() keeps the second tuple
# item a plain Python integer.
label_probabilities = [
    sorted(
        ((probability, int(code)) for probability, code in zip(row, rf.classes_)),
        reverse=True,
    )
    for row in rf_prob
]

# X holds the single hand-built sample (not a test-set row), so keep only the
# ranking for that first row and print it.
label_probabilities = label_probabilities[0]
print(label_probabilities)

[[0.  0.1 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.2 0.  0.  0.  0.1 0.  0.1 0.  0.  0.  0.  0.4 0.  0.  0.1]]
[(0.4, 31), (0.2, 20), (0.1, 34), (0.1, 26), (0.1, 24), (0.1, 1), (0.0, 33), (0.0, 32), (0.0, 30), (0.0, 29), (0.0, 28), (0.0, 27), (0.0, 25), (0.0, 23), (0.0, 22), (0.0, 21), (0.0, 19), (0.0, 18), (0.0, 17), (0.0, 16), (0.0, 15), (0.0, 14), (0.0, 13), (0.0, 12), (0.0, 11), (0.0, 10), (0.0, 9), (0.0, 8), (0.0, 7), (0.0, 6), (0.0, 5), (0.0, 4), (0.0, 3), (0.0, 2), (0.0, 0)]


In [77]:
# Convert every (probability, label code) tuple into a mutable two-item list.
list_label_probabilities = list(map(list, label_probabilities))
print(list_label_probabilities)

[[0.4, 31], [0.2, 20], [0.1, 34], [0.1, 26], [0.1, 24], [0.1, 1], [0.0, 33], [0.0, 32], [0.0, 30], [0.0, 29], [0.0, 28], [0.0, 27], [0.0, 25], [0.0, 23], [0.0, 22], [0.0, 21], [0.0, 19], [0.0, 18], [0.0, 17], [0.0, 16], [0.0, 15], [0.0, 14], [0.0, 13], [0.0, 12], [0.0, 11], [0.0, 10], [0.0, 9], [0.0, 8], [0.0, 7], [0.0, 6], [0.0, 5], [0.0, 4], [0.0, 3], [0.0, 2], [0.0, 0]]


In [78]:
# Rebuild the list from the (probability, code) tuples in `label_probabilities`
# instead of overwriting the codes in place: the in-place loop fed already
# decoded values back into inverse_transform when the cell was run a second time.
# inverse_transform returns a length-1 array per call; that array is stored as-is
# so each entry keeps the [probability, array([label])] layout.
list_label_probabilities = [
    [probability, encoder.inverse_transform([code])]
    for probability, code in label_probabilities
]
list_label_probabilities

[[0.4, array(['sweetpotato'], dtype=object)],
 [0.2, array(['mucuna'], dtype=object)],
 [0.1, array(['yam'], dtype=object)],
 [0.1, array(['pigeonpeas'], dtype=object)],
 [0.1, array(['papaya'], dtype=object)],
 [0.1, array(['banana'], dtype=object)],
 [0.0, array(['watermelon'], dtype=object)],
 [0.0, array(['tomato'], dtype=object)],
 [0.0, array(['soybean'], dtype=object)],
 [0.0, array(['sorghum'], dtype=object)],
 [0.0, array(['rice'], dtype=object)],
 [0.0, array(['pomegranate'], dtype=object)],
 [0.0, array(['pepper'], dtype=object)],
 [0.0, array(['orange'], dtype=object)],
 [0.0, array(['muskmelon'], dtype=object)],
 [0.0, array(['mungbean'], dtype=object)],
 [0.0, array(['mothbeans'], dtype=object)],
 [0.0, array(['millet'], dtype=object)],
 [0.0, array(['mango'], dtype=object)],
 [0.0, array(['maize'], dtype=object)],
 [0.0, array(['lentil'], dtype=object)],
 [0.0, array(['kidneybeans'], dtype=object)],
 [0.0, array(['jute'], dtype=object)],
 [0.0, array(['groundnut'], dtype