In [2]:
import pandas as pd
import requests as rq
import json as js
from time import sleep
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
import category_encoders as ce
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import ElasticNet
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
warnings.filterwarnings('ignore')
import joblib

# API Call

In [2]:
class BestTimeToFertilize:
    __BASE_URL = "https://api.weatherbit.io/v2.0/forecast/daily?"
    __API_KEY = "051d187cb45f4da7835e3fcd695dc707"
    
    def __init__(self, city_name = 'Nagpur', state_name = 'Maharashtra', days = 7):
        self.city_name = '+'.join(city_name.lower().strip().split())
        self.state_name = '+'.join(state_name.lower().strip().split())
        self.country_name = 'IN'
        self.days = days
        self.response = None
        self.response_code = None
        self.weather_data = list()
        
    def api_caller(self):
        try:
            complete_url = "{0}city={1}&state={2}&country={3}&key={4}&days={5}".format(self.__BASE_URL, self.city_name, self.state_name, self.country_name, self.__API_KEY, self.days)
            print(complete_url)
            self.response = rq.get(complete_url)
            sleep(5)
            self.response_code = self.response.status_code
            return self.response_code
        except Exception as msg:
            print("api_caller():", msg)
            return -1
        
    def is_api_call_success(self):
        if self.response_code == 200:
            return True
        elif self.response_code == 204:
            print('Oops! It seems there was an issue with the API call. Please check your input and try again later.')
        return False
    
    def json_file_bulider(self):
        try:
            json_obj = self.response.json()
            with open('weather_data.json', 'w') as file:
                js.dump(json_obj, file, indent = 1, sort_keys = True)
            print("weather_data.json file built successfully")
        except Exception as msg:
            print("json_bulider():", msg)
            
    def best_time_fertilize(self):
        json_obj = self.response.json()
        
        print("City:", json_obj['city_name'], "\n")

        prolonged_precip = 0
        prolonged_prob = 0
        heavy_rain_2d = False
        heavy_rain_chance_2d = 0
        precip_2d = 0
        precip_chance_2d = 0
        
        for i in range(self.days):
            date = json_obj['data'][i]['datetime']
            temp = json_obj['data'][i]['temp']
            rh = json_obj['data'][i]['rh']
            precip = json_obj['data'][i]['precip']
            prob = json_obj['data'][i]['pop']
            w_code = json_obj['data'][i]['weather']['code']
            w_desc = json_obj['data'][i]['weather']['description']
            i_code = json_obj['data'][i]['weather']['icon']
            prolonged_precip += precip
            prolonged_prob += prob

            count_2d = 0
            if i < 2:
                precip_2d += precip
                precip_chance_2d += prob
                if w_code in [202, 233, 502, 521, 522]:
                    heavy_rain_2d = True
                    heavy_rain_chance_2d += prob
                    count_2d += 1
                    heavy_rain_chance_2d //= count_2d
            
            di = {
                  "Date": str(date), 
                  "Temperature": str(temp), 
                  "Relative Humidity": str(rh), 
                  "Rainfall": str(precip), 
                  "Probability of Precipitation": str(prob),
                  "Weather code": str(w_code),
                  "Weather Description": str(w_desc),
                  "Icon code": str(i_code)
                 }
            self.weather_data.append(di)
            
            print("Date:", date)
            print("Temperature:", temp)
            print("Relative Humidity:", rh)
            print("Rainfall:", precip)
            print("Probability of Precipitation:", prob)
            print("Weather code:", w_code, "->", w_desc)
            print()

        prolonged_prob //= self.days
        precip_chance_2d //= 2

        print("-"*80)
        print("Rainfall Forecast for the Next 2 Days (including today):", precip_2d)
        print("Probability of Rain for the Next 2 Days (including today):", precip_chance_2d)
        print()

        if heavy_rain_2d:
            print("*"*21, "Warning !!!", "*"*21)
            print("Heavy Rain Chances:", heavy_rain_chance_2d)
            print("Heavy Rainfall puts your fertilizer at risk.")
            print("*"*21, "Warning !!!", "*"*21)

        print("Prolonged Precipitation:", prolonged_precip)
        print("Prolonged Precipitation Probability:", prolonged_prob)
        print()

        if prolonged_precip > 12.7 and prolonged_prob >= 50:
            print("*"*21, "Warning !!!", "*"*21)
            print("Prolonged Rainfall of greater than 12.7 mm puts your fertilizer at risk.")
            print("*"*21, "Warning !!!", "*"*21)

In [3]:
if __name__ == '__main__':
    obj = BestTimeToFertilize('Mumbai', 'Maharashtra')
    print(obj.api_caller())
    if obj.is_api_call_success():
        obj.best_time_fertilize()
        obj.json_file_bulider()
#         print(obj.weather_data)

https://api.weatherbit.io/v2.0/forecast/daily?city=mumbai&state=maharashtra&country=IN&key=051d187cb45f4da7835e3fcd695dc707&days=7
200
City: Mumbai 

Date: 2023-09-07
Temperature: 26.3
Relative Humidity: 89
Rainfall: 47.3125
Probability of Precipitation: 95
Weather code: 502 -> Heavy rain

Date: 2023-09-08
Temperature: 27.2
Relative Humidity: 88
Rainfall: 70.875
Probability of Precipitation: 95
Weather code: 502 -> Heavy rain

Date: 2023-09-09
Temperature: 27.6
Relative Humidity: 86
Rainfall: 26.3125
Probability of Precipitation: 90
Weather code: 502 -> Heavy rain

Date: 2023-09-10
Temperature: 27.9
Relative Humidity: 84
Rainfall: 7.625
Probability of Precipitation: 70
Weather code: 501 -> Moderate rain

Date: 2023-09-11
Temperature: 27.9
Relative Humidity: 84
Rainfall: 13.625
Probability of Precipitation: 80
Weather code: 501 -> Moderate rain

Date: 2023-09-12
Temperature: 27.9
Relative Humidity: 83
Rainfall: 3.25
Probability of Precipitation: 50
Weather code: 500 -> Light rain

Date:

# Data

In [3]:
data = './app/data/Nutrient_recommendation.csv'
df = pd.read_csv(data, header=None)
df.shape

(2201, 7)

In [4]:
df.head(10)

Unnamed: 0,0,1,2,3,4,5,6
0,Crop,Temperature,Humidity,Rainfall,Label_N,Label_P,Label_K
1,rice,20.87974371,82.00274423,202.9355362,90,42,43
2,rice,21.77046169,80.31964408,226.6555374,85,58,41
3,rice,23.00445915,82.3207629,263.9642476,60,55,44
4,rice,26.49109635,80.15836264,242.8640342,74,35,40
5,rice,20.13017482,81.60487287,262.7173405,78,42,42
6,rice,23.05804872,83.37011772,251.0549998,69,37,42
7,rice,22.70883798,82.63941394,271.3248604,69,55,38
8,rice,20.27774362,82.89408619,241.9741949,94,53,40
9,rice,24.51588066,83.5352163,230.4462359,89,54,38


In [5]:
df.columns = ['Crop', 'Temperature', 'Humidity', 'Rainfall', 'Label_N', 'Label_P', 'Label_K']
df.drop(df.index[:1], inplace=True)
df.head(10)

Unnamed: 0,Crop,Temperature,Humidity,Rainfall,Label_N,Label_P,Label_K
1,rice,20.87974371,82.00274423,202.9355362,90,42,43
2,rice,21.77046169,80.31964408,226.6555374,85,58,41
3,rice,23.00445915,82.3207629,263.9642476,60,55,44
4,rice,26.49109635,80.15836264,242.8640342,74,35,40
5,rice,20.13017482,81.60487287,262.7173405,78,42,42
6,rice,23.05804872,83.37011772,251.0549998,69,37,42
7,rice,22.70883798,82.63941394,271.3248604,69,55,38
8,rice,20.27774362,82.89408619,241.9741949,94,53,40
9,rice,24.51588066,83.5352163,230.4462359,89,54,38
10,rice,23.22397386,83.03322691,221.2091958,68,58,38


In [6]:
df[['Temperature', 'Humidity', 'Rainfall']] = df[['Temperature', 'Humidity', 'Rainfall']].astype(float)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 1 to 2200
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Crop         2200 non-null   object 
 1   Temperature  2200 non-null   float64
 2   Humidity     2200 non-null   float64
 3   Rainfall     2200 non-null   float64
 4   Label_N      2200 non-null   object 
 5   Label_P      2200 non-null   object 
 6   Label_K      2200 non-null   object 
dtypes: float64(3), object(4)
memory usage: 120.4+ KB


In [8]:
df.isnull().sum()

Crop           0
Temperature    0
Humidity       0
Rainfall       0
Label_N        0
Label_P        0
Label_K        0
dtype: int64

In [9]:
X = df.drop(['Label_N', 'Label_P', 'Label_K'], axis=1)

y1 = df['Label_N']
y2 = df['Label_P']
y3 = df['Label_K']

y = np.concatenate([[y1], [y2], [y3]], axis=0).T
y

array([['90', '42', '43'],
       ['85', '58', '41'],
       ['60', '55', '44'],
       ...,
       ['118', '33', '30'],
       ['117', '32', '34'],
       ['104', '18', '30']], dtype=object)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)
X_train.shape, X_test.shape

((1760, 4), (440, 4))

In [11]:
y_train.shape, y_test.shape

((1760, 3), (440, 3))

In [12]:
y1_train = y_train[:, 0]
y2_train = y_train[:, 1]
y3_train = y_train[:, 2]

y1_test = y_test[:, 0]
y2_test = y_test[:, 1]
y3_test = y_test[:, 2]

In [13]:
X_train.head(10)

Unnamed: 0,Crop,Temperature,Humidity,Rainfall
460,pigeonpeas,33.151226,32.459745,105.038027
1995,cotton,23.438217,78.633888,81.150721
1454,muskmelon,29.125337,91.522911,21.904404
1850,coconut,27.058265,91.105104,224.700695
1786,papaya,29.02328,90.203968,126.806987
1281,grapes,35.121583,82.268908,69.715185
1175,mango,31.27181,52.238102,89.74409
2161,coffee,26.366299,52.257385,177.317616
320,kidneybeans,23.04291,22.42611,108.368432
981,pomegranate,20.241049,91.087068,109.253773


In [16]:
mapping = dict()
with open("./app/data/mapped_crops.csv", "w") as f:
    f.write("Crops,Key\n")
    for i, crop in enumerate(np.unique(df[['Crop']]), 1):
        mapping[crop] =  i
        f.write("%s,%d\n" % (crop, i))
    mapping['NA'] = np.nan
    f.write("NA,nan")
    
print(mapping)

{'apple': 1, 'banana': 2, 'blackgram': 3, 'chickpea': 4, 'coconut': 5, 'coffee': 6, 'cotton': 7, 'grapes': 8, 'jute': 9, 'kidneybeans': 10, 'lentil': 11, 'maize': 12, 'mango': 13, 'mothbeans': 14, 'mungbean': 15, 'muskmelon': 16, 'orange': 17, 'papaya': 18, 'pigeonpeas': 19, 'pomegranate': 20, 'rice': 21, 'watermelon': 22, 'NA': nan}


In [17]:
ordinal_cols_mapping = [{"col": "Crop", "mapping": mapping}, ]
encoder = ce.OrdinalEncoder(cols = 'Crop', mapping = ordinal_cols_mapping, return_df = True)

X_train = encoder.fit_transform(X_train)
X_test = encoder.transform(X_test)
X_train.head()

Unnamed: 0,Crop,Temperature,Humidity,Rainfall
460,19,33.151226,32.459745,105.038027
1995,7,23.438217,78.633888,81.150721
1454,16,29.125337,91.522911,21.904404
1850,5,27.058265,91.105104,224.700695
1786,18,29.02328,90.203968,126.806987


# Training

### Random Forest

In [18]:
# Define the parameter grid for grid search
param_grid = {
    'n_estimators': [50, 100, 150],  
    'max_depth': [5, 10, 15],  
    'min_samples_split': [2, 5, 10],  
    'min_samples_leaf': [2, 4, 6] 
}

regressor = RandomForestRegressor(random_state=42)

grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y1_train)

best_params_rf = grid_search.best_params_
best_regressor_rf = grid_search.best_estimator_


In [19]:
print(best_regressor_rf)
print(best_params_rf)

RandomForestRegressor(max_depth=10, min_samples_leaf=2, min_samples_split=10,
                      n_estimators=50, random_state=42)
{'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 50}


In [20]:
# Nitrogen

rf1 = RandomForestRegressor(**best_params_rf)
rf1.fit(X_train, y1_train)
y1_pred = rf1.predict(X_test)

print("Random Forest Regressor (Nitrogen):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y1_test, y1_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y1_test, y1_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)), '.3f'))
print("\nRMSE: ", rmse)

Random Forest Regressor (Nitrogen):
Mean Absolute Error: 10.777496144635919
Mean Squared Error: 159.3087199612202
Root Mean Squared Error: 12.621755819267785

RMSE:  12.622


In [21]:
# Phosphorus

rf2 = RandomForestRegressor(**best_params_rf)
rf2.fit(X_train, y2_train)
y2_pred = rf2.predict(X_test)

print("Random Forest Regressor (Phosphorus):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y2_test, y2_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y2_test, y2_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y2_test, y2_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y2_test, y2_pred)), '.3f'))
print("\nRMSE: ", rmse)

Random Forest Regressor (Phosphorus):
Mean Absolute Error: 7.20443350688477
Mean Squared Error: 76.47334278970779
Root Mean Squared Error: 8.74490381820794

RMSE:  8.745


In [22]:
# Potassium

rf3 = RandomForestRegressor(**best_params_rf)
rf3.fit(X_train, y3_train)
y3_pred = rf3.predict(X_test)

print("Random Forest Regressor (Potassium):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y3_test, y3_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y3_test, y3_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y3_test, y3_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y3_test, y3_pred)), '.3f'))
print("\nRMSE: ", rmse)

Random Forest Regressor (Potassium):
Mean Absolute Error: 2.7810995403246057
Mean Squared Error: 10.52889489927055
Root Mean Squared Error: 3.244825865785489

RMSE:  3.245


### Gradient Boosting

In [23]:
# Define the hyperparameter grid for GridSearch
param_grid = {
    'n_estimators': [50, 100, 150],  
    'max_depth': [5, 10, 15],  
    'min_samples_split': [2, 5, 10],  
    'min_samples_leaf': [2, 4, 6] 
}

gbr = GradientBoostingRegressor(random_state=42)

grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train, y1_train)

best_params_gb = grid_search.best_params_
best_regressor_gb = grid_search.best_estimator_


In [24]:
print(best_regressor_gb)
print(best_params_gb)

GradientBoostingRegressor(max_depth=5, min_samples_leaf=6, n_estimators=50,
                          random_state=42)
{'max_depth': 5, 'min_samples_leaf': 6, 'min_samples_split': 2, 'n_estimators': 50}


In [25]:
# Nitrogen

gb1 = GradientBoostingRegressor(**best_params_gb)
gb1.fit(X_train, y1_train)
y1_pred = gb1.predict(X_test)

print("Random Forest Regressor (Nitrogen):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y1_test, y1_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y1_test, y1_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)), '.3f'))

Random Forest Regressor (Nitrogen):
Mean Absolute Error: 11.16738471054076
Mean Squared Error: 170.65558385745766
Root Mean Squared Error: 13.063521112527727


In [26]:
# Phosphorus

gb2 = GradientBoostingRegressor(**best_params_gb)
gb2.fit(X_train, y2_train)
y2_pred = gb2.predict(X_test)

print("Random Forest Regressor (Phosphorus):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y2_test, y2_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y2_test, y2_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y2_test, y2_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y2_test, y2_pred)), '.3f'))
print("\nRMSE: ", rmse)

Random Forest Regressor (Phosphorus):
Mean Absolute Error: 7.209923171880688
Mean Squared Error: 73.26685960314803
Root Mean Squared Error: 8.559606276175794

RMSE:  8.56


In [27]:
# Potassium

gb3 = GradientBoostingRegressor(**best_params_gb)
gb3.fit(X_train, y2_train)
y2_pred = gb3.predict(X_test)

print("Random Forest Regressor (Potassium):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y3_test, y3_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y3_test, y3_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y3_test, y3_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y3_test, y3_pred)), '.3f'))
print("\nRMSE: ", rmse)

Random Forest Regressor (Potassium):
Mean Absolute Error: 2.7810995403246057
Mean Squared Error: 10.52889489927055
Root Mean Squared Error: 3.244825865785489

RMSE:  3.245


### Light GBM

In [28]:
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [5, 10, 15],
    'min_child_samples': [2, 5, 10],  
    'min_child_weight': [1e-3, 1e-2, 1e-1],  
}

regressor = LGBMRegressor(random_state=42)

grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y1_train)

best_params_lgb = grid_search.best_params_
best_regressor_lgb = grid_search.best_estimator_


In [29]:
print(best_regressor_lgb)
print(best_params_lgb)

LGBMRegressor(max_depth=15, min_child_samples=2, n_estimators=50,
              random_state=42)
{'max_depth': 15, 'min_child_samples': 2, 'min_child_weight': 0.001, 'n_estimators': 50}


In [30]:
# Nitrogen

lgb1 = LGBMRegressor(**best_params_lgb)
lgb1.fit(X_train, y1_train)
y1_pred = lgb1.predict(X_test)

print("Random Forest Regressor (Nitrogen):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y1_test, y1_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y1_test, y1_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)), '.3f'))

Random Forest Regressor (Nitrogen):
Mean Absolute Error: 10.78813821983173
Mean Squared Error: 161.61238469700282
Root Mean Squared Error: 12.712685974922955


In [31]:
# Phosphorus

lgb2 = LGBMRegressor(**best_params_lgb)
lgb2.fit(X_train, y1_train)
y1_pred = lgb2.predict(X_test)

print("Random Forest Regressor (Phosphorus):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y2_test, y2_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y2_test, y2_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y2_test, y2_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y2_test, y2_pred)), '.3f'))

Random Forest Regressor (Phosphorus):
Mean Absolute Error: 7.20992317188069
Mean Squared Error: 73.26685960314805
Root Mean Squared Error: 8.559606276175794


In [32]:
# Potassium

lgb3 = LGBMRegressor(**best_params_lgb)
lgb3.fit(X_train, y1_train)
y1_pred = lgb3.predict(X_test)

print("Random Forest Regressor (Potassium):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y3_test, y3_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y3_test, y3_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y3_test, y3_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y3_test, y3_pred)), '.3f'))
print("\nRMSE: ", rmse)

Random Forest Regressor (Potassium):
Mean Absolute Error: 2.7810995403246057
Mean Squared Error: 10.52889489927055
Root Mean Squared Error: 3.244825865785489

RMSE:  3.245


### ElasticNet

In [33]:
param_grid = {
    'alpha': [0.1, 1.0, 10.0], 
    'l1_ratio': [0.1, 0.5, 0.9], 
}

regressor = ElasticNet(random_state=42)

grid_search = GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

best_params_en = grid_search.best_params_
best_regressor_en = grid_search.best_estimator_


In [34]:
print(best_regressor_en)
print(best_params_en)

ElasticNet(random_state=42)
{'alpha': 1.0, 'l1_ratio': 0.5}


In [35]:
# Nitrogen

en1 = ElasticNet(**best_params_en)
en1.fit(X_train, y1_train)
y1_pred = en1.predict(X_test)

print("ElasticNet (Nitrogen):")

print('Mean Absolute Error:', metrics.mean_absolute_error(y1_test, y1_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y1_test, y1_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)))

rmse = float(format(np.sqrt(metrics.mean_squared_error(y1_test, y1_pred)), '.3f'))

ElasticNet (Nitrogen):
Mean Absolute Error: 30.999470244469194
Mean Squared Error: 1276.3223245394788
Root Mean Squared Error: 35.72565359149471


### Best Model

In [36]:
# Random Forest Regressor

# save the model to disk
joblib.dump(rf1, './app/models/rf_N-v1.pkl')
joblib.dump(rf2, './app/models/rf_P-v1.pkl')
joblib.dump(rf3, './app/models/rf_K-v1.pkl')

['./app/models/rf_K-v1.pkl']

In [30]:
class NPKEstimator:
    def __init__(self, data = 'Nutrient_recommendation.csv', ):
        self.df = pd.read_csv(data, header=None)
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        
    
    def renameCol(self):
        self.df.columns = ['Crop', 'Temperature', 'Humidity', 'Rainfall', 'Label_N', 'Label_P', 'Label_K']
        self.df.drop(self.df.index[:1], inplace=True)
    
    
    def cropMapper(self):
        # create mapping of crop(string) to int type
        mapping = dict()

        with open("mapped_crops.csv", "w") as fh:
            fh.write("Crops,Key\n")
            for i, crop in enumerate(np.unique(self.df[['Crop']]), 1):
                mapping[crop] =  i
                fh.write("%s,%d\n" % (crop, i))
            mapping['NA'] = np.nan
            fh.write("NA,nan")
        # print(mapping)
        
        ordinal_cols_mapping = [{"col": "Crop", "mapping": mapping}, ]
        encoder = ce.OrdinalEncoder(cols = 'Crop', mapping = ordinal_cols_mapping, return_df = True)
        return mapping, encoder
    
    
    def estimator(self, crop, temp, humidity, rainfall, y_label):
        X = self.df.drop(['Label_N', 'Label_P', 'Label_K'], axis=1)
        y = self.df[y_label]
        
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)
        
        mapping, encoder = self.cropMapper()
        self.X_train = encoder.fit_transform(self.X_train)
        self.X_test = encoder.transform(self.X_test)
        
        regressor = RandomForestRegressor(n_estimators = 50, random_state = 0)
        regressor.fit(self.X_train, self.y_train)
        
        # y_pred = regressor.predict(self.X_test)
        query = [mapping[crop.strip().lower()], temp, humidity, rainfall]
        y_pred = regressor.predict([query])
        return y_pred
    
    
    def accuracyCalculator(self):
        model = RandomForestRegressor(n_jobs=-1)
        estimators = np.arange(10, 200, 10)
        scores = []
        for n in estimators:
            model.set_params(n_estimators=n)
            model.fit(self.X_train, self.y_train)
            scores.append(model.score(self.X_test, self.y_test))
        
        scores_arr = [round(sc, 3) for sc in scores]
        unique, counts = np.unique(scores_arr, return_counts = True)

        max_count = max(counts)
        accuracy = -1
        for uni, count in zip(unique, counts):
            # print(uni, count)
            if count == max_count:
                accuracy = uni

        # print("Model accuracy: %.2f" % (accuracy))
        return accuracy

In [38]:
if __name__ == '__main__':
    obj = NPKEstimator()
    obj.renameCol()
    # 'Label_N', 'Label_P', 'Label_K'
    # rice,21.94766735,80.97384195,213.3560921,67,59,41
    crop, temp, humidity, rainfall, y_label = 'rice',21.94766735,80.97384195,213.3560921,'Label_K'
    res = obj.estimator(crop, temp, humidity, rainfall, y_label)
    print(y_label, ":", res[0])

Label_K : 40.46
