In [95]:
import os
import random
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import warnings
warnings.filterwarnings("ignore")

In [96]:
data_dir = "E:\\He_is_enough03 X UniqoXTech X Dreams\\Click_here\\Artificial Intelligence\\ML - AGRIQO\\Data set\\Agriqo(slider1) - Sheet1.csv"

data = pd.read_csv(data_dir)

In [97]:
col = ['Unnamed: 9','Unnamed: 10','Unnamed: 11','Unnamed: 12']
data.drop(columns=col, axis=1, inplace=True)

In [98]:
import numpy as np

augmented_data = []

for index, row in data.iterrows():
    for _ in range(15):
        new_row = {
            'Agricultural zone': row['Agricultural zone'],
            'month(chara)': row['month(chara)'],
            'rainfall (mm)': row['rainfall (mm)'] + np.random.uniform(-3, 3),
            'temperature(avg)': row['temperature(avg)'] + np.random.uniform(-2, 2),
            'humidity(avg)': row['humidity(avg)'] + np.random.uniform(-10, 10),
            'sunlight(hour)': row['sunlight(hour)'],
            'direction of wind(deg)': row['direction of wind(deg)'] + np.random.uniform(-40, 40),
            'velocity of wind(km/h)': row['velocity of wind(km/h)'] + np.random.uniform(-5, 5),
            'label': row['label']
        }
        augmented_data.append(new_row)

augmented_data_df = pd.DataFrame(augmented_data)

print(f"Total number of rows in augmented data: {len(augmented_data_df)}")


Total number of rows in augmented data: 5595


In [99]:
data = pd.concat([data, augmented_data_df], ignore_index=True)

In [100]:
data = pd.get_dummies(data, columns=['Agricultural zone'])

In [101]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()

data['label'] = encoder.fit_transform(data['label'])

label_mapping = {original_label: encoded_value for encoded_value, original_label in enumerate(encoder.classes_)}

print(label_mapping)

{'Aman': 0, 'Aush': 1, 'Badam Kharip - 1': 2, 'Badam robi': 3, 'Banana': 4, 'Boro': 5, 'Brinjal(Khorip)': 6, 'Brinjal(Robi)': 7, 'Corn(Robi)': 8, 'Guava': 9, 'Kharif cucumber': 10, 'Khorip Mug 1': 11, 'Mango': 12, 'Potato': 13, 'Rabi Cucumber': 14, 'Red Lentil': 15, 'Robi Mug': 16, 'Soybean': 17, 'Sugarcane': 18, 'Tomato': 19, 'Tula': 20, 'Wheat': 21, 'corn khorip-1': 22, 'garlic': 23, 'indian jujube': 24, 'jackfruit': 25, 'jute': 26, 'khorip green chilli': 27, 'khorip lau (grourd)': 28, 'khorip onion': 29, 'khorip pointed grourd': 30, 'khorip pumpkin Cucurbita': 31, 'licchi': 32, 'masterd seed': 33, 'papaya': 34, 'pineapple': 35, 'robi green chilli ': 36, 'robi lau (gourd)': 37, 'robi onion': 38, 'robi pointed gourd': 39, 'robi pumpkin Cucurbita': 40}


In [102]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn import metrics

X = data.drop(['label'],axis=1)
y = data['label']

X_t, X_val, y_t, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

X_num = X_t[['rainfall (mm)', 'temperature(avg)', 'humidity(avg)', 'sunlight(hour)','direction of wind(deg)', 'velocity of wind(km/h)']]
X_cat = X_t[['month(chara)','Agricultural zone_Barisal','Agricultural zone_Bogra', 'Agricultural zone_Chittagonj',
       'Agricultural zone_Cumilla', 'Agricultural zone_Dhaka',
       'Agricultural zone_Dinajpur', 'Agricultural zone_Faridpur',
       'Agricultural zone_Jashore', 'Agricultural zone_Khulna',
       'Agricultural zone_Mymensingh', 'Agricultural zone_Rajshahi',
       'Agricultural zone_Rangamati', 'Agricultural zone_Rangpur',
       'Agricultural zone_Sylhet']]

scaled_list=[]
scalers = {}

agricultural_zones = [
    'Agricultural zone_Barisal', 'Agricultural zone_Bogra', 'Agricultural zone_Chittagonj',
    'Agricultural zone_Cumilla', 'Agricultural zone_Dhaka', 'Agricultural zone_Dinajpur', 
    'Agricultural zone_Faridpur', 'Agricultural zone_Jashore', 'Agricultural zone_Khulna',
    'Agricultural zone_Mymensingh', 'Agricultural zone_Rajshahi', 'Agricultural zone_Rangamati', 
    'Agricultural zone_Rangpur', 'Agricultural zone_Sylhet'
]

for zone in agricultural_zones:
    zone_data = X_num[X_t[zone] == 1]
    
    if not zone_data.empty:
        scaler = RobustScaler()
        scaled_zone_data = scaler.fit_transform(zone_data)
        scaled_zone_df = pd.DataFrame(scaled_zone_data, columns=X_num.columns, index=zone_data.index)
        
        scaled_list.append(scaled_zone_df)
        scalers[zone] = scaler 

scaled_data = pd.concat(scaled_list)
# print(scaled_data.shape)
scaled_data = pd.concat([scaled_data, X_cat], axis=1)
scaled_data['label']=y_t

# print(scaled_data.isnull().sum().sum())
# print(scaled_data.shape)

X_scaled = scaled_data.drop(['label'],axis=1)
y_scaled = scaled_data['label']

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

print("Train set size:", X_train.shape)
print("Validation set size:", X_val.shape)
print("Test set size:", X_test.shape)

Train set size: (4296, 21)
Validation set size: (597, 21)
Test set size: (1075, 21)


In [103]:
counter = 0
for cls, scaler in scalers.items():
    print(f"{counter} Class: {cls}")
    if cls == 8:
        medians = scaler.center_  
        iqr = scaler.scale_  
        for feature, median, iqr_value in zip(X_num.columns, medians, iqr):
            print(f"Feature: {feature}")
            print(f"  Median: {median}")
            print(f"  IQR: {iqr_value}")
    # print("-" * 50)
    counter+=1

0 Class: Agricultural zone_Barisal
1 Class: Agricultural zone_Bogra
2 Class: Agricultural zone_Chittagonj
3 Class: Agricultural zone_Cumilla
4 Class: Agricultural zone_Dhaka
5 Class: Agricultural zone_Dinajpur
6 Class: Agricultural zone_Faridpur
7 Class: Agricultural zone_Jashore
8 Class: Agricultural zone_Khulna
9 Class: Agricultural zone_Mymensingh
10 Class: Agricultural zone_Rajshahi
11 Class: Agricultural zone_Rangamati
12 Class: Agricultural zone_Rangpur
13 Class: Agricultural zone_Sylhet


In [104]:
from sklearn.ensemble import VotingClassifier

zone_models={}

for zone in agricultural_zones:
    print(f"Training model for {zone}...")
    
    zone_X_train = X_train[X_train[zone]==1]
    zone_y_train = y_train[X_train[zone]==1]

    zone_X_train = zone_X_train.drop(columns=agricultural_zones)
    
    model = VotingClassifier(estimators=[
        ('dt', DecisionTreeClassifier()),
        ('rf', RandomForestClassifier(n_estimators=200)),
        ('gb', GradientBoostingClassifier()),
        ('cat', CatBoostClassifier(task_type="GPU", verbose=0)),
        ('xgb', XGBClassifier(tree_method='gpu_hist', use_label_encoder=False, eval_metric='mlogloss')),
    ], voting='soft')
    
    model.fit(zone_X_train, zone_y_train)
    
    zone_models[zone] = model

Training model for Agricultural zone_Barisal...
Training model for Agricultural zone_Bogra...
Training model for Agricultural zone_Chittagonj...
Training model for Agricultural zone_Cumilla...
Training model for Agricultural zone_Dhaka...
Training model for Agricultural zone_Dinajpur...
Training model for Agricultural zone_Faridpur...
Training model for Agricultural zone_Jashore...
Training model for Agricultural zone_Khulna...
Training model for Agricultural zone_Mymensingh...
Training model for Agricultural zone_Rajshahi...
Training model for Agricultural zone_Rangamati...
Training model for Agricultural zone_Rangpur...
Training model for Agricultural zone_Sylhet...


In [106]:
from sklearn.metrics import accuracy_score

y_pred = []
y_true = []
for idx, row in X_test.iterrows():
    zone = None
    for z in agricultural_zones:
        if row[z] == 1:
            zone = z
            break 

    if zone and zone in zone_models:
        model = zone_models[zone]

        input_data = row.drop(list(agricultural_zones)).values.reshape(1, -1)
        
        # Make prediction
        prediction = model.predict(input_data)[0]

        # Store results
        y_true.append(y_test.loc[idx])  # Actual label
        y_pred.append(prediction)  # Predicted label

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print("Zone-wise Model Test Accuracy:", accuracy)

Zone-wise Model Test Accuracy: 0.7441860465116279
