In [None]:
#Entire dataset

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import numpy as np

In [None]:
# Step 1: Load the dataset
data = pd.read_excel("newww_weed1 (3).xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []
predi=[]

In [None]:
for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    for feature in features.columns:
      plt.figure(figsize=(4, 3))
      sns.kdeplot(data=train_data, x=feature, hue='Outcome', fill=True, common_norm=False, palette="husl")
      plt.title(f'Density plot of {feature} (Training Data - Fold {fold})')
      plt.show()
      print()

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    print("Spread factor for fold",fold)
    print(list(spread_factors))

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}
    print("Thresholds for fold",fold)
    print(thresholds)

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)
    print("Predictions")
    print(predictions)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    predi.append(predictions)
    acc=max(accuracies)
    print()


In [None]:
# Print the average accuracy across all folds
print(f"Overall Accuracy: {acc*100:.3f} %")

Overall Accuracy: 60.714 %


In [None]:
pred_ind=accuracies.index(acc)
final_prediction=predi[pred_ind]

# Initialize dictionaries to store class-wise counts
correct_counts = {cls: 0 for cls in set(test_labels)}
total_counts = {cls: 0 for cls in set(test_labels)}

# Calculate overall accuracy and class-wise counts
for pred, label in zip(final_prediction, test_labels):
    if pred == label:
        correct_counts[label] += 1
    total_counts[label] += 1

# Calculate class-wise accuracies
class_accuracies = {cls: correct_counts[cls] / total_counts[cls] if total_counts[cls] != 0 else 0 for cls in correct_counts}

print("Class-wise Accuracies:")
for cls, accuracy in class_accuracies.items():
    print(f"{cls}: {accuracy*100:.3f} %")

print("Accuracy for 1 and 2: ",(correct_counts[1]+correct_counts[2])/(total_counts[1]+total_counts[2])*100," %")
print("Accuracy for 3 and 4: ",(correct_counts[3]+correct_counts[4])/(total_counts[3]+total_counts[4])*100," %")


Class-wise Accuracies:
1: 90.909 %
2: 72.222 %
3: 42.308 %
4: 38.889 %
Accuracy for 1 and 2:  82.5  %
Accuracy for 3 and 4:  40.909090909090914  %


In [None]:
#Seperate dataset for common lambsquarters and common purslane

# Step 1: Load the dataset
data = pd.read_excel("direct1and2 (2).xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    acc=max(accuracies)

# Print the average accuracy across all folds
print(f"Accuracy: {acc*100:.3f} %")

Accuracy: 97.561 %


In [None]:
#Seperate dataset for horseweed and redroot pigweed

# Step 1: Load the dataset
data = pd.read_excel("newww_weed1 (4).xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    acc=max(accuracies)

# Print the average accuracy across all folds
print(f"Accuracy: {acc*100:.3f} %")

Accuracy: 88.636 %


In [None]:
#Seperate dataset for common lambsquarters

# Step 1: Load the dataset
data = pd.read_excel("direct1.xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    acc=max(accuracies)

# Print the average accuracy across all folds
print(f"Accuracy: {acc*100:.3f} %")

Accuracy: 100.000 %


In [None]:
#Seperate dataset for common purslane

# Step 1: Load the dataset
data = pd.read_excel("direct2.xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    acc=max(accuracies)

# Print the average accuracy across all folds
print(f"Accuracy: {acc*100:.3f} %")

Accuracy: 100.000 %


In [None]:
#Seperate dataset for horseweed

# Step 1: Load the dataset
data = pd.read_excel("direct3.xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    acc=max(accuracies)

# Print the average accuracy across all folds
print(f"Accuracy: {acc*100:.3f} %")

Accuracy: 100.000 %


In [None]:
#Seperate dataset for redroot pigweed

# Step 1: Load the dataset
data = pd.read_excel("direct4.xlsx")

# Step 2: Set up k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 3: Perform k-fold cross-validation
accuracies = []

for fold, (train_index, test_index) in enumerate(kf.split(data), 1):
    train_data, test_data = data.iloc[train_index].copy(), data.iloc[test_index].copy()

    # Generate density plots for training data
    features = train_data.drop(columns=['Outcome'])

    # Calculate spread factor for training data
    spread_factors = train_data.groupby('Outcome').apply(lambda x: x.mean().tolist())

    # Find correct threshold for training data
    threshold_multiplier = 1.5
    thresholds = {cls: [spread_factors.loc[cls][i] * threshold_multiplier for i in range(len(spread_factors.iloc[0]))] for cls in spread_factors.index}

    def find_class(row, thresholds):
        max_distance = float('-inf')
        predicted_class = 'Unknown'

        for cls, class_thresholds in thresholds.items():
            distance = sum(1 for i, value in enumerate(row) if value < class_thresholds[i])
            if distance > max_distance:
                max_distance = distance
                predicted_class = cls
        return predicted_class

    predictions = test_data.drop(columns='Outcome').apply(lambda row: find_class(row, thresholds), axis=1)

    # Evaluate the predictions
    test_labels = test_data['Outcome']
    accuracy = sum(1 for pred, label in zip(predictions, test_labels) if pred == label) / len(test_labels)
    accuracies.append(accuracy)
    acc=max(accuracies)

# Print the average accuracy across all folds
print(f"Accuracy: {acc*100:.3f} %")

Accuracy: 100.000 %
