In [1]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
from sklearn.impute import SimpleImputer

In [2]:
def apply_algorithms(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    imputer = SimpleImputer(strategy='mean')
    X_train = imputer.fit_transform(X_train)
    X_test = imputer.transform(X_test)

    models = {
        'Naive Bayes': GaussianNB(),
        'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=10),
        'SVM': SVC(kernel='sigmoid'),
        'Logistic Regression': LogisticRegression()
    }

    results = {}

    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        results[name] = {'Accuracy': accuracy, 'Precision': precision}

    return results

In [3]:

rice_data = pd.read_csv('rice_dataset.csv')
X_rice = rice_data.drop(' Class', axis=1)
y_rice = rice_data[' Class']
rice_results = apply_algorithms(X_rice, y_rice)

# salary_data = pd.read_csv('adult.csv')
# X_salary = salary_data[['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']]
# y_salary = salary_data['income']
# salary_results = apply_algorithms(X_salary, y_salary)

# heart_data = pd.read_csv('echocardiogram.csv')
# X_heart = heart_data[['age-at-heart-attack', 'fractional-shortening', 'epss', 'lvdd', 'wall-motion-score', 'wall-motion-index']]
# y_heart = heart_data['alive-at-1']
# heart_results = apply_algorithms(X_heart, y_heart)

for dataset, results in zip(['Rice Variety'], [rice_results]):
    print(f"\nResults for {dataset} Dataset:")
    for model, metrics in results.items():
        print(f"{model}:")
        print(f"  Accuracy: {metrics['Accuracy']:.4f}")
        print(f"  Precision: {metrics['Precision']:.4f}")
    
    best_model = max(results, key=lambda x: results[x]['Accuracy'])
    best_accuracy = results[best_model]['Accuracy']
    print(f"\nBest model for {dataset} Dataset: {best_model} (Accuracy: {best_accuracy:.4f})")


Results for Rice Variety Dataset:
Naive Bayes:
  Accuracy: 0.9147
  Precision: 0.9148
Random Forest:
  Accuracy: 0.9239
  Precision: 0.9239
SVM:
  Accuracy: 0.1942
  Precision: 0.1789
Logistic Regression:
  Accuracy: 0.9291
  Precision: 0.9291

Best model for Rice Variety Dataset: Logistic Regression (Accuracy: 0.9291)
