In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.decomposition import PCA

# Load the dataset. The code below treats the last column as the class label
# and every other column as a feature.
df = pd.read_excel('new_ml.xlsx')

X = df.iloc[:, :-1]  # Features
y = df.iloc[:, -1]  # Target variable

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling and PCA are pipeline steps, so during the grid search they are
# re-fitted on the training part of every CV fold together with the kNN model.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=21)),  # Adjust n_components as needed (cannot exceed the number of features)
    ('knn', KNeighborsClassifier())
])

# Define the parameter grid to search
param_grid = {
    'knn__n_neighbors': range(1, 21),  # Search for k from 1 to 20
    'knn__weights': ['uniform', 'distance'],  # Search for different weight options
    'knn__p': [1, 2]  # Search for Manhattan and Euclidean distances
}

# Exhaustive search over the grid: 3-fold cross-validation, parallel execution on all cores
grid_search = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1)

# Fit the grid search to the training data only
grid_search.fit(X_train, y_train)

# Print the best parameters and the mean cross-validated score they achieved
print("Best Parameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

# Evaluate the best model (already refitted on the full training set) on the test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Calculate and print testing accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

# Calculate and print precision, recall, and F1-score.
# Macro averaging is used because micro-averaged precision/recall/F1 are all
# identical to accuracy for single-label classification and therefore add no
# information; macro also matches the SVM and Random Forest evaluations in
# this notebook, so the models can be compared directly.
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Best Parameters:  {'knn__n_neighbors': 4, 'knn__p': 1, 'knn__weights': 'distance'}
Best Score:  0.8729718343613224
Test Accuracy: 0.9071925754060325
Precision: 0.9071925754060325
Recall: 0.9071925754060325
F1-score: 0.9071925754060325


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline

# Load dataset from Excel file
excel_file_path = "new_ml.xlsx"
df = pd.read_excel(excel_file_path)

# The last column is taken as the target variable; all preceding columns are features
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Splitting the dataset into training (70%) and testing (30%) sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Pipeline: feature scaling -> univariate feature selection (f_classif) -> SVM.
# NOTE: there is no SMOTE step in this pipeline, so the classes are not resampled.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('selector', SelectKBest(f_classif)),
    ('svm', SVC())
])

# Parameter grid for GridSearchCV
param_grid = {
    'selector__k': [1, 21],  # Only k=1 and k=21 are tried (a list, not a range); adjust as needed
    'svm__C': [0.1, 4, 50],  # Regularization parameter
    'svm__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],  # Kernel functions
    'svm__gamma': ['scale', 'auto']
}

# Grid search with 5-fold cross-validation, scored by accuracy
grid_search = GridSearchCV(pipeline, param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

# Best parameters found
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# GridSearchCV uses refit=True by default, so best_estimator_ is already a copy
# of the pipeline configured with best_params and trained on all of X_train.
# Reuse it instead of fitting the same model a second time; `pipeline` keeps
# referring to the fitted best model, as it did before.
pipeline = grid_search.best_estimator_

# Making predictions on the held-out test set
y_pred = pipeline.predict(X_test)

# Evaluating the model (accuracy is reported as a percentage)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy*100)

# Calculate and print macro-averaged precision, recall, and F1-score
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Best Parameters: {'selector__k': 21, 'svm__C': 50, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
Accuracy: 88.63109048723898
Precision: 0.8866591009083276
Recall: 0.8886511216118193
F1-score: 0.8874419831730455


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Read the spreadsheet that holds the dataset
excel_file_path = "new_ml.xlsx"
df = pd.read_excel(excel_file_path)

# The label is taken from the final column; every column before it is a feature
y = df.iloc[:, -1]
X = df.iloc[:, :-1]

# Keep 10% of the rows aside for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Standardize features: statistics are learned from the training rows only
# and then applied unchanged to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reduce dimensionality with PCA, again fitted on the training rows only
pca = PCA(n_components=21)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Random Forest with fixed hyperparameters and a fixed seed
rf_classifier = RandomForestClassifier(
    n_estimators=600,
    max_depth=70,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)

# Train on the PCA-transformed training data, then predict the test rows
rf_classifier.fit(X_train_pca, y_train)
y_pred = rf_classifier.predict(X_test_pca)

# Overall accuracy on the test set
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Macro-averaged precision, recall and F1-score
macro = {'average': 'macro'}
precision = precision_score(y_test, y_pred, **macro)
recall = recall_score(y_test, y_pred, **macro)
f1 = f1_score(y_test, y_pred, **macro)

for label, value in (("Precision:", precision), ("Recall:", recall), ("F1-score:", f1)):
    print(label, value)

Accuracy: 0.925754060324826
Precision: 0.9255400501207389
Recall: 0.930225188781014
F1-score: 0.9273673789904311
