**RANDOM FOREST**

In [None]:
# libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Random Forest pipeline: fit on a split of the training CSV, report accuracy
# on the held-out split and on the separate test CSV, then save predictions.

# training data
train_data_path = 'training_data.csv'
df_train = pd.read_csv(train_data_path)

# Assuming the first column is the labels and the rest are features
X_train = df_train.iloc[:, 1:]  # Features
y_train = df_train.iloc[:, 0]   # Labels

# Hold out 20% of the training file for validation; the fixed seed keeps the
# split reproducible between runs.
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Create and train the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_split, y_train_split)

# Evaluate on the held-out split, i.e. rows the model was NOT fitted on.
# (The variable keeps its historical name; it holds held-out predictions.)
train_predictions = rf_classifier.predict(X_test_split)
accuracy = accuracy_score(y_test_split, train_predictions)
print(f"Accuracy on the held-out validation split: {accuracy:.2f}")

# test data
test_data_path = 'test_data.csv'
df_test = pd.read_csv(test_data_path)

# Same layout assumption as the training file: labels first, features after.
# The label column name is looked up instead of hard-coded so that the
# positional assumption and the column selection below cannot disagree.
label_column = df_test.columns[0]
X_test = df_test.iloc[:, 1:]  # Features
y_test = df_test.iloc[:, 0]   # Labels

# Make predictions on the new dataset
test_predictions = rf_classifier.predict(X_test)

# Calculate accuracy on the test data
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Accuracy on the test set: {test_accuracy:.2f}")

# Display the predictions next to the true labels
df_test['Predictions'] = test_predictions
print("\nPredictions:")
print(df_test[[label_column, 'Predictions']])

# NOTE: the other model cells in this notebook write to this same path, so
# running them afterwards overwrites this file.
output_file_path = 'test_data_predictions.csv'
df_test[[label_column, 'Predictions']].to_csv(output_file_path, index=False)

print(f"\nPredictions saved to {output_file_path}")

**SUPPORT VECTOR MACHINE**

In [None]:
# libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Support Vector Machine pipeline: fit on a split of the training CSV, report
# accuracy on the held-out split and on the separate test CSV, then save
# predictions.

# training data
train_data_path = 'training_data.csv'
df_train = pd.read_csv(train_data_path)

# Assuming the first column is the labels and the rest are features
X_train = df_train.iloc[:, 1:]  # Features
y_train = df_train.iloc[:, 0]   # Labels

# Hold out 20% of the training file for validation; the fixed seed keeps the
# split reproducible between runs.
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Create and train the Support Vector Machine classifier
svm_classifier = SVC(random_state=42)
svm_classifier.fit(X_train_split, y_train_split)

# Evaluate on the held-out split, i.e. rows the model was NOT fitted on.
# (The variable keeps its historical name; it holds held-out predictions.)
train_predictions = svm_classifier.predict(X_test_split)
accuracy = accuracy_score(y_test_split, train_predictions)
print(f"Accuracy on the held-out validation split: {accuracy:.2f}")

# test data
test_data_path = 'test_data.csv'
df_test = pd.read_csv(test_data_path)

# Same layout assumption as the training file: labels first, features after.
# The label column name is looked up instead of hard-coded so that the
# positional assumption and the column selection below cannot disagree.
label_column = df_test.columns[0]
X_test = df_test.iloc[:, 1:]  # Features
y_test = df_test.iloc[:, 0]   # Labels

# Make predictions on the new dataset
test_predictions = svm_classifier.predict(X_test)

# Calculate accuracy on the test data
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Accuracy on the test set: {test_accuracy:.2f}")

# Display the predictions next to the true labels
df_test['Predictions'] = test_predictions
print("\nPredictions:")
print(df_test[[label_column, 'Predictions']])

# NOTE: the other model cells in this notebook write to this same path, so
# running them afterwards overwrites this file.
output_file_path = 'test_data_predictions.csv'
df_test[[label_column, 'Predictions']].to_csv(output_file_path, index=False)

print(f"\nPredictions saved to {output_file_path}")

**XGBOOST**

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# XGBoost pipeline: encode labels as integers, fit on a split of the training
# CSV, report accuracy on the held-out split and on the separate test CSV,
# then save predictions decoded back to the original label values.

# training data (path carries the '.csv' extension, matching the other model
# cells and the test-data path used below)
train_data_path = 'training_data.csv'
df_train = pd.read_csv(train_data_path)

# Assuming the first column is the labels and the rest are features
X_train = df_train.iloc[:, 1:]  # Features
y_train = df_train.iloc[:, 0]   # Labels

# Convert the labels to integer codes before fitting; the fitted encoder is
# kept so predictions can be mapped back with inverse_transform.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

# Hold out 20% of the training file for validation; the fixed seed keeps the
# split reproducible between runs.
X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(
    X_train, y_train_encoded, test_size=0.2, random_state=42
)

# Create and train the XGBoost classifier
xgb_classifier = XGBClassifier()
xgb_classifier.fit(X_train_split, y_train_split)

# Evaluate on the held-out split, i.e. rows the model was NOT fitted on.
# Both sides of the comparison are integer-encoded here.
# (The variable keeps its historical name; it holds held-out predictions.)
train_predictions = xgb_classifier.predict(X_test_split)
accuracy = accuracy_score(y_test_split, train_predictions)
print(f"Accuracy on the held-out validation split: {accuracy:.2f}")

# test data
test_data_path = 'test_data.csv'
df_test = pd.read_csv(test_data_path)

# Same layout assumption as the training file: labels first, features after.
# The label column name is looked up instead of hard-coded so that the
# positional assumption and the column selection below cannot disagree.
label_column = df_test.columns[0]
X_test = df_test.iloc[:, 1:]  # Features
y_test = df_test.iloc[:, 0]   # Labels

# Make predictions on the new dataset using the trained model
test_predictions = xgb_classifier.predict(X_test)

# Decode the integer predictions back to the original label values
decoded_predictions = label_encoder.inverse_transform(test_predictions)

# Calculate accuracy on the test data, comparing original (un-encoded) labels
# with decoded predictions, as the other model cells report it
test_accuracy = accuracy_score(y_test, decoded_predictions)
print(f"Accuracy on the test set: {test_accuracy:.2f}")

# Display the predictions next to the true labels
df_test['Predictions'] = decoded_predictions
print("\nPredictions:")
print(df_test[[label_column, 'Predictions']])

# NOTE: the other model cells in this notebook write to this same path, so
# whichever cell runs last determines the file's contents.
output_file_path = 'test_data_predictions.csv'
df_test[[label_column, 'Predictions']].to_csv(output_file_path, index=False)

print(f"\nPredictions saved to {output_file_path}")