In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC  # Import the SVM classifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Step 2: Read the training CSV into a DataFrame
data = pd.read_csv("/kaggle/input/pesurrcampusmicompetitionc/train.csv")

# Step 3: Pull out the target column (y); everything else is a feature (X)
y = data["label"]
X = data.drop(columns=["label"])

# Step 4: Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Standardise the numeric features
# Only float64/int64 columns are kept. The scaler is fitted on the training
# split alone and then reused, unchanged, for the validation split.
numeric_columns = X_train.select_dtypes(include=["float64", "int64"]).columns
scaler = StandardScaler()
scaler.fit(X_train[numeric_columns])
X_train_scaled = scaler.transform(X_train[numeric_columns])
X_val_scaled = scaler.transform(X_val[numeric_columns])

# Step 6: Hyperparameter tuning for the SVM
# The grid is declared per kernel: `gamma` is a kernel coefficient that only
# applies to non-linear kernels, so crossing it with kernel='linear' would fit
# every linear candidate twice with identical results. Splitting the grid
# searches the same distinct models with 9 candidates instead of 12.
param_grid = [
    {'kernel': ['linear'], 'C': [1, 10, 100]},
    {'kernel': ['rbf'], 'C': [1, 10, 100], 'gamma': ['scale', 'auto']},
]
# 3-fold cross-validation over the grid, using all available cores.
grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Keep the best-scoring candidate found by the grid search.
best_model = grid_search.best_estimator_

# Step 7: Predict labels for the held-out validation rows
y_pred = best_model.predict(X_val_scaled)

# Step 8: Report the fraction of validation rows predicted correctly
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print("Validation Accuracy:", accuracy)

# Step 9: Read the test CSV and scale the same numeric columns
# The scaler fitted on the training split is reused here; it is not re-fitted.
test_data = pd.read_csv("/kaggle/input/pesurrcampusmicompetitionc/test.csv")
test_features = test_data[numeric_columns]
test_data_scaled = scaler.transform(test_features)

# Step 10: Make predictions on the test data and create a submission file
test_predictions = best_model.predict(test_data_scaled)

# Convert the predicted labels to a numeric encoding.
# The encoder is fitted on the training labels (`y`), not on the predictions:
# fitting on the predictions would number only the classes that happen to be
# predicted, so a class missing from the test predictions would silently shift
# every later code. The model can only predict classes it was trained on, all
# of which are in `y`, so `transform` cannot hit an unseen label.
label_encoder = LabelEncoder()
label_encoder.fit(y)
test_predictions_encoded = label_encoder.transform(test_predictions)

# One row per test id with its encoded predicted label; written without the index.
submission_df = pd.DataFrame({"id": test_data["id"], "label": test_predictions_encoded})
submission_df.to_csv("submission.csv", index=False)


Validation Accuracy: 0.914952751528627
