In [11]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Read the Titanic passenger table from disk. The path is relative to the
# working directory (note the space before '.csv' in the file name); adjust it
# if the file lives elsewhere.
titanic_data = pd.read_csv('titanic_dataset .csv')

# 'Survived' is the label to predict; every other column is kept as a feature.
y = titanic_data['Survived']
X = titanic_data.drop(columns='Survived')

# Partition the feature columns by dtype: int64/float64 columns feed the
# numeric branch, object columns feed the categorical branch. Columns of any
# other dtype are listed in neither branch.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
non_numeric_columns = X.select_dtypes(include=['object']).columns
categorical_features = non_numeric_columns

# Numeric branch: replace missing values with the column mean, then standardize.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical branch: replace missing values with the literal 'missing', then
# one-hot encode. handle_unknown='ignore' keeps categories that were not seen
# during fitting from raising an error at transform time.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each column group through its own branch.
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# The two classifiers being compared: k-nearest neighbours with k=5 and a
# linear-kernel support vector machine with C=1.
knn_model = KNeighborsClassifier(n_neighbors=5)
svm_model = SVC(C=1, kernel='linear')

# Each classifier is chained after the shared preprocessing step, so the
# pipeline can be fitted on the raw feature table.
knn_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', knn_model),
])
svm_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', svm_model),
])

# Score both pipelines with the same protocol: 5-fold cross-validation on
# accuracy. The generator is consumed in order, so kNN is evaluated first and
# the SVM second.
knn_scores, svm_scores = (
    cross_val_score(candidate, X, y, cv=5, scoring='accuracy')
    for candidate in (knn_pipeline, svm_pipeline)
)

# Collapse the per-fold accuracies into one mean per model.
knn_avg_accuracy = knn_scores.mean()
svm_avg_accuracy = svm_scores.mean()

# Report the mean accuracies.
for label, mean_accuracy in (
    ("kNN Average Accuracy (k-fold):", knn_avg_accuracy),
    ("SVM Average Accuracy (k-fold):", svm_avg_accuracy),
):
    print(label, mean_accuracy)


kNN Average Accuracy (k-fold): 0.7991023790094782
SVM Average Accuracy (k-fold): 0.8350009415604795
