In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report

# Read the training CSV and, in the same expression, discard the columns that
# are not expected to help the model or would need extra processing first.
titanic_data = (
    pd.read_csv('/content/titanic_train.csv')
    .drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])
)

# 'Survived' is the label; every remaining column is a candidate feature.
y = titanic_data['Survived']
X = titanic_data.drop(columns='Survived')

# Hold out 20% of the rows for evaluation. The fixed random_state keeps the
# split identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Column groups, named by the kind of preprocessing each one receives.
numeric_features = ['Age', 'Fare']
categorical_features = ['Pclass', 'Sex', 'Embarked']

# Numeric columns: fill missing values with the column median, then scale.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent value, then
# one-hot encode. Categories not seen during fit are ignored instead of raising.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own transformer.
# NOTE(review): remainder='drop' (the default, spelled out here) means any
# column of X that is not listed above never reaches the classifier. In the
# standard Kaggle Titanic file that would be SibSp and Parch -- confirm this
# omission is intentional.
preprocessor = ColumnTransformer(
    [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    remainder='drop',
)

# Support-vector classifier left at its default hyperparameters.
svc_model = SVC()

# Bundle preprocessing and the classifier into one estimator so that the same
# transformations learned during fit are re-applied at prediction time.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', svc_model),
])

# Fit on the training split, then label the held-out split. fit() returns the
# pipeline itself, which is what allows the call to be chained.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Report overall accuracy, then the per-class precision/recall/F1 table.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")


Accuracy: 0.8044692737430168
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.90      0.84       105
           1       0.83      0.66      0.74        74

    accuracy                           0.80       179
   macro avg       0.81      0.78      0.79       179
weighted avg       0.81      0.80      0.80       179



In [2]:
import joblib

# Persist the whole pipeline (preprocessing steps + classifier) to a single
# file in the current working directory. Kept as the cell's last expression so
# the list of written filenames is displayed.
joblib.dump(value=pipeline, filename='titanic_svc_model.joblib')


['titanic_svc_model.joblib']

In [3]:
# Read the passengers to score. The full frame is handed to the pipeline as-is.
new_data = pd.read_csv('/content/titanic_test.csv')

# Restore the pipeline from disk and predict a label for every row.
loaded_pipeline = joblib.load('titanic_svc_model.joblib')
new_data_predictions = loaded_pipeline.predict(new_data)

# Pair each prediction with the passenger id it belongs to.
predictions_df = new_data[['PassengerId']].assign(
    Survived_Predicted=new_data_predictions
)

# Show a heading followed by the (pandas-truncated) table of predictions.
print("Predictions for New Data:", predictions_df, sep="\n")


Predictions for New Data:
     PassengerId  Survived_Predicted
0            892                   0
1            893                   0
2            894                   0
3            895                   0
4            896                   0
..           ...                 ...
413         1305                   0
414         1306                   1
415         1307                   0
416         1308                   0
417         1309                   0

[418 rows x 2 columns]
