## Introduction

I chose to submit my solution using the polynomial-kernel SVC with the parameter C = 4.

In [37]:
# Data analysis
import numpy as np
import pandas as pd
import random as rnd
from statistics import mean
import math

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Support Vector Classifier
from sklearn.svm import SVC

In [38]:
# Read the training and test sets from CSV files in the working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

In [39]:
# Training-set preprocessing: convert the raw columns into numeric features.

# Removing PassengerId
train_df = train_df.drop('PassengerId', axis=1)

# Converting 'Name': keep only the second whitespace-separated token of the
# name, then collapse every token that is not one of the four listed titles
# into 'Other'.
name_df = train_df.Name.str.split(expand=True)
train_df['Name'] = name_df[1]
known_titles = ['Mr.', 'Mrs.', 'Miss.', 'Master.']
train_df.loc[~train_df['Name'].isin(known_titles), 'Name'] = 'Other'

# Converting 'Ticket': replace each ticket value with the number of rows in
# this frame that share it.
train_df['Ticket'] = train_df['Ticket'].map(train_df['Ticket'].value_counts()).astype('int64')

# Converting 'Cabin': 1 when a cabin value is present, 0 when it is missing.
train_df['Cabin'] = train_df['Cabin'].notnull().astype('int64')

# Remove rows with a missing 'Embarked'
train_df = train_df[train_df.Embarked.notnull()]
# Fill missing values in each numeric column with that column's own mean.
# A frame-wide fillna(Age.mean()) would write the mean age into every column
# that has gaps, not only 'Age'.
train_df = train_df.fillna(train_df.mean(numeric_only=True))

# Encoding 'Sex' as a numeric column (male -> 0, female -> 1). The converted
# Series is assigned back: astype() returns a new object and does not modify
# the column in place.
train_df['Sex'] = train_df['Sex'].map({'male': 0, 'female': 1}).astype('int64')
# Encoding 'Embarked' and 'Name' using the 'One Hot' method - get_dummies in pandas
train_df = pd.get_dummies(train_df, columns=['Embarked'], prefix=['embarked'])
train_df = pd.get_dummies(train_df, columns=['Name'], prefix=['name'])



In [40]:
# Splitting into features and dependent variable.
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
X = train_df.drop('Survived', axis=1)
y = train_df.Survived

# Scaling every feature to the range [-1, 1]. The fitted `scaler` stays
# available so the same transformation can be applied to other data.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1))
X = pd.DataFrame(scaler.fit_transform(X))

### Training model

In [41]:
# Per-class weights handed to SVC(class_weight=...): class 0 -> 0.676, class 1 -> 0.324.
# NOTE(review): presumably the share of each class in the training labels - confirm;
# as written, the class with the larger weight is penalised more heavily when misclassified.
probabilities = {0: 0.676, 1: 0.324}
# gamma is pinned to "auto" (1 / n_features). The recorded output of this cell
# shows gamma='auto_deprecated', i.e. the fit relied on a default that newer
# scikit-learn releases changed to "scale"; pinning it keeps the kernel
# coefficient the same regardless of the installed version.
poly_SVC = SVC(kernel="poly", C=4, gamma="auto", class_weight=probabilities)
poly_SVC.fit(X, y)





SVC(C=4, cache_size=200, class_weight={0: 0.676, 1: 0.324}, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

### Test Pipeline

In [43]:
# Test-set preprocessing: convert the raw columns into numeric features.
# No rows are removed here, because the submission takes PassengerId from
# every row of test.csv and therefore needs one prediction per row.

# Removing PassengerId
test_df = test_df.drop('PassengerId', axis=1)

# Converting 'Name': keep only the second whitespace-separated token of the
# name, then collapse every token that is not one of the four listed titles
# into 'Other'.
name_df = test_df.Name.str.split(expand=True)
test_df['Name'] = name_df[1]
known_titles = ['Mr.', 'Mrs.', 'Miss.', 'Master.']
test_df.loc[~test_df['Name'].isin(known_titles), 'Name'] = 'Other'

# Converting 'Ticket': replace each ticket value with the number of rows in
# this frame that share it.
test_df['Ticket'] = test_df['Ticket'].map(test_df['Ticket'].value_counts()).astype('int64')

# Converting 'Cabin': 1 when a cabin value is present, 0 when it is missing.
test_df['Cabin'] = test_df['Cabin'].notnull().astype('int64')

# Fill missing values in each numeric column with that column's own mean.
# A frame-wide fillna(Age.mean()) would write the mean age into every column
# that has gaps, not only 'Age'.
test_df = test_df.fillna(test_df.mean(numeric_only=True))

# Encoding 'Sex' as a numeric column (male -> 0, female -> 1). The converted
# Series is assigned back: astype() returns a new object and does not modify
# the column in place.
test_df['Sex'] = test_df['Sex'].map({'male': 0, 'female': 1}).astype('int64')
# Encoding 'Embarked' and 'Name' using the 'One Hot' method - get_dummies in pandas.
# A row with a missing 'Embarked' gets 0 in every embarked_* column.
test_df = pd.get_dummies(test_df, columns=['Embarked'], prefix=['embarked'])
test_df = pd.get_dummies(test_df, columns=['Name'], prefix=['name'])
    

### Generating Predictions

In [44]:
# The model was fitted on features scaled by `scaler`, so the test features
# must go through the same fitted scaler before prediction; raw values would
# lie outside the range the model was trained on.
# The test columns are first aligned to the training feature layout: a dummy
# column absent from the test frame is added as all zeros, and columns the
# model never saw are dropped.
feature_columns = train_df.drop('Survived', axis=1).columns
test_features = test_df.reindex(columns=feature_columns, fill_value=0)
predictions = poly_SVC.predict(scaler.transform(test_features))

In [45]:
# PassengerId is re-read from test.csv and paired with the predictions,
# then the two-column table is written out without the index.
passenger_ids = pd.read_csv("test.csv")["PassengerId"]
submission = pd.DataFrame({"PassengerId": passenger_ids, "Survived": predictions})
submission.to_csv('submission.csv', index=False)