# rough_svm_example.py
---
### Importing the libraries

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
pd.options.mode.chained_assignment = None  # default='warn'

### Importing the dataset

In [2]:
dataset = pd.read_csv('./task/titanicTrain.csv')
dataset.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1.0,1.0,"Allen, Miss. Elisabeth Walton",female,29.0,0.0,0.0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
1,1.0,1.0,"Allison, Master. Hudson Trevor",male,0.9167,1.0,2.0,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
2,1.0,0.0,"Allison, Miss. Helen Loraine",female,2.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
3,1.0,0.0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1.0,2.0,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"
4,1.0,0.0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1.0,2.0,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


In [3]:
data = dataset.iloc[:, [1, 11]]
data.loc[:,'boat'] = data.boat.fillna(value=-1)
data.loc[data.boat!=-1, 'boat'] = 1
data.loc[data.boat==-1, 'boat']=0
data.dropna(axis=0, inplace=True)
data.survived = data.survived.astype('int')
data.head()

Unnamed: 0,survived,boat
0,1,1
1,1,1
2,0,0
3,0,0
4,0,0


In [4]:
data.groupby(['boat', 'survived'])['survived'].count()

boat  survived
0     0           569
      1            14
1     0             8
      1           409
Name: survived, dtype: int64

In [5]:
data['predict'] = data.boat == data.survived
data.head()

Unnamed: 0,survived,boat,predict
0,1,1,True
1,1,1,True
2,0,0,True
3,0,0,True
4,0,0,True


In [6]:
predict = pd.read_csv('./task/titanicQuestion.csv')
predict.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,3,,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q,,,
1,3,,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q,16.0,,
2,3,,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q,16.0,,
3,3,,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q,16.0,,
4,3,,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q,13.0,,


In [20]:
mask = ~predict['boat'].isnull()
mask = mask.replace(True, 1)
mask.to_csv('./task/Prediction.csv', index = False, col='prediction')
mask

TypeError: to_csv() got an unexpected keyword argument 'col'

In [None]:
X = data.iloc[:, 1].values
pd.DataFrame(X).head()

In [None]:
y = data.iloc[:, 0].values.astype('int')
pd.DataFrame(y).head()

### Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

### Feature Scaling

In [None]:
'''
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
pd.DataFrame(X_train).head()
X_test = sc.transform(X_test)
pd.DataFrame(X_test).head()
'''

### Fitting SVM to the Training set

In [None]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(X_train, y_train)
cm_frame = pd.DataFrame(cm)
cm_frame.index.name =  'Prediction'
cm_frame.columns.name = 'Original'
cm_frame

### Predicting the Test set results

In [None]:
y_pred = classifier.predict(X_test)

### Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm_frame = pd.DataFrame(cm)
cm_frame.index.name =  'Prediction'
cm_frame.columns.name = 'Original'
cm_frame

### Visualising the Training set results

In [None]:
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.5, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('SVM (Training set)')
plt.xlabel('Age')
plt.ylabel('Salary')
plt.legend()
plt.show()

### Visualising the Test set results

In [None]:
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.5, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('SVM (Test set)')
plt.xlabel('Age')
plt.ylabel('Salary')
plt.legend()
plt.show()