In [22]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder

In [3]:
DATA_PATH = ''
train_path = os.path.join(DATA_PATH,'data','train.csv')
titanic_df = pd.read_csv(train_path)

In [4]:
titanic_df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [54]:
def prepare_data(df):
    edited_df = df.drop(['Name'],axis=1)
    ordinal_encoder = OrdinalEncoder()
    edited_df["Sex"] = ordinal_encoder.fit_transform(edited_df[["Sex"]])
    edited_df.set_index('PassengerId',inplace=True)
    edited_df.drop(columns = ['Ticket','Cabin'],inplace = True)
    imputer = SimpleImputer(strategy="median")
    edited_df['Age'] = imputer.fit_transform(edited_df[['Age']])
    edited_df['Fare'] = imputer.fit_transform(edited_df[['Fare']])
    edited_df = edited_df.dropna()
    oneHot = OneHotEncoder()
#     edited_df["Embarked"] = oneHot.fit_transform(edited_df[['Embarked']])
    return edited_df

In [55]:
X_train = prepare_data(titanic_df)

In [56]:
X_train

Unnamed: 0_level_0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0,3,1.0,22.0,1,0,7.2500,S
2,1,1,0.0,38.0,1,0,71.2833,C
3,1,3,0.0,26.0,0,0,7.9250,S
4,1,1,0.0,35.0,1,0,53.1000,S
5,0,3,1.0,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...,...
887,0,2,1.0,27.0,0,0,13.0000,S
888,1,1,0.0,19.0,0,0,30.0000,S
889,0,3,0.0,28.0,1,2,23.4500,S
890,1,1,1.0,26.0,0,0,30.0000,C


In [57]:
X_train.Embarked.value_counts()

S    644
C    168
Q     77
Name: Embarked, dtype: int64

In [58]:
oneHot = OneHotEncoder()
embarked = oneHot.fit_transform(X_train[['Embarked']])

In [61]:
embarked = pd.DataFrame(embarked.toarray(),columns = oneHot.categories_,index=X_train.index)

In [93]:
y_train = X_train['Survived']
X_train = X_train.drop(['Survived'],axis=1)

In [62]:
X_train = X_train.join(embarked)
# X_train['Embarked'].toarray()

In [64]:
X_train.drop('Embarked',axis=1,inplace=True)

In [65]:
from sklearn.linear_model import SGDClassifier

In [66]:
sgd_clf = SGDClassifier(random_state=42)
sgd_clf.fit(X_train, y_train)

SGDClassifier(random_state=42)

In [94]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(sgd_clf, X_train, y_train,
scoring="accuracy", cv=10)

In [95]:
scores

array([0.61797753, 0.61797753, 0.65168539, 0.68539326, 0.41573034,
       0.66292135, 0.61797753, 0.71910112, 0.70786517, 0.45454545])

In [96]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))
cross_val_score(sgd_clf, X_train_scaled, y_train, cv=10, scoring="accuracy")

array([0.62921348, 0.80898876, 0.69662921, 0.79775281, 0.79775281,
       0.78651685, 0.7752809 , 0.75280899, 0.70786517, 0.76136364])

In [97]:
sgd_clf.fit(X_train_scaled,y_train)

SGDClassifier(random_state=42)

In [72]:
test_path = os.path.join(DATA_PATH,'data','test.csv')

In [78]:
test_df = pd.read_csv(test_path)

In [79]:
test_df

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [80]:
X_test = prepare_data(test)

In [83]:
embarked_test = oneHot.fit_transform(X_test[['Embarked']])
embarked_test = pd.DataFrame(embarked_test.toarray(),columns=oneHot.categories_,index=X_test.index)

In [92]:
X_train

Unnamed: 0_level_0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,"(C,)","(Q,)","(S,)"
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0,3,1.0,22.0,1,0,7.2500,0.0,0.0,1.0
2,1,1,0.0,38.0,1,0,71.2833,1.0,0.0,0.0
3,1,3,0.0,26.0,0,0,7.9250,0.0,0.0,1.0
4,1,1,0.0,35.0,1,0,53.1000,0.0,0.0,1.0
5,0,3,1.0,35.0,0,0,8.0500,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...
887,0,2,1.0,27.0,0,0,13.0000,0.0,0.0,1.0
888,1,1,0.0,19.0,0,0,30.0000,0.0,0.0,1.0
889,0,3,0.0,28.0,1,2,23.4500,0.0,0.0,1.0
890,1,1,1.0,26.0,0,0,30.0000,1.0,0.0,0.0


In [84]:
X_test = X_test.join(embarked_test)

In [86]:
X_test.drop(['Embarked'],axis=1,inplace=True)

In [88]:
X_test

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,"(C,)","(Q,)","(S,)"
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
892,3,1.0,34.5,0,0,7.8292,0.0,1.0,0.0
893,3,0.0,47.0,1,0,7.0000,0.0,0.0,1.0
894,2,1.0,62.0,0,0,9.6875,0.0,1.0,0.0
895,3,1.0,27.0,0,0,8.6625,0.0,0.0,1.0
896,3,0.0,22.0,1,1,12.2875,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...
1305,3,1.0,27.0,0,0,8.0500,0.0,0.0,1.0
1306,1,0.0,39.0,0,0,108.9000,1.0,0.0,0.0
1307,3,1.0,38.5,0,0,7.2500,0.0,0.0,1.0
1308,3,1.0,27.0,0,0,8.0500,0.0,0.0,1.0


In [89]:
X_test_scaled = scaler.fit_transform(X_test.astype(np.float64))

In [98]:
y_test = sgd_clf.predict(X_test_scaled)

In [99]:
y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,

In [100]:
result = pd.DataFrame()

In [101]:
result['PassengerId'] = X_test.index

In [102]:
result

Unnamed: 0,PassengerId
0,892
1,893
2,894
3,895
4,896
...,...
413,1305
414,1306
415,1307
416,1308


In [103]:
result['Survived'] = y_test

In [104]:
result.set_index('PassengerId',inplace=True)

In [106]:
result.to_csv('result.csv')

In [107]:
from sklearn.ensemble import RandomForestClassifier
rf_clf=RandomForestClassifier(n_estimators=100)
rf_clf.fit(X_train,y_train)

RandomForestClassifier()

In [123]:
cross_val_score(rf_clf, X_train, y_train, cv=10, scoring="accuracy")

array([0.74157303, 0.80898876, 0.76404494, 0.83146067, 0.8988764 ,
       0.82022472, 0.82022472, 0.76404494, 0.82022472, 0.84090909])

In [109]:
X_train_scaled = scaler.fit_transform(X_train.astype(np.float64))
cross_val_score(rf_clf, X_train_scaled, y_train, cv=10, scoring="accuracy")

array([0.73033708, 0.80898876, 0.76404494, 0.86516854, 0.88764045,
       0.80898876, 0.83146067, 0.76404494, 0.83146067, 0.85227273])

In [114]:
rf_clf.fit(X_train,y_train)

RandomForestClassifier()

In [115]:
y_test_rf = rf_clf.predict(X_test)

In [161]:
def get_result(file_name:str,y_test_df):
    result = pd.DataFrame()
    result['PassengerId'] = X_test.index
    result['Survived'] = y_test_df
    result.set_index('PassengerId',inplace=True)
    result1.to_csv(file_name+'.csv')

In [117]:
y_test_rf

array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [118]:
X_test_scaled = scaler.fit_transform(X_test.astype(np.float64))

In [120]:
rf_clf.fit(X_train_scaled,y_train)
y_test_rf = rf_clf.predict(X_test_scaled)

In [121]:
y_test_rf

array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [122]:
result1 = pd.DataFrame()
result1['PassengerId'] = X_test.index
result1['Survived'] = y_test_rf
result1.set_index('PassengerId',inplace=True)
result1.to_csv('result2.csv')

In [124]:
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()

In [126]:
lin_reg.fit(X_train,y_train)

LinearRegression()

In [129]:
X_train

Unnamed: 0_level_0,Pclass,Sex,Age,SibSp,Parch,Fare,"(C,)","(Q,)","(S,)"
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,3,1.0,22.0,1,0,7.2500,0.0,0.0,1.0
2,1,0.0,38.0,1,0,71.2833,1.0,0.0,0.0
3,3,0.0,26.0,0,0,7.9250,0.0,0.0,1.0
4,1,0.0,35.0,1,0,53.1000,0.0,0.0,1.0
5,3,1.0,35.0,0,0,8.0500,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...
887,2,1.0,27.0,0,0,13.0000,0.0,0.0,1.0
888,1,0.0,19.0,0,0,30.0000,0.0,0.0,1.0
889,3,0.0,28.0,1,2,23.4500,0.0,0.0,1.0
890,1,1.0,26.0,0,0,30.0000,1.0,0.0,0.0


In [130]:
y_train

PassengerId
1      0
2      1
3      1
4      1
5      0
      ..
887    0
888    1
889    0
890    1
891    0
Name: Survived, Length: 889, dtype: int64

In [133]:
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor()

In [138]:
scores = cross_val_score(tree_reg,X_train_scaled,y_train,cv=10,scoring="neg_mean_squared_error")

In [139]:
tree_rmse_scores = np.sqrt(-scores)

In [140]:
def display_scores(scores):
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard deviation:", scores.std())

display_scores(tree_rmse_scores)

Scores: [0.51476332 0.43000902 0.52368601 0.49937539 0.43100032 0.38112001
 0.4629698  0.50558677 0.40351433 0.4239887 ]
Mean: 0.4576013655451777
Standard deviation: 0.04804656750744551


In [142]:
lin_scores = cross_val_score(lin_reg, X_train_scaled, y_train,
scoring="neg_mean_squared_error", cv=10)
lin_rmse_scores = np.sqrt(-lin_scores)
display_scores(lin_rmse_scores)

Scores: [0.3876537  0.37685463 0.39640035 0.39437543 0.3894147  0.37471652
 0.39573245 0.40360018 0.33220402 0.37529545]
Mean: 0.382624742944612
Standard deviation: 0.019242719752808824


In [145]:
    from sklearn.model_selection import GridSearchCV

    forest = RandomForestClassifier(random_state = 1)
    n_estimators = [100, 300, 500, 800, 1200]
    max_depth = [5, 8, 15, 25, 30]
    min_samples_split = [2, 5, 10, 15, 100]
    min_samples_leaf = [1, 2, 5, 10] 

    hyperF = dict(n_estimators = n_estimators, max_depth = max_depth,  
                  min_samples_split = min_samples_split, 
                 min_samples_leaf = min_samples_leaf)

    gridF = GridSearchCV(forest, hyperF, cv = 3, verbose = 1, 
                          n_jobs = -1)
    bestF = gridF.fit(X_train, y_train)

Fitting 3 folds for each of 500 candidates, totalling 1500 fits


In [146]:
gridF.best_params_

{'max_depth': 15,
 'min_samples_leaf': 2,
 'min_samples_split': 15,
 'n_estimators': 800}

In [147]:
gridF.best_estimator_

RandomForestClassifier(max_depth=15, min_samples_leaf=2, min_samples_split=15,
                       n_estimators=800, random_state=1)

In [168]:
best_rf_clf = RandomForestClassifier(random_state = 1, max_depth = 15, n_estimators = 900,
                                     min_samples_split = 15, min_samples_leaf = 2)

In [169]:
rf_model = best_rf_clf.fit(X_train,y_train)

In [170]:
cross_val_score(best_rf_clf,X_train,y_train,cv=10,scoring="accuracy")

array([0.7752809 , 0.80898876, 0.74157303, 0.87640449, 0.93258427,
       0.85393258, 0.82022472, 0.7752809 , 0.88764045, 0.82954545])

In [156]:
cross_val_score(best_rf_clf,X_train_scaled,y_train,cv=10,scoring="accuracy")

array([0.78651685, 0.80898876, 0.74157303, 0.87640449, 0.93258427,
       0.85393258, 0.82022472, 0.7752809 , 0.88764045, 0.82954545])

In [171]:
y_test1 = rf_model.predict(X_test)

In [172]:
get_result("best_result",y_test1)

In [173]:
y_test1

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,

In [176]:
X_train1 = X_train[['Sex','Age','Pclass','Fare']]

In [177]:
X_train1

Unnamed: 0_level_0,Sex,Age,Pclass,Fare
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,22.0,3,7.2500
2,0.0,38.0,1,71.2833
3,0.0,26.0,3,7.9250
4,0.0,35.0,1,53.1000
5,1.0,35.0,3,8.0500
...,...,...,...,...
887,1.0,27.0,2,13.0000
888,0.0,19.0,1,30.0000
889,0.0,28.0,3,23.4500
890,1.0,26.0,1,30.0000


In [178]:
rf_model1 = best_rf_clf.fit(X_train1,y_train)

In [179]:
cross_val_score(rf_model1,X_train,y_train,cv=10,scoring="accuracy")

array([0.7752809 , 0.80898876, 0.74157303, 0.87640449, 0.93258427,
       0.85393258, 0.82022472, 0.7752809 , 0.88764045, 0.82954545])

In [180]:
X_test1 = X_test[['Sex','Age','Pclass','Fare']]

In [181]:
y_test1 = rf_model1.predict(X_test1)

In [182]:
get_result('final_result1',y_test1)

In [183]:
from sklearn.svm import SVC
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

SVC(kernel='linear')

In [184]:
cross_val_score(svclassifier,X_train,y_train,cv=10,scoring="accuracy")

array([0.80898876, 0.79775281, 0.76404494, 0.84269663, 0.79775281,
       0.7752809 , 0.76404494, 0.74157303, 0.80898876, 0.76136364])