In [None]:
import pandas as pd
# Load the preprocessed training and test tables from disk.
train, test = (
    pd.read_csv(csv_file)
    for csv_file in ('./data/preprocessed/train1006.csv',
                     './data/preprocessed/test1006.csv')
)

In [None]:
# Split the training table into the label column (y) and every other column (X).
y = train['Survived']
X = train.drop('Survived', axis=1)

# Modeling: logistic regression baseline

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 70/30 hold-out split of the labelled data; the fixed seed keeps
# the partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=2045,
)

# Report the (rows, columns) of each part as a quick sanity check.
print('Train Data : ', X_train.shape, y_train.shape)
print('Test Data : ', X_test.shape, y_test.shape)

In [None]:
from sklearn.linear_model import LogisticRegression

# L2-penalised logistic regression (C = 0.3) fitted on the training part of
# the hold-out split.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases
# (1.5+) — confirm against the installed version before upgrading.
LR1 = LogisticRegression(
    penalty='l2',
    C=0.3,
    multi_class='multinomial',
    n_jobs=-1,
)

LR1.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

# Evaluate LR1 on the hold-out split.
# Hard labels are computed once and reused for the threshold-based metrics.
holdout_pred = LR1.predict(X_test)
# ROC AUC is a ranking metric: it needs a continuous score, not 0/1 labels.
# Column 1 of predict_proba is the probability of the greater class label
# (LR1.classes_[1]), which is what roc_auc_score expects for binary targets.
holdout_score = LR1.predict_proba(X_test)[:, 1]

print(accuracy_score(y_test, holdout_pred), '\n')
print(confusion_matrix(y_test, holdout_pred), '\n')
print(roc_auc_score(y_test, holdout_score))

In [None]:
# Same logistic-regression configuration as LR1, but fitted on every labelled
# row (X, y) instead of only the training part of the hold-out split.
LR2 = LogisticRegression(
    penalty='l2',
    C=0.3,
    multi_class='multinomial',
    n_jobs=-1,
)

LR2.fit(X, y)

In [None]:
# Predict labels for the test table with LR2, place them in the 'Survived'
# column of the existing submission.csv, and save the result as LR_1.csv.
pred = LR2.predict(test)
sub = pd.read_csv('./submissions/submission.csv').assign(Survived=pred)
sub.to_csv('./submissions/LR_1.csv', index=False)

# Tuning: grid search over C for logistic regression

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold, train_test_split

In [None]:
# Candidate values of the logistic-regression `C` parameter tried by the grid search.
params = dict(
    C=[0.01, 0.03, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
       1, 3, 5, 10],
)

In [None]:
# List the metric names accepted by the `scoring=` argument.
# Importing the submodule explicitly guarantees `sklearn.metrics` is loaded
# (a bare `import sklearn` does not) and still binds the name `sklearn`.
import sklearn.metrics

try:
    # Public helper; `sklearn.metrics.SCORERS` was deprecated and later removed.
    scorer_names = sklearn.metrics.get_scorer_names()
except AttributeError:
    # Older scikit-learn releases only expose the SCORERS registry dict.
    scorer_names = sorted(sklearn.metrics.SCORERS)
scorer_names

In [None]:
# Grid search over `params` for a default-configured logistic regression.
# Candidates are scored by ROC AUC on 5 shuffled folds (fixed seed); with
# refit=True the best candidate is refitted on all data passed to fit().
Model_LR = LogisticRegression()

gcv_LR = GridSearchCV(
    estimator=Model_LR,
    param_grid=params,
    scoring='roc_auc',
    cv=KFold(n_splits=5, shuffle=True, random_state=2045),
    refit=True,
)

In [None]:
# Rebuild features / labels from the full training table and run the search.
y = train['Survived']
X = train.drop(columns='Survived')
gcv_LR.fit(X, y)

In [None]:
# Show the parameter setting from the grid that achieved the best mean cross-validated score.
gcv_LR.best_params_

In [None]:
# Show the mean cross-validated score (ROC AUC, per the `scoring` setting) of the best setting.
gcv_LR.best_score_

In [None]:
# Predict the test table with the refitted best estimator of the grid search
# and save the labels in the 'Survived' column of a copy of submission.csv.
pred = gcv_LR.predict(test)
sub = pd.read_csv('./submissions/submission.csv')
sub = sub.assign(Survived=pred)
sub.to_csv('./submissions/GCV_LR_1.csv', index=False)

# VC: soft-voting classifier ensembles

In [None]:
import pandas as pd
# Read the preprocessed training and test tables from disk.
train, test = map(
    pd.read_csv,
    ('./data/preprocessed/train1006.csv', './data/preprocessed/test1006.csv'),
)

In [None]:
# Features are every column except the label; the label is 'Survived'.
X, y = train.drop(columns=['Survived']), train['Survived']

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

## RF & LR

In [None]:
# Soft-voting ensemble of a random forest and a logistic regression.
# The forest is seeded so that re-running the notebook reproduces the same
# model and submission; 2045 is the seed already used for the hold-out split
# and the CV folds in this notebook.
RF = RandomForestClassifier(random_state=2045)
LR = LogisticRegression(C=3,
                        penalty='l2',
                        multi_class='multinomial',
                        n_jobs=-1)

# voting='soft' combines the members' predicted class probabilities.
VC1 = VotingClassifier(estimators=[('rf', RF), ('lr', LR)], voting='soft')

In [None]:
# Fit the RF + LR voting ensemble on all labelled rows (X, y).
VC1.fit(X,y)

In [None]:
# Predict the test table with the RF + LR ensemble and save the labels in the
# 'Survived' column of a copy of submission.csv.
pred = VC1.predict(test)
sub = pd.read_csv('./submissions/submission.csv').assign(Survived=pred)
sub.to_csv('./submissions/VC_by_RF_and_LR.csv', index=False)

In [None]:
# Display the predictions most recently assigned to `pred` (VC1's, when cells run in order).
pred

## XGB & LR

In [None]:
# Soft-voting ensemble of a default XGBoost classifier and a logistic
# regression (C = 3, L2 penalty), fitted on all labelled rows.
XGB = XGBClassifier()
LR = LogisticRegression(
    penalty='l2',
    C=3,
    multi_class='multinomial',
    n_jobs=-1,
)

VC2 = VotingClassifier(
    estimators=[('xgb', XGB), ('lr', LR)],
    voting='soft',
)
VC2.fit(X, y)

In [None]:
# Predict the test table with the XGB + LR ensemble and save the labels in the
# 'Survived' column of a copy of submission.csv.
pred = VC2.predict(test)
sub = pd.read_csv('./submissions/submission.csv')
sub = sub.assign(Survived=pred)
sub.to_csv('./submissions/VC_by_XGB_and_LR.csv', index=False)

In [None]:
# Display the predictions most recently assigned to `pred` (VC2's, when cells run in order).
pred

## XGB & RF & LR

In [None]:
# Soft-voting ensemble of a random forest, an XGBoost classifier and a
# logistic regression, fitted on all labelled rows.
# The forest is seeded so that re-running the notebook reproduces the same
# model and submission; 2045 is the seed already used for the hold-out split
# and the CV folds in this notebook.
RF = RandomForestClassifier(random_state=2045)
XGB = XGBClassifier()
LR = LogisticRegression(C=3,
                        penalty='l2',
                        multi_class='multinomial',
                        n_jobs=-1)

# voting='soft' combines the members' predicted class probabilities.
VC3 = VotingClassifier(estimators=[('rf', RF), ('xgb', XGB), ('lr', LR)],
                       voting='soft')
VC3.fit(X, y)

In [None]:
# Predict the test table with the RF + XGB + LR ensemble and save the labels
# in the 'Survived' column of a copy of submission.csv.
pred = VC3.predict(test)
sub = pd.read_csv('./submissions/submission.csv').assign(Survived=pred)
sub.to_csv('./submissions/VC_by_XGB&RF&LR.csv', index=False)