![img](https://developer.nvidia.com/sites/default/files/pictures/2018/rapids/rapids-logo.png)

In [None]:
#Use this code to load Rapids!
import sys
!cp ../input/rapids/rapids.0.18.0 /opt/conda/envs/rapids.tar.gz
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
sys.path = ["/opt/conda/envs/rapids/lib/python3.7/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.7"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

In [None]:
#importing libraries
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import xgboost
!pip install pycomp
import cudf
import cuml


from cuml.metrics import accuracy_score
from cuml.preprocessing.model_selection import train_test_split
from cuml.preprocessing.TargetEncoder import TargetEncoder
from random import randint
from pycomp.viz.insights import *
from cuml.linear_model import Ridge
from cuml.svm import SVR
from cuml.neighbors import KNeighborsClassifier


In [None]:
# Read the competition train and test tables, both indexed by PassengerId.
train_df, test_df = (
    cudf.read_csv(csv_path, index_col='PassengerId')
    for csv_path in (
        '../input/tabular-playground-series-apr-2021/train.csv',
        '../input/tabular-playground-series-apr-2021/test.csv',
    )
)


In [None]:
# Replace missing values with 0 in BOTH tables so the rows scored at prediction
# time go through the same preprocessing as the rows the models are fitted on.
train_df = train_df.fillna(0)
test_df = test_df.fillna(0)

In [None]:
# Target encoder reused for every string column (it is re-fitted per column below).
encoder = TargetEncoder(
    n_folds=5,
    smooth=0.001,
    split_method='interleaved',
)

In [None]:

# Separate the target column from the feature columns.
y = train_df["Survived"]
X = train_df.drop(["Survived"], axis=1)

# Hold out 30% of the rows for validation.
# NOTE(review): shuffle=False combined with stratify=y looks contradictory —
# confirm which of the two cuML actually honors.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False, stratify=y
)

# Target-encode each string column: fit on the training split only, then apply
# the fitted encoder to the validation split and to the competition test set.
object_cols = [name for name in train_df.columns if train_df[name].dtype == 'object']
for col in object_cols:
    X_train[col] = encoder.fit_transform(X_train[col], y_train)
    X_test[col] = encoder.transform(X_test[col])
    test_df[col] = encoder.transform(test_df[col])

In [None]:
#model training
def train_model(model, X_train, y_train, X_test, y_test, model_name, threshold=0.5):
    """Fit ``model``, score it on the held-out split and print its accuracy.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for scoring.
    model_name : label printed in the report header.
    threshold : float or None, default 0.5
        Cut-off used to turn predictions into 0/1 labels before scoring
        (``prediction >= threshold`` becomes 1). Accuracy compares labels, so
        continuous outputs such as regressor scores must be binarized first.
        Pass ``None`` to score the raw output of ``predict`` unchanged, e.g.
        for a multi-class classifier.

    Returns
    -------
    The value returned by ``accuracy_score`` for the held-out split.
    """
    model.fit(X_train, y_train)
    predicts = model.predict(X_test)

    # Already-binary 0/1 predictions are unchanged by a 0.5 cut-off.
    if threshold is not None:
        predicts = (predicts >= threshold).astype("int32")

    acc = accuracy_score(y_test, predicts)

    print("\t\t\t--- Model:", model_name,"---")
    print("Accuracy: ", acc,"\t\t\t","\n")
    return acc

In [None]:
# Create the Ridge and SVR estimators.
ridge = Ridge(fit_intercept=True, normalize=False, solver="eig")
svr = SVR(kernel='rbf', gamma='scale', C=1, epsilon=0.3)

models = [ridge, svr]
names = ["Ridge", "SVR"]

# Fit and score each estimator on the same train/validation split.
for estimator, label in zip(models, names):
    train_model(
        model=estimator,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=label,
    )

In [None]:
# Wrap each split in a DMatrix; the competition test set carries no labels.
dtrain = xgboost.DMatrix(X_train, label=y_train)
dval = xgboost.DMatrix(X_test, label=y_test)
dtest = xgboost.DMatrix(test_df)

# Booster hyper-parameters.
params1 = {
    'objective': 'binary:logistic',
    'booster': 'gbtree',
    'tree_method': 'gpu_hist',
    'eval_metric': 'auc',
    'random_state': 42,
    'max_depth': 15,
    'learning_rate': 0.03,
    'min_child_weight': 20,
    'gamma': 0.1,
    'alpha': 0.2,
    'lambda': 9,
    'colsample_bytree': 0.2,
    'subsample': 0.8,
}

# Number of boosting rounds and the datasets evaluated during training.
num_round = 50
evallist = [(dval, 'validation'), (dtrain, 'train')]

In [None]:
# Train the booster for num_round rounds, evaluating on the sets in evallist.
xgb = xgboost.train(params1, dtrain, num_round, evallist)

# With the binary:logistic objective predict() yields probabilities rather than
# class labels, so binarize at 0.5 before computing accuracy.
predicts = xgb.predict(xgboost.DMatrix(X_test))
predicted_labels = (predicts >= 0.5).astype("int32")
acc = accuracy_score(y_test, predicted_labels)
acc

In [None]:
# Score the competition test set with the trained booster.
preds1 = xgb.predict(xgboost.DMatrix(test_df))

# Select PassengerId with single brackets so .values is a 1-D array that can be
# used directly as one column of the submission frame (double brackets would
# give an (n, 1) two-dimensional array).
ids = cudf.read_csv('../input/tabular-playground-series-apr-2021/sample_submission.csv')["PassengerId"].values

In [None]:
# Turn each predicted score into a 0/1 label using a 0.5 cut-off.
pred = [1 if score >= 0.5 else 0 for score in preds1]

In [None]:
# Assemble the submission table keyed by PassengerId and preview the first rows.
df_sub = {'PassengerId': ids, 'Survived': pred}
df_predictions = cudf.DataFrame(df_sub)
df_predictions = df_predictions.set_index(['PassengerId'])
df_predictions.head(10)

# Interpretability 

In [None]:
# matplotlib.pyplot is already imported as plt in the imports cell, so it is
# not re-imported here.
# Use a large default figure size for the importance charts.
plt.rcParams["figure.figsize"] = (20, 13)

# Feature importance with plot_importance's default importance type.
xgboost.plot_importance(xgb)
plt.title("xgboost.plot_importance(model)")
plt.show()

In [None]:
# Feature importance ranked by the "cover" importance type.
cover_ax = xgboost.plot_importance(xgb, importance_type="cover")
cover_ax.set_title('xgboost.plot_importance(model, importance_type="cover")')
plt.show()

In [None]:
# Feature importance ranked by the "gain" importance type.
gain_ax = xgboost.plot_importance(xgb, importance_type="gain")
gain_ax.set_title('xgboost.plot_importance(model, importance_type="gain")')
plt.show()

In [None]:
import shap
# Build a SHAP TreeExplainer for the trained booster and compute SHAP values for
# every row of X_train; this may take a while on the full training split.
explainer = shap.TreeExplainer(xgb)
shap_values = explainer.shap_values(X_train)

In [None]:
# Hand SHAP a pandas copy of the cuDF frame so the bar chart can be labelled
# with the real column names instead of generic placeholders.
shap.summary_plot(shap_values, X_train.to_pandas(), plot_type="bar")


In [None]:
# Display eli5's feature-weight view of the trained XGBoost booster.
from eli5 import show_weights
show_weights(xgb)