In [None]:
!pip install seaborn
!pip install xgboost
!pip install lightgbm

## PairPlot, HeatMap

In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
# Load seaborn's "iris" example dataset.
iris = sns.load_dataset(name="iris")

In [None]:
# Preview the first three rows.
iris.head(n=3)

In [None]:
# Pairwise plot grid over the iris columns, each facet 2 inches tall.
sns.pairplot(data=iris, height=2)

In [None]:
# Pairwise plot grid over the iris columns, colored by the "species" column.
sns.pairplot(data=iris, hue="species", height=2)

In [None]:
# Correlation matrix of the iris columns; the "species" column is dropped
# before casting the remaining columns to float.
iris_corr = iris.drop(columns="species").astype(float).corr()

colormap = plt.cm.RdBu
plt.figure(figsize=(9, 7))
sns.heatmap(
    iris_corr,
    cmap=colormap,
    vmax=1.0,
    annot=True,       # print the coefficient inside each cell
    square=True,
    linewidths=0.1,
    linecolor='white',
)

In [None]:
# Load seaborn's "titanic" example dataset and preview the first three rows.
ttc = sns.load_dataset(name="titanic")
ttc.head(n=3)

In [None]:
# Encode / impute the columns used for modelling, then drop the rest.
#   sex   : 'female' -> 0, 'male' -> 1
#   alone : False -> 0, True -> 1
#   age   : missing values replaced by the column median
# The columns are addressed directly instead of looping over every column
# name, and `_` is no longer used as a loop variable (in IPython `_` holds the
# previous cell output).
# NOTE: this cell rebinds `ttc`, so it is meant to run once after the cell
# that loads the raw dataset; re-running it on the already-encoded frame fails.
drop_elements = ['class', 'who', 'adult_male', 'deck', 'embark_town', 'embarked', 'alive']
ttc = (
    ttc
    .assign(
        sex=lambda df: df['sex'].map({'female': 0, 'male': 1}).astype(int),
        alone=lambda df: df['alone'].map({False: 0, True: 1}).astype(int),
        age=lambda df: df['age'].fillna(df['age'].median()),
    )
    .drop(columns=drop_elements)
)
ttc.head(3)

In [None]:
# Every column except the "survived" column.
features = ttc.drop(columns="survived")

In [None]:
# Pairwise plot grid over the feature columns.
sns.pairplot(data=features)

In [None]:
# Correlation matrix of the feature columns, cast to float first.
features_corr = features.astype(float).corr()

colormap = plt.cm.RdBu
plt.figure(figsize=(14, 12))
sns.heatmap(
    features_corr,
    cmap=colormap,
    vmax=1.0,
    annot=True,       # print the coefficient inside each cell
    square=True,
    linewidths=0.1,
    linecolor='white',
)

## DecisionTree

In [None]:
from sklearn import tree

In [None]:
# Separate the predictor columns from the "survived" column and preview them.
features = ttc.drop(columns="survived")
target = ttc["survived"]
features.head(n=3)

In [None]:
# Fit a depth-3 decision tree on all rows.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split, so ties between equally good splits could otherwise resolve
# differently from one run to the next.
clf = tree.DecisionTreeClassifier(max_depth=3, random_state=42)
clf.fit(features, target)

In [None]:
# Draw the fitted tree on an explicitly created axes.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(8, 8), dpi=300)
tree.plot_tree(
    clf,
    # A plain list: some scikit-learn releases reject a pandas Index here.
    feature_names=list(features.columns),
    class_names=['death', 'survived'],
    filled=True,
    ax=axes,  # nrows=ncols=1, so `axes` is a single Axes object
);

In [None]:
# Print the decision tree's importance score next to each feature column name.
for column_name, importance in zip(features.columns, clf.feature_importances_):
    print(f'importance of {column_name} is :{importance}')

### RandomForest

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Hold out 20% of the rows for evaluation.
# random_state is pinned so the shuffled split, and therefore every accuracy
# figure computed from it, is the same on each Restart & Run All.
train_x, test_x, train_y, test_y = train_test_split(
    features,
    target,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Train a 30-tree, depth-3 random forest on the training split and report its
# accuracy on the held-out split.
random_forest = RandomForestClassifier(
    n_estimators=30,
    max_depth=3,
    random_state=42,
).fit(train_x, train_y)

y_pred = random_forest.predict(test_x)
accuracy_random_forest = accuracy_score(y_true=test_y, y_pred=y_pred)
print(f'Accuracy: {accuracy_random_forest}')

In [None]:
# Print the random forest's importance score next to each feature column name.
for column_name, importance in zip(features.columns, random_forest.feature_importances_):
    print(f'importance of {column_name} is :{importance}')

### XGBoost

In [None]:
import xgboost as xgb

In [None]:
# Booster configuration.
# The objective is stated explicitly: XGBoost's documented default objective is
# a regression one, and 'num_class' only has a documented meaning together with
# a multi-class objective. With 'multi:softmax', predict() returns class labels
# rather than scores.
param = {
    'objective': 'multi:softmax',
    'num_class': 2,
    'max_depth': 3,
    'eta': 1,
}
num_round = 10  # number of boosting rounds

dtrain = xgb.DMatrix(train_x, label=train_y)
bst = xgb.train(param, dtrain, num_boost_round=num_round)

In [None]:
# Predict on the held-out split and report accuracy to four decimals.
dtest = xgb.DMatrix(data=test_x)
pred = bst.predict(dtest)
score = accuracy_score(y_true=test_y, y_pred=pred)
print(f'Accuracy: {score:.4f}')

In [None]:
# Plot the trained booster's feature importances.
xgb.plot_importance(booster=bst)

### LightGBM

In [None]:
import lightgbm as lgb

In [None]:
# Wrap the splits as LightGBM datasets; the evaluation set references the
# training set.
train_data = lgb.Dataset(data=train_x, label=train_y)
eval_data = lgb.Dataset(data=test_x, label=test_y, reference=train_data)

In [None]:
# LightGBM training parameters (same keys and values, in the same order).
params = dict(
    task='train',
    boosting_type='gbdt',
    objective='multiclass',
    num_class=2,
    verbose=2,
)

In [None]:
# Train for 10 boosting rounds, evaluating on the held-out dataset.
# Evaluation logging every 5 rounds is requested through the log_evaluation
# callback: the `verbose_eval` keyword was removed from lgb.train in
# LightGBM 4.x and raises TypeError there.
gbm = lgb.train(
    params,
    train_data,
    num_boost_round=10,
    valid_sets=[eval_data],
    callbacks=[lgb.log_evaluation(period=5)],
)

In [None]:
# Model output for the held-out rows; indexed per row in the next cell.
preds = gbm.predict(data=test_x)

In [None]:
# Pick, for every row of `preds`, the index of its largest entry as the
# predicted class, then score against the held-out labels.
# Vectorized over axis 1 instead of appending one argmax per row in a loop.
pred = np.argmax(preds, axis=1)
accuracy_score(test_y, pred)

In [None]:
# Plot the trained LightGBM booster's feature importances.
lgb.plot_importance(booster=gbm)