# A Cool Model

In [None]:
import json
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA

### Load Dataset

In [None]:
# Read the two training tables in one pass: X is the feature table and y is
# what the model is later fitted against.
X, y = map(pd.read_csv, ("data/X_train.zip", "data/y_train.zip"))

### Adjust Goal

In [None]:
# Scale every goal by its row's static_usd_rate (presumably this converts the
# goal to USD -- confirm against the dataset description).
X["adjusted_goal"] = X.static_usd_rate * X.goal
# The two source columns are redundant once their product has been stored.
X.drop(columns=["goal", "static_usd_rate"], inplace=True)

### Extract Categories

In [None]:
def _get_slug(x):
    """Split the ``slug`` field of a JSON-encoded category into its parts.

    Args:
        x: JSON text of an object that may carry a ``"slug"`` string such as
            ``"parent/child"``.

    Returns:
        The slug split on ``"/"``, right-padded with empty strings so the
        list always holds at least two elements. A missing slug yields
        ``["", ""]``; a slug without ``"/"`` yields ``[slug, ""]``.

    Raises:
        json.JSONDecodeError: If ``x`` is not valid JSON.
    """
    categories = json.loads(x).get("slug", "/").split("/")

    # A slug without "/" splits into a single part; pad it so callers can
    # index [1] without an IndexError. Longer slugs are left untouched
    # because multiplying a list by a negative count gives [].
    categories += [""] * (2 - len(categories))

    return categories

# Parse each category's JSON once and reuse the resulting parts for both
# derived columns instead of decoding every row twice.
slug_parts = X["category"].apply(_get_slug)

# First slug part is the general category, second part is the precise one.
X["gen_cat"] = slug_parts.apply(lambda parts: parts[0])
X["precise_cat"] = slug_parts.apply(lambda parts: parts[1])

# The raw JSON column is not needed once both parts have been extracted.
X.drop("category", axis=1, inplace=True)

### Keep only the columns of interest

In [None]:
# Restrict the table to the four columns used in the following steps.
X = X.loc[
    :,
    ["gen_cat", "precise_cat", "adjusted_goal", "disable_communication"],
]

In [None]:
# Show the first five rows as the cell output.
X.head(n=5)

### One Hot Encoding

In [None]:
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# name. Try the current spelling first and fall back for older installations;
# an unknown keyword makes the constructor raise TypeError.
try:
    ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
except TypeError:
    ohe = OneHotEncoder(sparse=False, handle_unknown="ignore")

# Build the indicator frame on X's own index so the concat below pairs rows
# one-to-one whatever X's index looks like. add_prefix turns the positional
# integer labels into strings ("cat_0", "cat_1", ...): recent scikit-learn
# estimators reject frames whose column labels mix str and int.
X_cat = pd.DataFrame(
    ohe.fit_transform(X[["gen_cat", "precise_cat"]]),
    index=X.index,
).add_prefix("cat_")
X = pd.concat([X, X_cat], axis=1)

# The raw label columns are replaced by their indicator columns.
X.drop(["gen_cat", "precise_cat"], axis=1, inplace=True)

In [None]:
# Show the first five rows as the cell output.
X.head(n=5)

### Dimensionality Reduction - PCA

In [None]:
# Reduce the feature table to 15 principal components; X is rebound to the
# value returned by fit_transform, so it is no longer the DataFrame.
# NOTE(review): no scaling step is visible before this point, so a column with
# a much larger numeric range could dominate the components -- confirm this is
# intended.
pca = PCA(n_components=15)
X = pca.fit_transform(X)

In [None]:
# Display the current value of X as the cell output.
X

### Train model

In [None]:
# Decision tree limited to depth 9 that only splits nodes holding at least
# 5 samples.
# NOTE(review): random_state is not set, so repeated runs may not build the
# exact same tree -- confirm whether reproducibility matters here.
model = DecisionTreeClassifier(
    min_samples_split=5,
    max_depth=9,
)
# Left as the final expression so the fitted estimator is shown as cell output.
model.fit(X=X, y=y)