# Machine Learning - Decision Tree

## Install scikit-learn

In [None]:
### Install Sci-Kit Learn
!pip install scikit-learn

## Import Libraries

In [None]:
## Begin Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
## End Imports

## Pokemon Dataset

<center><img src="../images/web/pokemon.webp"></center>

### Load Dataset

In [None]:
### Load Pokemon CSV
# Relative path: resolved against the directory the notebook kernel runs in.
file = "../data/pokemon.csv"

# index_col=0 uses the first CSV column as the row index, not as a data column.
pokemon = pd.read_csv(filepath_or_buffer=file, index_col=0)

### View Info

In [None]:
### View Info


### Cleaning Data

In [None]:
## Fill Null Values with "None"




### Preview First Few Rows

In [None]:
### Preview First Few Rows




### Visualize Distribution of Data

In [None]:
### Distribution of Data
# DataFrame.hist draws a grid of histograms, one per column it can plot.
pokemon.hist(bins=30, edgecolor="black", figsize=(12, 7))

# hist() leaves its figure as the current one; grab it for the layout calls.
fig = plt.gcf()
fig.subplots_adjust(hspace=0.7, wspace=0.4)
fig.suptitle("Distribution of Data")
fig.tight_layout()

### Visualize Pokemon Type Count

#### Get Value Counts

__Syntax__:
```python
counts = dataframe[column_name].value_counts()
```

In [None]:
### Count Pokemon per Primary Type
# value_counts() returns a Series indexed by type name and sorted by count
# (descending). The bar plot further down reads its .index for the labels and
# its .values for the bar lengths.
# Assumes 'Type 1' is the primary-type column (it is also used as the target).
pokemon_type_count = pokemon['Type 1'].value_counts()

pokemon_type_count



#### Visualize Types as Bar Plot

__Syntax__:
```python
sns.barplot(x=x_value, y=y_value)
```

In [None]:
# Bar Plot
fig, ax = plt.subplots(figsize=(10, 7))

# Horizontal bars: counts on x, type names on y. hue mirrors y so that the
# palette assigns each type its own colour.
sns.barplot(
    x=pokemon_type_count.values,
    y=pokemon_type_count.index,
    hue=pokemon_type_count.index,
    palette='viridis',
    ax=ax,
)

ax.set_title("Pokemon Type Count", fontsize=14)
ax.set_xlabel("Count")
ax.set_ylabel("Type")

# Enlarge the tick labels on both axes of the current plot.
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.show()

### Prepare the Data

#### Define Features (`X`) and Target (`y`)

In [None]:
# Define Features and Target
# Columns used as model inputs.
features = [
    'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def',
    'Speed', 'Total', 'Legendary', 'Generation',
]
# Column the model learns to predict.
target = 'Type 1'

# X: the selected feature columns (one row per Pokemon); y: the label per row.
X = pokemon[features]
y = pokemon[target]






### Split up the Data

In [None]:
# Split the data into training and testing sets (80/20 split)





## Build Model - Decision Tree

### Build Model

In [None]:
## Build Decision Tree
# DecisionTreeClassifier is already imported in the imports cell at the top.
# A fixed random_state keeps the fitted tree reproducible between runs.
clf = DecisionTreeClassifier(random_state=42)

### Train Model

__Syntax__:
```python
model.fit(x_training_data, y_training_data)
```

In [None]:
## Train Model




### Get Predictions

__Syntax:__
```python
predictions = clf.predict(test_data)
```

In [None]:
## Get Predictions




### View Predictions vs Actual Data

In [None]:
### Create DataFrame of Predictions










### Evaluate Model

### Classification Report

__Syntax__:

```python
classification_report(Y_TEST, Y_PREDICTIONS)
```

In [None]:
### Classification Report Results




### Improve the Data

Focus on predicting one Pokémon type.

Create a new column called `Is_Electric` which contains `True` if the Pokémon is Electric and `False` if the Pokémon is not Electric.

In [None]:
## Improve the Data - Electric vs Non-Electric
# Element-wise comparison yields a boolean column: True where the primary type
# is Electric, False otherwise.
# NOTE(review): only 'Type 1' is checked, so a Pokemon whose secondary type is
# Electric is labelled False. Confirm this is the intended definition and that
# the dataset spells the label exactly 'Electric'.
pokemon['Is_Electric'] = pokemon['Type 1'] == 'Electric'



### Inspect Data

Get the value counts of the new `Is_Electric` column.

In [None]:
# Value Counts


### Set Up Features and Target

In [None]:
# Set Up Features and Target Again
# Same input columns as before; only the target changes to the binary label.
features = [
    'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def',
    'Speed', 'Total', 'Legendary', 'Generation',
]
target = 'Is_Electric'

# Rebuild X and y so the train/test split that follows uses the new target
# rather than failing on undefined names or reusing stale values.
X = pokemon[features]
y = pokemon[target]



### Set Up Training Data

In [None]:
# Set Up Training and Testing Data
# test_size=0.2 holds out 20% of the rows for testing; random_state=42 fixes
# the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### Set Up and Train Model

In [None]:
# Set Up Model



# Train Model


# Get Predictions



### Compare Actual Values vs Predicted Values

In [None]:
# Compare Predictions and Actual Values










### Model Evaluation

In [None]:
### Classification Report Results



