# Task for Today  

***

## SUV Data Multiple Target Prediction  

Given *data about SUV purchase decisions*, let's try to predict **several different target variables** from the remaining columns of the data.

We will use decision tree models to make our predictions: a classifier for categorical targets and a regressor for numeric targets.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

In [None]:
# Load the SUV purchase CSV (path is relative to the notebook's working directory) into a DataFrame.
data = pd.read_csv('../input/suv-purchase-decision/SUV_Purchase.csv')

In [None]:
# Bare expression: in a notebook cell this renders the DataFrame as the cell output.
data

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
data.info()

# Model Pipeline

In [None]:
def predict_on_raw_data(df, target, task, *, random_state=1):
    """Fit a decision tree on the raw data and score it on a held-out split.

    The 'User ID' column is discarded, ``target`` is separated from the
    remaining columns, and the rows are split 70/30 into train and test
    sets. When 'Gender' is among the features it is encoded as
    Female -> 0 / Male -> 1 inside the pipeline; when 'Gender' is the target
    its labels are passed to the model unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data. Must contain 'User ID', 'Gender' and the ``target`` column.
        The caller's frame is not modified.
    target : str
        Name of the column to predict.
    task : str
        'regression' selects ``DecisionTreeRegressor``; any other value
        selects ``DecisionTreeClassifier``.
    random_state : int or None, keyword-only, default 1
        Seed used for both the train/test split and the decision tree.
        Decision trees permute features at every split, so leaving the tree
        unseeded makes the returned score vary between runs even when the
        split itself is fixed. Pass ``None`` to opt out of seeding.

    Returns
    -------
    float
        ``model.score`` on the test split: mean accuracy for the classifier,
        R^2 for the regressor.
    """
    # drop() returns a new frame, so the caller's DataFrame is left untouched.
    df = df.drop(columns='User ID')

    # Split df into X and y
    y = df[target]
    X = df.drop(columns=target)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.7, shuffle=True, random_state=random_state
    )

    # Pick the estimator once; seed it so repeated runs give the same score.
    tree_cls = DecisionTreeRegressor if task == 'regression' else DecisionTreeClassifier
    tree = tree_cls(random_state=random_state)

    if target == 'Gender':
        # 'Gender' has been removed from X, so there is no column left to
        # encode and the ColumnTransformer must be skipped.
        model = tree
    else:
        gender_encoder = FunctionTransformer(
            lambda column: column.replace({'Female': 0, 'Male': 1})
        )
        preprocessor = ColumnTransformer(
            transformers=[('binary', gender_encoder, ['Gender'])],
            remainder='passthrough',  # keep every other feature as-is
        )
        model = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('mod', tree),
        ])

    # Fit model
    model.fit(X_train, y_train)

    # Get results
    return model.score(X_test, y_test)

# Results

In [None]:
# Classification targets -> test-set accuracy
gender_acc, purchased_acc = (
    predict_on_raw_data(data, target=column, task='classification')
    for column in ('Gender', 'Purchased')
)

# Regression targets -> test-set R^2
age_r2, salary_r2 = (
    predict_on_raw_data(data, target=column, task='regression')
    for column in ('Age', 'EstimatedSalary')
)

In [None]:
# Report every score in one call, one line per target.
print(
    "Target: Gender (Accuracy): {:.2f}%".format(gender_acc * 100),
    "Target: Purchased (Accuracy): {:.2f}%".format(purchased_acc * 100),
    "Target: Age (R^2): {:.4f}".format(age_r2),
    "Target: EstimatedSalary (R^2): {:.4f}".format(salary_r2),
    sep="\n",
)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/YlUqKMIgeUg