# Task for Today  

***

## Abalone Attribute Prediction  

Given *data about abalone*, let's try to predict **multiple attributes** of a given organism.  
  
We will use linear regression models for the regression tasks and logistic regression models for the classification tasks.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, LogisticRegression

In [None]:
# Load the abalone CSV into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv('../input/abalone-uci/abalone_original.csv')

In [None]:
# Bare trailing expression: when run as a notebook cell this renders the
# loaded DataFrame as the cell output.
data

In [None]:
# Print a concise summary of the frame (column names, non-null counts, dtypes).
data.info()

# Preprocessing + Training Function

In [None]:
def preprocess_and_train(df, target, task):
    """Train a linear model to predict ``target`` and return its test score.

    The input frame is copied, ``sex`` is one-hot encoded when it is a
    feature rather than the target, the rows are split 70/30 into train and
    test sets (``random_state=1``), the features are standardized with
    statistics learned from the training split only, and the model is fit on
    the training split.

    Args:
        df: DataFrame containing the ``target`` column. Every other column
            is used as a feature; a ``sex`` column is expected whenever
            ``target`` is not ``'sex'``.
        target: Name of the column to predict.
        task: ``'regression'`` to fit ``LinearRegression`` or
            ``'classification'`` to fit ``LogisticRegression``.

    Returns:
        The value of ``model.score(X_test, y_test)`` on the held-out split.

    Raises:
        ValueError: If ``task`` is not ``'regression'`` or
            ``'classification'``.
    """
    # Fail fast on an unsupported task. Without this check no model is ever
    # bound and the error only surfaces after splitting and scaling, as an
    # UnboundLocalError at the fit call.
    if task not in ('regression', 'classification'):
        raise ValueError(
            "task must be 'regression' or 'classification', got {!r}".format(task)
        )

    # Work on a copy so the caller's frame is never modified
    df = df.copy()

    # If the sex column is not the target, one-hot encode it
    if target != 'sex':
        dummies = pd.get_dummies(df['sex'])
        df = pd.concat([df, dummies], axis=1)
        df = df.drop('sex', axis=1)

    # Split target from df
    y = df[target].copy()
    X = df.drop(target, axis=1).copy()

    # Train-test split (fixed seed so repeated runs give the same split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=1)

    # Scale X: fit on the training split only, then apply to both splits so
    # no test-set statistics leak into training
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train), columns=X.columns)
    X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns)

    # Define model (task was validated above, so these two cases are exhaustive)
    if task == 'regression':
        model = LinearRegression()
    else:
        model = LogisticRegression()

    # Fit model to train set
    model.fit(X_train, y_train)

    # Return the test results
    return model.score(X_test, y_test)

# Predicting Sex Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the sex-prediction run.
data

In [None]:
# Classify `sex` from the remaining columns and report the held-out score
# as a percentage.
results = preprocess_and_train(df=data, target='sex', task='classification')
print(
    "Sex Classification Accuracy: {:.2f}%".format(100 * results)
)

# Predicting Length Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the length-prediction run.
data

In [None]:
# Regress `length` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='length', task='regression')
print(
    "Length Regression R^2: {:.4f}".format(results)
)

# Predicting Diameter Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the diameter-prediction run.
data

In [None]:
# Regress `diameter` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='diameter', task='regression')
print(
    "Diameter Regression R^2: {:.4f}".format(results)
)

# Predicting Height Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the height-prediction run.
data

In [None]:
# Regress `height` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='height', task='regression')
print(
    "Height Regression R^2: {:.4f}".format(results)
)

# Predicting Whole-Weight Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the whole-weight-prediction run.
data

In [None]:
# Regress `whole-weight` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='whole-weight', task='regression')
print(
    "Whole-Weight Regression R^2: {:.4f}".format(results)
)

# Predicting Shucked-Weight Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the shucked-weight-prediction run.
data

In [None]:
# Regress `shucked-weight` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='shucked-weight', task='regression')
print(
    "Shucked-Weight Regression R^2: {:.4f}".format(results)
)

# Predicting Viscera-Weight Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the viscera-weight-prediction run.
data

In [None]:
# Regress `viscera-weight` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='viscera-weight', task='regression')
print(
    "Viscera-Weight Regression R^2: {:.4f}".format(results)
)

# Predicting Shell-Weight Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the shell-weight-prediction run.
data

In [None]:
# Regress `shell-weight` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='shell-weight', task='regression')
print(
    "Shell-Weight Regression R^2: {:.4f}".format(results)
)

# Predicting Rings Column

In [None]:
# Bare expression: shows the DataFrame as the cell output, for reference
# before the rings-prediction runs.
data

In [None]:
# Regress `rings` on the remaining columns and report the held-out score.
results = preprocess_and_train(df=data, target='rings', task='regression')
print(
    "Rings Regression R^2: {:.4f}".format(results)
)

In [None]:
# Same target as the regression run, but fitted as a classification task:
# the `rings` values are passed to the classifier as labels. The held-out
# score is reported as a percentage.
results = preprocess_and_train(df=data, target='rings', task='classification')
print(
    "Rings Classification Accuracy: {:.2f}%".format(100 * results)
)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/5YHWuUsoKEo