# Task for Today  

***

## UK Fuel Sale Year Prediction  

Given *data about fuel sales in the UK*, let's try to predict whether a given sale was **made in the last nine years** (that is, in 2012 or later).  
  
We will train several different classification models and compare their predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Load the fuel price CSV into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='../input/uk-fuel-price-weekly-statistics20032020/fuel price.csv',
)

In [None]:
# Show the loaded DataFrame as this cell's output.
data

In [None]:
# Print pandas' summary of the DataFrame (see `DataFrame.info`).
data.info()

# Preprocessing

In [None]:
def preprocess_inputs(df, threshold_year=2012):
    """Build scaled features and binary year labels from the raw DataFrame.

    The input is not modified. The 'Unnamed: 0' column is dropped, the
    'Date' column is parsed and replaced by numeric 'Month' and 'Day'
    columns, and the parsed year is turned into the target label.

    Args:
        df: DataFrame containing at least the 'Unnamed: 0' and 'Date'
            columns. Every remaining column is passed to the scaler, so it
            is assumed to be numeric.
        threshold_year: Rows whose year is greater than or equal to this
            value are labelled 1, all others 0. Defaults to 2012.

    Returns:
        A tuple ``(X, y)``: ``X`` is a DataFrame of standardized features
        and ``y`` is an integer Series named 'Year' holding the 0/1 labels.
        Both carry the index of ``df``.

    Raises:
        KeyError: If 'Unnamed: 0' or 'Date' is missing from ``df``.
    """
    df = df.copy()

    # Drop index column
    df = df.drop('Unnamed: 0', axis=1)

    # Parse the dates once and use the vectorized .dt accessor instead of a
    # per-row Python lambda.
    # NOTE(review): parsing relies on pandas' default format inference --
    # confirm the file's dates are not day-first before trusting Month/Day.
    dates = pd.to_datetime(df['Date'])
    df = df.drop('Date', axis=1)
    df['Month'] = dates.dt.month
    df['Day'] = dates.dt.day

    # Create labels from the year; keep the Series name 'Year' for callers.
    y = (dates.dt.year >= threshold_year).astype('int64').rename('Year')

    # Everything that is left is a feature.
    X = df

    # Scale X with a standard scaler.
    # NOTE: the scaler is fit on every row passed in, so when the caller
    # splits into train/test afterwards, the test rows have already
    # influenced the scaling statistics.
    scaler = StandardScaler()
    X = pd.DataFrame(
        scaler.fit_transform(X),
        columns=X.columns,
        # Keep the original index so X stays aligned with y even when the
        # input does not use a default 0..n-1 index.
        index=X.index,
    )

    return X, y

In [None]:
# Build the scaled feature matrix and the 0/1 label vector from the raw data.
X, y = preprocess_inputs(df=data)

In [None]:
# Show the scaled feature DataFrame returned by preprocess_inputs.
X

In [None]:
# Show the label Series returned by preprocess_inputs.
y

In [None]:
# Count how many rows carry each label value to check the class balance.
y.value_counts()

# Training

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split_parts = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=123,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# (label, estimator) pairs in display order. The labels are left-padded to a
# common width so the accuracy report lines up in a single column.
model_specs = [
    ("   Logistic Regression", LogisticRegression()),
    ("Support Vector Machine", SVC()),
    ("         Decision Tree", DecisionTreeClassifier()),
    ("        Neural Network", MLPClassifier()),
    ("   K-Nearest Neighbors", KNeighborsClassifier()),
    ("     Gradient Boosting", GradientBoostingClassifier()),
    ("         Random Forest", RandomForestClassifier()),
    ("              AdaBoost", AdaBoostClassifier()),
]

# Mapping from padded label to its (not yet fitted) estimator.
models = dict(model_specs)

In [None]:
# Fit every estimator in place on the training split.
for label in models:
    models[label].fit(X_train, y_train)

# Results

In [None]:
# Report each fitted model's accuracy on the held-out test split.
print("Model Test Accuracies:\n----------------------")

for name, model in models.items():
    # score() returns a fraction; convert it to a percentage for display.
    accuracy_pct = model.score(X_test, y_test) * 100
    report_line = name + ": {:.2f}%".format(accuracy_pct)
    print(report_line)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/8TCS3jEKS_U