# Case Study: Predicting Car Prices with XGBoost

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('automobile.csv')

## Data exploration

In [3]:
print(list(dataset.columns))

['3', '?', 'alfa-romero', 'gas', 'std', 'two', 'convertible', 'rwd', 'front', '88.60', '168.80', '64.10', '48.80', '2548', 'dohc', 'four', '130', 'mpfi', '3.47', '2.68', '9.00', '111', '5000', '21', '27', '13495']


- As we can see the first row does not have the column names in this csv-file
- We'll need to enter them seperately

In [4]:
dataset.head()

Unnamed: 0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.60,...,130,mpfi,3.47,2.68,9.00,111,5000,21,27,13495
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
1,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
2,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
3,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
4,2,?,audi,gas,std,two,sedan,fwd,front,99.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250


- We can see the data has question mark values indicating missing values

In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204 entries, 0 to 203
Data columns (total 26 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   3            204 non-null    int64  
 1   ?            204 non-null    object 
 2   alfa-romero  204 non-null    object 
 3   gas          204 non-null    object 
 4   std          204 non-null    object 
 5   two          204 non-null    object 
 6   convertible  204 non-null    object 
 7   rwd          204 non-null    object 
 8   front        204 non-null    object 
 9   88.60        204 non-null    float64
 10  168.80       204 non-null    float64
 11  64.10        204 non-null    float64
 12  48.80        204 non-null    float64
 13  2548         204 non-null    int64  
 14  dohc         204 non-null    object 
 15  four         204 non-null    object 
 16  130          204 non-null    int64  
 17  mpfi         204 non-null    object 
 18  3.47         204 non-null    object 
 19  2.68    

- It appears like there are no null values in the dataset but that is only because the null values are indicated with ?s

In [6]:
is_NaN = dataset.isnull()
row_has_NaN = is_NaN.any(axis=1)
rows_with_NaN = dataset[row_has_NaN]
print(len(rows_with_NaN))

0


## Importing the dataset again taking into account what we learned about the data

Let's read the data in again but this time:
- let's specify that *?* indicates a null value in the data
- specifying the data does not include the header row
- let's specify the column names from the data description file

In [7]:
dataset = pd.read_csv(
    'automobile.csv',
    na_values = '?',
    header = None,
    names = [
        'symboling',
        'normalized_losses',
        'make',
        'fuel_type',
        'aspiration',
        'num_of_doors',
        'body_style',
        'drive_wheels',
        'engine_location',
        'wheel_base',
        'length',
        'width',
        'height',
        'curb_weight',
        'engine_type',
        'num_of_cylinders',
        'engine_size',
        'fuel_system',
        'bore',
        'stroke',
        'compression_ratio',
        'horsepower',
        'peak_rpm',
        'city_mpg',
        'highway_mpg',
        'price'
    ])
print(list(dataset.columns))

['symboling', 'normalized_losses', 'make', 'fuel_type', 'aspiration', 'num_of_doors', 'body_style', 'drive_wheels', 'engine_location', 'wheel_base', 'length', 'width', 'height', 'curb_weight', 'engine_type', 'num_of_cylinders', 'engine_size', 'fuel_system', 'bore', 'stroke', 'compression_ratio', 'horsepower', 'peak_rpm', 'city_mpg', 'highway_mpg', 'price']


In [8]:
dataset.head()

Unnamed: 0,symboling,normalized_losses,make,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0
1,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0
2,1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154.0,5000.0,19,26,16500.0
3,2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102.0,5500.0,24,30,13950.0
4,2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115.0,5500.0,18,22,17450.0


In [9]:
is_NaN = dataset.isnull()
row_has_NaN = is_NaN.any(axis=1)
rows_with_NaN = dataset[row_has_NaN]
print(len(rows_with_NaN))

46


- 46 rows have null values
- Let's see which columns have null values

In [10]:
dataset.isnull().any()

symboling            False
normalized_losses     True
make                 False
fuel_type            False
aspiration           False
num_of_doors          True
body_style           False
drive_wheels         False
engine_location      False
wheel_base           False
length               False
width                False
height               False
curb_weight          False
engine_type          False
num_of_cylinders     False
engine_size          False
fuel_system          False
bore                  True
stroke                True
compression_ratio    False
horsepower            True
peak_rpm              True
city_mpg             False
highway_mpg          False
price                 True
dtype: bool

- Let's see how many null values are there in each column

In [11]:
dataset[dataset.columns[dataset.isnull().any()].tolist()].isnull().sum()

normalized_losses    41
num_of_doors          2
bore                  4
stroke                4
horsepower            2
peak_rpm              2
price                 4
dtype: int64

- Normalized losses has so many missing values it's best to just drop that column

- We can also list all the rows with null values

In [12]:
dataset[dataset.isnull().any(axis = 1)][dataset.columns[dataset.isnull().any()].tolist()]

Unnamed: 0,normalized_losses,num_of_doors,bore,stroke,horsepower,peak_rpm,price
0,,two,3.47,2.68,111.0,5000.0,13495.0
1,,two,3.47,2.68,111.0,5000.0,16500.0
2,,two,2.68,3.47,154.0,5000.0,16500.0
5,,two,3.19,3.4,110.0,5500.0,15250.0
7,,four,3.19,3.4,110.0,5500.0,18920.0
9,,two,3.13,3.4,160.0,5500.0,
14,,four,3.31,3.19,121.0,4250.0,24565.0
15,,four,3.62,3.39,182.0,5400.0,30760.0
16,,two,3.62,3.39,182.0,5400.0,41315.0
17,,four,3.62,3.39,182.0,5400.0,36880.0


- Let's have a look at the data types

In [13]:
dataset.dtypes

symboling              int64
normalized_losses    float64
make                  object
fuel_type             object
aspiration            object
num_of_doors          object
body_style            object
drive_wheels          object
engine_location       object
wheel_base           float64
length               float64
width                float64
height               float64
curb_weight            int64
engine_type           object
num_of_cylinders      object
engine_size            int64
fuel_system           object
bore                 float64
stroke               float64
compression_ratio    float64
horsepower           float64
peak_rpm             float64
city_mpg               int64
highway_mpg            int64
price                float64
dtype: object

Based on the data description file the features should have te following data types:

- symboling: int
- normalized-losses: float
- make: string (categorical)
- fuel-type: string (categorical)
- aspiration: string (categorical)
- num-of-doors: string (categorical)
- body-style: string (categorical)
- drive-wheels: string (categorical)
- engine-location: string (categorical)
- wheel-base: float
- length: float
- width: float
- height: float
- curb-weight: float
- engine-type: string (categorical)
- num-of-cylinders: string (categorical)
- engine-size: float
- fuel-system: string (categorical)
- bore: float
- stroke: float
- compression-ratio: float
- horsepower: float
- peak-rpm: float
- city-mpg: float
- highway-mpg: float
- price: float

- Let's look into the categorical features and their values

In [14]:
print('symboling: ',dataset.symboling.unique())
print('make', dataset.make.unique())
print('fuel_type', dataset.fuel_type.unique())
print('aspiration', dataset.aspiration.unique())
print('num_of_doors', dataset.num_of_doors.unique())
print('body_style', dataset.body_style.unique())
print('drive_wheels', dataset.drive_wheels.unique())
print('engine_location', dataset.engine_location.unique())
print('engine_type', dataset.engine_type.unique())
print('num_of_cylinders', dataset.num_of_cylinders.unique())
print('fuel_system', dataset.fuel_system.unique())

symboling:  [ 3  1  2  0 -1 -2]
make ['alfa-romero' 'audi' 'bmw' 'chevrolet' 'dodge' 'honda' 'isuzu' 'jaguar'
 'mazda' 'mercedes-benz' 'mercury' 'mitsubishi' 'nissan' 'peugot'
 'plymouth' 'porsche' 'renault' 'saab' 'subaru' 'toyota' 'volkswagen'
 'volvo']
fuel_type ['gas' 'diesel']
aspiration ['std' 'turbo']
num_of_doors ['two' 'four' nan]
body_style ['convertible' 'hatchback' 'sedan' 'wagon' 'hardtop']
drive_wheels ['rwd' 'fwd' '4wd']
engine_location ['front' 'rear']
engine_type ['dohc' 'ohcv' 'ohc' 'l' 'rotor' 'ohcf' 'dohcv']
num_of_cylinders ['four' 'six' 'five' 'three' 'twelve' 'two' 'eight']
fuel_system ['mpfi' '2bbl' 'mfi' '1bbl' 'spfi' '4bbl' 'idi' 'spdi']


## Dropping unwanted columns

In [15]:
# dataset.drop(columns=['symboling', 'normalized_losses'], inplace = True)
dataset.drop(columns=['symboling', 'normalized_losses', 'make'], inplace = True)

In [16]:
dataset.head()

Unnamed: 0,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
0,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0
1,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0
2,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,...,152,mpfi,2.68,3.47,9.0,154.0,5000.0,19,26,16500.0
3,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,...,109,mpfi,3.19,3.4,10.0,102.0,5500.0,24,30,13950.0
4,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,...,136,mpfi,3.19,3.4,8.0,115.0,5500.0,18,22,17450.0


## Taking care of missing data

In [17]:
dataset[dataset.num_of_doors.isnull()]

Unnamed: 0,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
27,gas,turbo,,sedan,fwd,front,93.7,157.3,63.8,50.6,...,98,mpfi,3.03,3.39,7.6,102.0,5500.0,24,30,8558.0
63,diesel,std,,sedan,fwd,front,98.8,177.8,66.5,55.5,...,122,idi,3.39,3.39,22.7,64.0,4650.0,36,42,10795.0


In [18]:
print(dataset.num_of_doors[dataset.body_style == 'sedan'].value_counts())

print(dataset.num_of_doors[dataset.engine_location == 'front'].value_counts())

print(dataset.num_of_doors[(dataset.body_style == 'sedan') & (dataset.engine_location == 'front')].value_counts())

print(dataset.num_of_doors[dataset.drive_wheels == 'fwd'].value_counts())

four    79
two     15
Name: num_of_doors, dtype: int64
four    114
two      86
Name: num_of_doors, dtype: int64
four    79
two     15
Name: num_of_doors, dtype: int64
four    68
two     50
Name: num_of_doors, dtype: int64


- the majority of sedan cars with an engine in the front have four doors

In [19]:
dataset.loc[27, 'num_of_doors'] = 'four'
dataset.loc[63, 'num_of_doors'] = 'four'

In [20]:
dataset[dataset.num_of_doors.isnull()]

Unnamed: 0,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price


In [21]:
dataset[dataset.price.isnull()]

Unnamed: 0,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
9,gas,turbo,two,hatchback,4wd,front,99.5,178.2,67.9,52.0,...,131,mpfi,3.13,3.4,7.0,160.0,5500.0,16,22,
44,gas,std,two,sedan,fwd,front,94.5,155.9,63.6,52.0,...,90,2bbl,3.03,3.11,9.6,70.0,5400.0,38,43,
45,gas,std,four,sedan,fwd,front,94.5,155.9,63.6,52.0,...,90,2bbl,3.03,3.11,9.6,70.0,5400.0,38,43,
129,gas,std,two,hatchback,rwd,front,98.4,175.7,72.3,50.5,...,203,mpfi,3.94,3.11,10.0,288.0,5750.0,17,28,


In [22]:
dataset.drop(dataset[dataset.price.isnull()].index, axis = 0, inplace = True)

In [23]:
dataset[dataset.price.isnull()]

Unnamed: 0,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price


- It makes sense to convert the num_of_cylinders to a numerical value

In [24]:
dataset.num_of_cylinders.value_counts()

four      157
six        24
five       10
eight       4
two         4
twelve      1
three       1
Name: num_of_cylinders, dtype: int64

In [25]:
dataset.loc[dataset.index[dataset.num_of_cylinders == 'four'], 'num_of_cylinders'] = 4
dataset.loc[dataset.index[dataset.num_of_cylinders == 'six'], 'num_of_cylinders'] = 6
dataset.loc[dataset.index[dataset.num_of_cylinders == 'five'], 'num_of_cylinders'] = 5
dataset.loc[dataset.index[dataset.num_of_cylinders == 'eight'], 'num_of_cylinders'] = 8
dataset.loc[dataset.index[dataset.num_of_cylinders == 'two'], 'num_of_cylinders'] = 2
dataset.loc[dataset.index[dataset.num_of_cylinders == 'three'], 'num_of_cylinders'] = 3
dataset.loc[dataset.index[dataset.num_of_cylinders == 'twelve'], 'num_of_cylinders'] = 12

In [26]:
dataset.num_of_cylinders.value_counts()

4     157
6      24
5      10
2       4
8       4
3       1
12      1
Name: num_of_cylinders, dtype: int64

In [27]:
dataset.dtypes

fuel_type             object
aspiration            object
num_of_doors          object
body_style            object
drive_wheels          object
engine_location       object
wheel_base           float64
length               float64
width                float64
height               float64
curb_weight            int64
engine_type           object
num_of_cylinders      object
engine_size            int64
fuel_system           object
bore                 float64
stroke               float64
compression_ratio    float64
horsepower           float64
peak_rpm             float64
city_mpg               int64
highway_mpg            int64
price                float64
dtype: object

In [28]:
dataset.num_of_cylinders = dataset.num_of_cylinders.astype('int')

In [29]:
dataset.dtypes

fuel_type             object
aspiration            object
num_of_doors          object
body_style            object
drive_wheels          object
engine_location       object
wheel_base           float64
length               float64
width                float64
height               float64
curb_weight            int64
engine_type           object
num_of_cylinders       int32
engine_size            int64
fuel_system           object
bore                 float64
stroke               float64
compression_ratio    float64
horsepower           float64
peak_rpm             float64
city_mpg               int64
highway_mpg            int64
price                float64
dtype: object

In [30]:
dataset[dataset.bore.isnull()]

Unnamed: 0,fuel_type,aspiration,num_of_doors,body_style,drive_wheels,engine_location,wheel_base,length,width,height,...,engine_size,fuel_system,bore,stroke,compression_ratio,horsepower,peak_rpm,city_mpg,highway_mpg,price
55,gas,std,two,hatchback,rwd,front,95.3,169.0,65.7,49.6,...,70,4bbl,,,9.4,101.0,6000.0,17,23,10945.0
56,gas,std,two,hatchback,rwd,front,95.3,169.0,65.7,49.6,...,70,4bbl,,,9.4,101.0,6000.0,17,23,11845.0
57,gas,std,two,hatchback,rwd,front,95.3,169.0,65.7,49.6,...,70,4bbl,,,9.4,101.0,6000.0,17,23,13645.0
58,gas,std,two,hatchback,rwd,front,95.3,169.0,65.7,49.6,...,80,mpfi,,,9.4,135.0,6000.0,16,23,15645.0


In [31]:
dataset.columns.get_loc("bore")

15

In [32]:
dataset.columns.get_loc("stroke")

16

In [33]:
dataset.columns.get_loc("horsepower")

18

In [34]:
dataset.columns.get_loc("peak_rpm")

19

In [35]:
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [36]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
imputer.fit(X[:, 15:20])
X[:, 15:20] = imputer.transform(X[:, 15:20])

In [37]:
X[:, 15:20]

array([[3.47, 2.68, 9.0, 111.0, 5000.0],
       [3.47, 2.68, 9.0, 111.0, 5000.0],
       [2.68, 3.47, 9.0, 154.0, 5000.0],
       ...,
       [3.58, 2.87, 8.8, 134.0, 5500.0],
       [3.01, 3.4, 23.0, 106.0, 4800.0],
       [3.78, 3.15, 9.5, 114.0, 5400.0]], dtype=object)

In [38]:
print(X)

[['gas' 'std' 'two' ... 5000.0 21 27]
 ['gas' 'std' 'two' ... 5000.0 21 27]
 ['gas' 'std' 'two' ... 5000.0 19 26]
 ...
 ['gas' 'std' 'four' ... 5500.0 18 23]
 ['diesel' 'turbo' 'four' ... 4800.0 26 27]
 ['gas' 'turbo' 'four' ... 5400.0 19 25]]


## Encoding categorical data

In [39]:
categorical_columns = ['fuel_type', 'aspiration', 'num_of_doors', 'body_style',
                       'drive_wheels', 'engine_location', 'engine_type', 'fuel_system']
for col in categorical_columns:
    print(dataset.columns.get_loc(col))

0
1
2
3
4
5
11
14


## Encoding the Independent Variable

In [40]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0, 1, 2, 3, 4, 5, 11, 14])] , remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [41]:
X[:1]

array([[0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 1.0, 0.0, 0.0, 88.6, 168.8, 64.1, 48.8, 2548, 4, 130, 3.47,
        2.68, 9.0, 111.0, 5000.0, 21, 27]], dtype=object)

## Splitting the dataset into the Training set and the Test set

In [42]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [43]:
print(X_train)

[[0.0 1.0 1.0 ... 5200.0 20 24]
 [0.0 1.0 1.0 ... 5800.0 23 29]
 [0.0 1.0 1.0 ... 4200.0 27 32]
 ...
 [0.0 1.0 1.0 ... 4800.0 27 32]
 [0.0 1.0 1.0 ... 5200.0 19 25]
 [0.0 1.0 1.0 ... 4800.0 24 30]]


## Training the Multiple Linear Regression model on the Training set

In [44]:
from xgboost import XGBRegressor
regressor = XGBRegressor(use_label_encoder = True, eval_metric = 'logloss')
regressor.fit(X_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, eval_metric='logloss',
             gamma=0, gpu_id=-1, importance_type='gain',
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=4,
             num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1,
             scale_pos_weight=1, subsample=1, tree_method='exact',
             use_label_encoder=True, validate_parameters=1, verbosity=None)

## Predicting the Test set results

In [45]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
# evaluate performance by comparing the predicted prices and the real prices
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), axis=1))

[[13979.04 13499.  ]
 [38156.81 30760.  ]
 [33156.34 37028.  ]
 [ 8001.86  9095.  ]
 [18004.31 18150.  ]
 [ 7698.07  7775.  ]
 [14253.94 11549.  ]
 [15895.39 12440.  ]
 [16436.85 13415.  ]
 [ 6927.42  6849.  ]
 [ 6253.02  6295.  ]
 [ 6684.79  7609.  ]
 [ 8220.3   7975.  ]
 [ 8093.57  7957.  ]
 [12069.33  9295.  ]
 [ 8450.53  9639.  ]
 [16186.14 15750.  ]
 [ 7753.36  7198.  ]
 [15080.85 11048.  ]
 [11893.4  10698.  ]
 [40768.48 36000.  ]
 [18771.51 22625.  ]
 [18957.92 19045.  ]
 [32249.   35550.  ]
 [ 8691.5   9298.  ]
 [27895.98 31600.  ]
 [36076.46 40960.  ]
 [10454.62  8948.  ]
 [ 8845.16 10595.  ]
 [15540.66 13295.  ]
 [13739.55 15510.  ]
 [ 9213.11  9495.  ]
 [ 7506.95  7799.  ]
 [10622.03  8845.  ]
 [12114.17 12964.  ]
 [ 8866.23  9549.  ]
 [ 7141.05  6695.  ]
 [16944.92 15580.  ]
 [20109.01 18150.  ]
 [13489.21 12629.  ]
 [10952.6  11845.  ]]


## Evaluating model performance

In [46]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9321777311321031

In [47]:
actual_data = np.array(y_test)

for i in range(len(y_pred)):
    actual = actual_data[i]
    predicted = y_pred[i]
    explained = ((actual_data[i] - y_pred[i])/actual_data[i])*100
    
    print('Actual value: ${:,.2f}, Predicted value: ${:,.2f} ({:,.2f} %)'.format(actual, predicted, explained))

Actual value: $13,499.00, Predicted value: $13,979.04 (-3.56 %)
Actual value: $30,760.00, Predicted value: $38,156.81 (-24.05 %)
Actual value: $37,028.00, Predicted value: $33,156.34 (10.46 %)
Actual value: $9,095.00, Predicted value: $8,001.86 (12.02 %)
Actual value: $18,150.00, Predicted value: $18,004.31 (0.80 %)
Actual value: $7,775.00, Predicted value: $7,698.07 (0.99 %)
Actual value: $11,549.00, Predicted value: $14,253.94 (-23.42 %)
Actual value: $12,440.00, Predicted value: $15,895.39 (-27.78 %)
Actual value: $13,415.00, Predicted value: $16,436.85 (-22.53 %)
Actual value: $6,849.00, Predicted value: $6,927.42 (-1.15 %)
Actual value: $6,295.00, Predicted value: $6,253.02 (0.67 %)
Actual value: $7,609.00, Predicted value: $6,684.79 (12.15 %)
Actual value: $7,975.00, Predicted value: $8,220.30 (-3.08 %)
Actual value: $7,957.00, Predicted value: $8,093.57 (-1.72 %)
Actual value: $9,295.00, Predicted value: $12,069.33 (-29.85 %)
Actual value: $9,639.00, Predicted value: $8,450.53 (