In [None]:
# Load the stylesheet shared by the notebooks and let IPython render it.
# HTML is imported from IPython.display, the documented public API for
# display tools, rather than from the implementation module IPython.core.display.
from IPython.display import HTML
with open("../style.css", "r") as file:
    css = file.read()
# Must remain the last expression of the cell so that the notebook displays it.
HTML(css)

# Simple Linear Regression

We need to read our data from a <tt>csv</tt> file.  The module `csv` offers a number of functions for reading and writing a <tt>csv</tt> file.

In [None]:
import csv

Let us read the data.

In [None]:
# newline='' is what the csv documentation prescribes for file objects that
# are handed to a csv reader; it lets the reader do its own newline handling,
# which matters for newlines embedded in quoted fields.
with open('cars.csv', newline='') as handle:
    reader = csv.DictReader(handle, delimiter=',')
    # All records of the file: one dict per row, keyed by the column names
    # taken from the header line (values are still strings at this point).
    Data   = list(reader)

In [None]:
# Show the first five records as a sanity check that the file was parsed.
Data[:5]

In [None]:
import numpy as np

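Fuel efficiency, whether it is measured in <em style="color:blue;">kilometres per litre</em> or, as in the column `mpg` of our data, in miles per gallon, is the **inverse** of the fuel consumption.  Therefore, the vector `Y` of fuel consumptions is later defined as the reciprocal of the `mpg` column.  The function `simple_linear_regression(X, Y)` defined below fits the line $y = \vartheta_1 \cdot x + \vartheta_0$ to the data and returns the coefficient of determination $R^2$ of this fit: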

In [None]:
def simple_linear_regression(X, Y):
    """Fit the least-squares line Y ≈ slope * X + intercept and return R².

    X: values of the explanatory variable.
    Y: values of the variable to be explained; combined element-wise
       with X, so both must have the same length.

    Returns the coefficient of determination 1 - RSS/TSS, where RSS is the
    residual sum of squares of the fitted line and TSS is the total sum of
    squares of Y around its mean.  The fitted coefficients themselves are
    not returned.
    """
    x_mean, y_mean = np.mean(X), np.mean(Y)
    dx = X - x_mean
    dy = Y - y_mean
    # Closed-form least-squares solution for a single regressor.
    slope     = np.sum(dx * dy) / np.sum(dx ** 2)
    intercept = y_mean - slope * x_mean
    residuals = slope * X + intercept - Y
    total_sum_of_squares    = np.sum(dy ** 2)
    residual_sum_of_squares = np.sum(residuals ** 2)
    return 1 - residual_sum_of_squares / total_sum_of_squares

It seems that about $75\%$ of the fuel consumption is explained by the engine displacement.  We can get a better model of the fuel consumption if we use more variables for explaining the fuel consumption.  For example, the weight of a car is also responsible for its fuel consumption.

In [None]:
def coefficient_of_determination(name):
    """Print R² of a simple linear regression of fuel consumption on one column.

    name: key of the column in the records of the global list `Data` that is
          used as the explanatory variable; its values must be convertible
          to float.

    The explained variable is the reciprocal of the 'mpg' column.  The result
    of `simple_linear_regression` is rounded to two digits and printed;
    nothing is returned.
    """
    def column(value_of):
        # Build one numeric vector by applying value_of to every record.
        return np.array([value_of(record) for record in Data])

    explanatory = column(lambda record: float(record[name]))
    consumption = column(lambda record: 1/float(record['mpg']))
    R2 = simple_linear_regression(explanatory, consumption)
    print(f'coefficient of determination of fuel consumption w.r.t. {name:12s}: {round(R2, 2)}')

In [None]:
# Columns that are tried, one at a time, as the explanatory variable for the
# fuel consumption.
# NOTE(review): despite the name, these play the role of the independent
# variables; the dependent variable is the fuel consumption derived from 'mpg'.
DependentVars = [
    'cyl',
    'displacement',
    'hp',
    'weight',
    'acc',
    'year',
]

In [None]:
# Print one line per candidate column so that the R² values of the
# single-variable models can be compared side by side.
for name in DependentVars:
    coefficient_of_determination(name)