# Linear Algebra in Python: Matrix Inverses and Least Squares

## Vectors, Matrices, and the Role of Linear Algebra

In [None]:
import numpy as np

# Build a 3x2 integer matrix from a nested list: each inner list is one row.
np.array([[1, 2], [3, 4], [5, 6]])
# Expected:
# array([[1, 2],
#        [3, 4],
#        [5, 6]])

## Calculating Inverses and Determinants With `scipy.linalg`

In [None]:
import numpy as np
from scipy import linalg

# Coefficient matrix of the 5x5 linear system A @ x = b (one equation per row).
A = np.array([
    [1, 9, 2, 1, 1],
    [10, 1, 2, 1, 1],
    [1, 0, 5, 1, 1],
    [2, 1, 1, 2, 9],
    [2, 1, 2, 13, 2],
])
# Right-hand side, written directly as a 5x1 column vector.
b = np.array([[170], [180], [140], [180], [350]])

# Solve the system through the explicit inverse: x = inv(A) @ b.
A_inv = linalg.inv(A)
x = np.matmul(A_inv, b)
x
# Expected:
# array([[10.],
#        [10.],
#        [20.],
#        [20.],
#        [10.]])

In [None]:
# Determinant of the coefficient matrix A; a nonzero value means A is invertible.
linalg.det(A)
# Expected (floating-point rounding may change the trailing digits):
# 45102.0

In [None]:
# Display the inverse of A that linalg.inv() computed.
A_inv
# Expected:
# array([[-0.01077558,  0.10655847, -0.03565252, -0.0058534 , -0.00372489],
#        [ 0.11287748, -0.00512172, -0.04010909, -0.00658507, -0.0041905 ],
#        [ 0.0052991 , -0.01536517,  0.21300608, -0.01975522, -0.0125715 ],
#        [-0.0064077 , -0.01070906, -0.02325839, -0.01376879,  0.08214713],
#        [-0.00931223, -0.01902355, -0.00611946,  0.1183983 , -0.01556472]])

## Interpolating Polynomials With Linear Systems

In [None]:
import numpy as np
from scipy import linalg

# Each row has the form [1, x, x**2], here for x = 1, 2 and 3.
A = np.array(
    [
        [1, 1, 1],
        [1, 2, 4],
        [1, 3, 9],
    ]
)
# A nonzero determinant means the 3x3 system has exactly one solution.
linalg.det(A)
# Expected:
# 1.9999999999999996

In [None]:
# Right-hand side as a 3x1 column vector.
b = np.array([[5], [13], [25]])
# Coefficient vector obtained through the explicit inverse of A.
a = np.matmul(linalg.inv(A), b)
a
# Expected:
# array([[1.],
#        [2.],
#        [2.]])

In [None]:
# The last two rows are identical, which makes this matrix singular.
A = np.array(
    [
        [1, 1, 1],
        [1, 2, 4],
        [1, 2, 4],
    ]
)
# A zero determinant confirms that A has no inverse.
linalg.det(A)
# Expected:
# 0.0

In [None]:
b = np.array([5, 13, 25]).reshape((3, 1))
# A is singular at this point (its last two rows are identical), so it has no
# inverse and linalg.inv() raises LinAlgError. The error is the point of this
# cell, so it is caught and displayed rather than left to abort execution;
# that way "Restart Kernel -> Run All" still reaches the cells below.
try:
    x = linalg.inv(A) @ b
except linalg.LinAlgError as err:
    print(f"{type(err).__name__}: {err}")
# Expected (exact wording may vary between SciPy versions):
# LinAlgError: singular matrix

## Predicting Prices With Least Squares

### Building Least Squares Models Using `scipy.linalg`

In [None]:
import numpy as np
from scipy import linalg

# Rows 2 and 3 of A are identical while the matching entries of b differ,
# so the system has no exact solution and is solved in the least-squares sense.
A = np.array(
    [
        [1, 1, 1],
        [1, 2, 4],
        [1, 2, 4],
    ]
)
b = np.array([[5], [13], [25]])

# lstsq() returns several values; only the first one (the solution) is kept.
p, *_ = linalg.lstsq(A, b)
p
# Expected:
# array([[-0.42857143],
#        [ 1.14285714],
#        [ 4.28571429]])

In [None]:
import matplotlib.pyplot as plt

# Evaluate the fitted parabola y = p[0] + p[1]*x + p[2]*x**2 on a dense grid.
x = np.linspace(0, 3, 1000)
y = p[0] + p[1] * x + p[2] * x**2

# Use an explicit figure/axes pair so the plot can be labelled and reused.
fig, ax = plt.subplots()
ax.plot(x, y, label="least-squares fit")
# The three data points the model was fitted to, drawn as red circles.
ax.plot([1, 2, 2], [5, 13, 25], "ro", label="data points")
ax.set(xlabel="x", ylabel="y", title="Least-squares parabola and data points")
# Trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();

### Obtaining Least Squares Solutions Using Pseudoinverse

In [None]:
import numpy as np
from scipy import linalg

# Same rank-deficient system as in the lstsq example.
A = np.array(
    [
        [1, 1, 1],
        [1, 2, 4],
        [1, 2, 4],
    ]
)
b = np.array([[5], [13], [25]])

# Multiplying the pseudoinverse of A by b gives the least-squares solution.
A_pinv = linalg.pinv(A)
p2 = np.matmul(A_pinv, b)
p2
# Expected:
# array([[-0.42857143],
#        [ 1.14285714],
#        [ 4.28571429]])

In [None]:
# Display the pseudoinverse of A that linalg.pinv() computed.
A_pinv
# Expected:
# array([[ 1.        , -0.14285714, -0.14285714],
#        [ 0.5       , -0.03571429, -0.03571429],
#        [-0.5       ,  0.17857143,  0.17857143]])

## Example: Predicting Car Prices With Least Squares

### Preparing the Data

In [None]:
import pandas as pd

# Load the data set into a DataFrame. The path is relative, so the CSV file
# must be in the current working directory.
cars_data = pd.read_csv("vehicles_cleaned.csv")

In [None]:
# List the columns of the loaded data: the target ("price") plus the features.
cars_data.columns
# Expected:
# Index(['price', 'year', 'condition', 'cylinders', 'fuel', 'odometer',
#        'transmission', 'size', 'type'],
#       dtype='object')

In [None]:
# Inspect the first row to see which columns are numeric and which are categorical.
cars_data.iloc[0]
# Expected:
# price                  7000
# year                   2011
# condition              good
# cylinders       4 cylinders
# fuel                    gas
# odometer              76202
# transmission      automatic
# size                compact
# type                  sedan

In [None]:
# One-hot encode the categorical columns. drop_first=True leaves out the first
# level of each column, so that level is represented by all of its dummy
# columns being zero.
cars_data_dummies = pd.get_dummies(
    data=cars_data,
    columns=["condition", "cylinders", "fuel", "transmission", "size", "type"],
    drop_first=True,
)

In [None]:
# Columns after encoding: each categorical column is replaced by its dummy columns.
cars_data_dummies.columns
# Expected:
# Index(['price', 'year', 'odometer', 'condition_fair', 'condition_good',
#        'condition_like new', 'condition_new', 'condition_salvage',
#        'cylinders_6 cylinders', 'fuel_gas', 'transmission_manual',
#        'size_full-size', 'size_mid-size', 'size_sub-compact', 'type_hatchback',
#        'type_sedan', 'type_wagon'],
#       dtype='object')

In [None]:
# Append a constant column of ones so the fitted model includes an intercept term.
cars_data_dummies["intercept"] = 1

### Building the Model

In [None]:
# Design matrix: every column except the target, converted to a float array.
# dtype=float is requested explicitly because pandas >= 2.0 makes
# get_dummies() return bool columns; combined with the numeric columns, a
# plain to_numpy() would then produce an object-dtype array, which the
# scipy.linalg routines refuse to process.
A = cars_data_dummies.drop(columns=["price"]).to_numpy(dtype=float)
# Target vector: the observed prices, also as floats.
b = cars_data_dummies.loc[:, "price"].to_numpy(dtype=float)

In [None]:
from scipy import linalg

# Fit the linear model: p holds one coefficient per column of A, in column order.
# lstsq() returns several values; only the first one (the solution) is kept.
p, *_ = linalg.lstsq(A, b)
p
# Expected:
# array([ 8.47362988e+02, -3.53913729e-02, -3.47144752e+03, -1.66981155e+03,
#        -1.80240398e+02, -7.15885691e+03, -6.36540791e+03,  3.76583261e+03,
#        -1.84837210e+03,  1.31935783e+03,  6.60484388e+02,  6.38913933e+02,
#         1.54163679e+02, -1.76423109e+03, -1.99439766e+03,  6.97365788e+02,
#        -1.68998811e+06])

In [None]:
# Cross-check: the pseudoinverse of A applied to b gives the same coefficients.
p2 = np.matmul(linalg.pinv(A), b)
p2
# Expected:
# array([ 8.47362988e+02, -3.53913729e-02, -3.47144752e+03, -1.66981155e+03,
#        -1.80240398e+02, -7.15885691e+03, -6.36540791e+03,  3.76583261e+03,
#        -1.84837210e+03,  1.31935783e+03,  6.60484388e+02,  6.38913933e+02,
#         1.54163679e+02, -1.76423109e+03, -1.99439766e+03,  6.97365788e+02,
#        -1.68998811e+06])

### Predicting Prices

In [None]:
# Feature columns in the order used to build A; the target "price" is dropped,
# so it does not appear in the result.
cars_data_dummies.drop(columns=["price"]).columns
# Expected:
# Index(['year', 'odometer', 'condition_fair', 'condition_good',
#        'condition_like new', 'condition_new', 'condition_salvage',
#        'cylinders_6 cylinders', 'fuel_gas', 'transmission_manual',
#        'size_full-size', 'size_mid-size', 'size_sub-compact', 'type_hatchback',
#        'type_sedan', 'type_wagon', 'intercept'],
#       dtype='object')

In [None]:
import numpy as np

# Feature vector for one car, laid out in the column order of the design matrix.
car = np.array(
    [
        2010,  # year
        50000,  # odometer
        0, 1, 0, 0, 0,  # condition: fair, good, like new, new, salvage
        0,  # cylinders_6 cylinders
        1,  # fuel_gas
        0,  # transmission_manual
        0, 0, 0,  # size: full-size, mid-size, sub-compact
        1, 0, 0,  # type: hatchback, sedan, wagon
        1,  # intercept
    ]
)

In [None]:
# The predicted price is the dot product of the fitted coefficients and the
# car's feature vector.
predicted_price = p @ car
predicted_price
# Expected:
# 6159.510724281656