In [30]:
import numpy as np
from sympy import *

init_printing()

In [31]:
# Point class represents a single (x,y) point in 2d Cartesian Space

class Point():
    
    def __init__(self, t, ft):
        self.t = t
        self.ft = ft
    
    def get_x(self):
        return self.t
    
    def get_y(self):
        return self.ft

In [32]:
# Polynomial class takes in a list of coefficients and prints a formatted polynomial as a result

class Polynomial(list): 
    def __repr__(self):
        # joiner[first, negative] = str
        joiner = {
            (True, True): '-',
            (True, False): '',
            (False, True): ' - ',
            (False, False): ' + '
        }

        result = []
        for power, coeff in reversed(list(enumerate(self))):
            j = joiner[not result, coeff < 0]
            coeff = abs(coeff)
            if coeff == 1 and power != 0:
                coeff = ''

            f = {0: '{}{}', 1: '{}{}x'}.get(power, '{}{}x^{}')

            result.append(f.format(j, coeff, power))

        return ''.join(result) or '0'

In [33]:
# DataSet class takes in a list of point objects and creates a dataset. 
# Note to create a dataset with only random data, initialize the DataSet object with an empty array then
# call the randomize method with the desired parameters.

class DataSet():
    
    def __init__(self, data):
        self.n = len(data)
        self.data_points = data
        
    # print_data method prints the values of all the data points
    
    def print_data(self):
        for i in self.data_points:
            print((i.get_x(), i.get_y()))
        
        
    # randomize method can be used to generate a random dataset of n points
    # randomize takes in an int n to represent the number of points, and an int min and max to define a range
    # randomize replaces the values inputted in the list of points in the constructor
    
    def randomize(self, n, min, max):
        self.n = n
        self.values = np.random.randint(min, max, size=(2,n))
        self.data_points = []
        for i in zip(self.values[0], self.values[1]):
            self.data_points.append(Point(i[0],i[1]))
    
    
    # this altered version of the randomize function allows for custom ranges for x and y values
    
    def randomize_2(self, n, x_min, x_max, y_min, y_max):
        self.n = n
        x_values = np.random.randint(x_min, x_max, size = (1,n))
        y_values = np.random.randint(y_min, y_max, size = (1,n))
        self.data_points = []
        for i in zip(x_values[0], y_values[0]):
            self.data_points.append(Point(i[0],i[1]))
        
            
            
    # this method uses Least Squares to approximate the n-degree polynomial of best fit for a given dataset
    # the parameter n specifies the degree of the polynomial
    # use n=1 to generate a line, and n=2 to generate a parabola and so forth
    # note: the values are rounded to three decimal places for simplicity
    
    def get_n_degree_polynomial_of_best_fit(self, n):
        A = []
        b = []
        for i in range(0,len(self.data_points)):
            b.append([self.data_points[i].get_y()])
            A.append([])
            for j in range(0,n+1):
                A[i].append(self.data_points[i].get_x() ** j)
        A = np.array(A)
        b = np.array(b)
        AT = np.transpose(A)
        
        A = np.matmul(AT,A)
        b = np.matmul(AT,b)
        x = np.linalg.solve(A,b)
        poly = []
        for i in x:
            poly.append(round(i[0], 3))
        
        print(Polynomial(poly))
            
        

In [34]:
"""
In this example I generate a data set with the points (0,6), (1,0), and (2,0) from example 1 in
4.3 of Strang's textbook.

I then call the get_n_degree_polynomial_of_best_fit method with arguments n=1, n=2, n=3, n=4 to generate the
line and parabola of best fit.

"""

a = DataSet([Point(0,6), Point(1,0), Point(2,0)])
print("Line of best fit:")
a.get_n_degree_polynomial_of_best_fit(1)

print("Parabola of best fit:")
a.get_n_degree_polynomial_of_best_fit(2)

print("Degree 3 polynomial of best fit:")
a.get_n_degree_polynomial_of_best_fit(3)

print("Degree 4 polynomial of best fit:")
a.get_n_degree_polynomial_of_best_fit(4)


Line of best fit:
-3.0x + 5.0
Parabola of best fit:
3.0x^2 - 9.0x + 6.0
Degree 3 polynomial of best fit:
0.781x^3 + 0.656x^2 - 7.437x + 6.0
Degree 4 polynomial of best fit:
0.145x^4 + 1.148x^3 - 1.457x^2 - 5.836x + 6.0


In [35]:
"""
In this example I generate a random data set with 10 points with x values in the range of (-50,50)
and y values in the range (0,100)

I then call the get_n_degree_polynomial_of_best_fit method with arguments n=1 through n=10.

"""

a = DataSet([])
a.randomize_2(10, -50, 50, 0, 10000)
print("Data:\n")
a.print_data()
for i in range(0,6):
    print("")
    print("Degree {} polynomial of best fit:".format(i))
    a.get_n_degree_polynomial_of_best_fit(i)

Data:

(-37, 5745)
(-18, 5066)
(38, 6647)
(-2, 8291)
(41, 7694)
(43, 9667)
(-4, 7548)
(-38, 3665)
(-29, 558)
(-17, 17)

Degree 0 polynomial of best fit:
5489.8

Degree 1 polynomial of best fit:
65.564x + 5640.597

Degree 2 polynomial of best fit:
-0.132x^2 + 66.765x + 5765.488

Degree 3 polynomial of best fit:
-0.151x^3 - 0.511x^2 + 288.78x + 7571.808

Degree 4 polynomial of best fit:
0.007x^4 - 0.116x^3 - 12.215x^2 + 188.085x + 8442.637

Degree 5 polynomial of best fit:
0.0x^5 + 0.006x^4 - 0.334x^3 - 10.717x^2 + 313.41x + 8949.912
