Cell 1: Imports

In [None]:
# Manasa Basavaraja
# Assignment 5 Polynomial Regression
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

Cell 2: Dataset Generation

In [None]:
# Generating the random training points here.
# A seeded generator makes "Restart Kernel -> Run All" reproduce the same dataset.
SEED = 42
N_POINTS = 15
rng = np.random.default_rng(SEED)

# N_POINTS random x values drawn uniformly between -2 and 2
x = rng.uniform(-2, 2, N_POINTS)

# y = x^3 + noise
# Draw one independent Gaussian noise value per point (size = x.shape).
# Drawing a single value would broadcast the same number onto every y,
# which only shifts the whole curve by a constant instead of adding noise.
y = x**3 + 0.5 * rng.normal(0, 1, x.shape)

# Creating a two-column dataframe (x, y) and displaying it.
dataframe = pd.DataFrame({'x': x, 'y': y})
dataframe

Cell 3: Initial Scatter Plot

In [None]:
# Pull the two columns out of the dataframe once; the model cells below reuse them.
x_vals = dataframe['x']
y_vals = dataframe['y']
# Column-vector view of x: one row per sample, one column for the single feature.
x_reshaped = x_vals.to_numpy().reshape(-1, 1)

# Scatter plot of the raw (x, y) coordinates
ax = plt.gca()
ax.scatter(x_reshaped, y_vals)
ax.set_title('Scatter plot of the (x,y) coordinates')
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
plt.show()

Cell 4: Degree-1 (Linear) Regression

In [None]:
# Degree 1: fit a straight line to the (x, y) points with LinearRegression.
reg = linear_model.LinearRegression()
reg.fit(x_reshaped, y_vals)
y_pred = reg.predict(x_reshaped)

# Overlay the fitted line on the observed points.
ax = plt.gca()
ax.scatter(x_reshaped, y_vals, label='Actual points')
ax.set_title('Degree-1: (Linear) Regression model')
ax.plot(x_reshaped, y_pred, label='Predicted points', color='#A52A2A')
ax.set_xlabel('x-axis')
ax.set_ylabel('y_predicted_points')
ax.legend(loc='lower right')
plt.show()

Cell 5: Degree-2 (Quadratic) Regression

In [None]:
# Degree 2 quadratic regression model
# Expand x into polynomial features up to degree 2, then fit a linear model on them.
poly_reg2 = PolynomialFeatures(degree=2)
x_poly2 = poly_reg2.fit_transform(x_reshaped)

lin_reg2 = linear_model.LinearRegression()
lin_reg2.fit(x_poly2, y_vals)

# Evaluate the fitted curve on an evenly spaced, ascending grid of x values.
# plt.plot joins points in the order they are given and the sampled x values
# are unsorted, so plotting predictions at the raw x positions draws zigzag
# chords across the curve instead of the curve itself.
x_curve2 = np.linspace(x_reshaped.min(), x_reshaped.max(), 200).reshape(-1, 1)
# transform (not fit_transform): the feature expander was already fitted above.
y_curve2 = lin_reg2.predict(poly_reg2.transform(x_curve2))

# Visualize the Linear regression model with degree 2
plt.scatter(x_reshaped, y_vals, color='#A52A2A', label='Actual Points')
plt.plot(x_curve2, y_curve2, color='#008B8B', label='Predicted Points')
plt.title('Degree-2 (Quadratic) Regression model')
plt.xlabel('x')
plt.ylabel('y_predicted_points')
plt.legend(loc='lower right')
plt.show()

Cell 6: Degree-3 (Cubic) Regression

In [None]:
# Degree 3 Cubic regression
# Expand x into polynomial features up to degree 3, then fit a linear model on them.
poly_reg3 = PolynomialFeatures(degree=3)
x_poly3 = poly_reg3.fit_transform(x_reshaped)

lin_reg3 = linear_model.LinearRegression()
lin_reg3.fit(x_poly3, y_vals)

# Evaluate the fitted curve on an evenly spaced, ascending grid of x values.
# plt.plot joins points in the order they are given and the sampled x values
# are unsorted, so plotting predictions at the raw x positions draws zigzag
# chords across the curve instead of the curve itself.
x_curve3 = np.linspace(x_reshaped.min(), x_reshaped.max(), 200).reshape(-1, 1)
# transform (not fit_transform): the feature expander was already fitted above.
y_curve3 = lin_reg3.predict(poly_reg3.transform(x_curve3))

# Visualizing the Linear regression model with degree 3
plt.scatter(x_reshaped, y_vals, color='#A52A2A', label='Actual Points')
plt.plot(x_curve3, y_curve3, color='#008B8B', label='Predicted Points')
plt.title('Degree-3 (Cubic) Regression model')
plt.xlabel('x')
plt.ylabel('y_predicted_points')
plt.legend(loc='lower right')
plt.show()

Cell 7: Degree-4 Regression

In [None]:
# Degree 4
# Expand x into polynomial features up to degree 4, then fit a linear model on them.
poly_reg4 = PolynomialFeatures(degree=4)
x_poly4 = poly_reg4.fit_transform(x_reshaped)

lin_reg4 = linear_model.LinearRegression()
lin_reg4.fit(x_poly4, y_vals)

# Evaluate the fitted curve on an evenly spaced, ascending grid of x values.
# plt.plot joins points in the order they are given and the sampled x values
# are unsorted, so plotting predictions at the raw x positions draws zigzag
# chords across the curve instead of the curve itself.
x_curve4 = np.linspace(x_reshaped.min(), x_reshaped.max(), 200).reshape(-1, 1)
# transform (not fit_transform): the feature expander was already fitted above.
y_curve4 = lin_reg4.predict(poly_reg4.transform(x_curve4))

# Visualize the Linear regression model with degree 4
plt.scatter(x_reshaped, y_vals, color='#A52A2A', label='Actual Points')
plt.plot(x_curve4, y_curve4, color='#008B8B', label='Predicted Points')
plt.title('Degree-4')
plt.xlabel('x')
plt.ylabel('y_predicted_points')
plt.legend(loc='lower right')
plt.show()

Cell 8: Combined Comparison Plot

In [None]:
# Combine the graphs: every fitted model drawn over the same training points.

# One shared, ascending grid of x values for all curves. plt.plot joins points
# in the order given and the sampled x values are unsorted, so drawing the
# polynomial predictions at the raw x positions would produce zigzag chords.
x_curve_all = np.linspace(x_reshaped.min(), x_reshaped.max(), 200).reshape(-1, 1)

plt.scatter(x_reshaped, y_vals, color='red', label='Actual')
plt.plot(x_curve_all, reg.predict(x_curve_all), label='Linear')

# (legend label, fitted feature expander, fitted regressor) for each polynomial model
for curve_label, features, model in (
    ('Quadratic', poly_reg2, lin_reg2),
    ('Cubic', poly_reg3, lin_reg3),
    ('Degree 4', poly_reg4, lin_reg4),
):
    # transform (not fit_transform): each expander was already fitted in its own cell.
    plt.plot(x_curve_all, model.predict(features.transform(x_curve_all)), label=curve_label)

plt.title('Regression models with different degrees')
plt.xlabel('x-axis')
plt.ylabel('y_predicted_points')
plt.legend()
plt.show()

Cell 9: Testing and Evaluation

In [None]:
# Testing the models
# Create a new test dataset from the same process as the training data:
# y = x^3 + noise, with one independent noise value per point. A single noise
# draw would broadcast onto every y and act as a constant offset, not noise.
# The generator is seeded so the reported errors are reproducible.
test_rng = np.random.default_rng(2024)
x_test = test_rng.uniform(-2, 2, 100)
y_test = x_test**3 + 0.5 * test_rng.normal(0, 1, x_test.shape)

plt.scatter(x_test, y_test, color='#008B8B')
plt.title('Data points created for testing')
plt.xlabel('x-axis')
plt.ylabel('y-axis')
plt.show()

# Score the models that were fitted on the training data in the cells above.
# They are deliberately NOT re-fitted here: fitting on the test set and then
# scoring on that same set would report training error, not test error.
x_test_reshaped = np.array(x_test).reshape(-1, 1)

# mean_squared_error returns the MSE, so take the square root to report the
# root mean squared error that the printed labels announce.

# Linear regression
# lin_reg_test is kept as a name for any later cell that refers to it; it is
# the already-trained degree-1 model, not a new fit.
lin_reg_test = reg
y_pred_test = lin_reg_test.predict(x_test_reshaped)
print(f'Root Mean squared error (Linear): {np.sqrt(mean_squared_error(y_test, y_pred_test)):.3f}')

# Degree 2
# transform (not fit_transform): reuse the feature expander fitted during training.
x_poly2_test = poly_reg2.transform(x_test_reshaped)
y_pred2_test = lin_reg2.predict(x_poly2_test)
print(f'Root Mean squared error (degree 2): {np.sqrt(mean_squared_error(y_test, y_pred2_test)):.3f}')

# Degree 3
x_poly3_test = poly_reg3.transform(x_test_reshaped)
y_pred3_test = lin_reg3.predict(x_poly3_test)
print(f'Root Mean squared error (degree 3): {np.sqrt(mean_squared_error(y_test, y_pred3_test)):.3f}')

# Degree 4
x_poly4_test = poly_reg4.transform(x_test_reshaped)
y_pred4_test = lin_reg4.predict(x_poly4_test)
print(f'Root Mean squared error (degree 4): {np.sqrt(mean_squared_error(y_test, y_pred4_test)):.3f}')

# Note: Degree 3 and Degree 4 Polynomial Regression models are best because of the lower error.