In [58]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from bokeh.plotting import figure, show
from bokeh.layouts import gridplot

# Load the training, ideal and test datasets
def _load_xy(csv_path):
    """Read *csv_path* and split it into input and output arrays.

    Returns a tuple ``(frame, X, y)`` where ``frame`` is the full DataFrame,
    ``X`` is the first column kept two-dimensional (one column wide, because
    it is selected with the slice ``0:1``) and ``y`` is the second column as
    a one-dimensional array.
    """
    frame = pd.read_csv(csv_path)
    inputs = frame.iloc[:, 0:1].values
    outputs = frame.iloc[:, 1].values
    return frame, inputs, outputs


# Each file is read the same way: column 0 is the input, column 1 the output.
train_data, X_train, y_train = _load_xy("train.csv")
ideal_data, X_ideal, y_ideal = _load_xy("ideal.csv")
test_data, X_test, y_test = _load_xy("test.csv")

# Perform Polynomial Regression
# PolynomialFeatures expands the input column(s) into polynomial terms up to
# the given degree; the linear model is then fitted on those expanded features.
poly_reg = PolynomialFeatures(degree=2)  # You can choose any degree for the polynomial
X_train_poly = poly_reg.fit_transform(X_train)
# NOTE: the feature transformer is fitted exactly once, on the raw training
# inputs. It must not be re-fitted on its own output (X_train_poly), which
# would leave it expecting the expanded column count instead of the raw one.
lin_reg = LinearRegression()
lin_reg.fit(X_train_poly, y_train)

# Predict using the model
# Reuse the already-expanded training features, and only *transform* (never
# re-fit) the other datasets so every prediction uses the training-time
# feature mapping.
y_train_pred = lin_reg.predict(X_train_poly)
y_ideal_pred = lin_reg.predict(poly_reg.transform(X_ideal))
y_test_pred = lin_reg.predict(poly_reg.transform(X_test))

# Evaluate the model
# Print the mean squared error and R^2 for each dataset in turn. Headings
# after the first start with a newline so the groups are separated by a
# blank line in the output.
for heading, actual, predicted in (
    ("Training set metrics:", y_train, y_train_pred),
    ("\nIdeal set metrics:", y_ideal, y_ideal_pred),
    ("\nTest set metrics:", y_test, y_test_pred),
):
    print(heading)
    print("Mean squared error: %.2f" % mean_squared_error(actual, predicted))
    print("Coefficient of determination (R^2): %.2f" % r2_score(actual, predicted))

# Create interactive scatter plots with Bokeh
def _regression_figure(title, X, y, y_pred):
    """Build one Bokeh figure comparing observed values with the fitted curve.

    Parameters:
        title: Text shown above the plot.
        X: Input values; flattened to one dimension before plotting.
        y: Observed output values, drawn as red scatter markers.
        y_pred: Model predictions for ``X``, drawn as a blue line.

    Returns:
        The populated Bokeh figure.
    """
    x = np.asarray(X).flatten()
    fig = figure(title=title, x_axis_label='Input variable', y_axis_label='Output variable')
    fig.scatter(x, y, color='red')
    # A line glyph joins points in the order they are supplied, so the curve
    # is drawn in ascending x order; with unsorted rows it would zig-zag
    # back and forth across the plot.
    order = np.argsort(x)
    fig.line(x[order], np.asarray(y_pred)[order], color='blue')
    return fig


train_fig = _regression_figure('Polynomial Regression (Training set)', X_train, y_train, y_train_pred)
ideal_fig = _regression_figure('Polynomial Regression (Ideal set)', X_ideal, y_ideal, y_ideal_pred)
test_fig = _regression_figure('Polynomial Regression (Test set)', X_test, y_test, y_test_pred)

# Two-by-two grid with the bottom-right cell left empty.
show(gridplot([[train_fig, ideal_fig], [test_fig, None]]))


Training set metrics:
Mean squared error: 0.08
Coefficient of determination (R^2): 1.00

Ideal set metrics:
Mean squared error: 559.54
Coefficient of determination (R^2): -1139.27

Test set metrics:
Mean squared error: 23262211.51
Coefficient of determination (R^2): -0.03
