In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from bokeh.plotting import figure, output_file, show

# Load the dataset
dataset = pd.read_csv("dataset612.csv")

# Tunable values, named once so the two unrelated 0.5 cut-offs (labelling vs.
# decision) are not confused and can be changed independently.
DEVIATION_COLUMN = 'Deviation (min)'
DEVIATION_THRESHOLD = 0.5  # rows with a deviation at or above this are labelled 1
DECISION_THRESHOLD = 0.5   # regression outputs at or above this count as class 1
ERROR_COLUMNS = ['Error 1', 'Error 2', 'Error 3', 'Error 4']
POLY_DEGREE = 2
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Fail early, before any model is fitted or any plot file is written, if the
# CSV does not contain the columns this cell relies on.
missing_columns = [
    column for column in [DEVIATION_COLUMN, *ERROR_COLUMNS]
    if column not in dataset.columns
]
if missing_columns:
    raise KeyError(f"dataset612.csv is missing expected columns: {missing_columns}")

# Create a binary target variable based on the Deviation column
dataset['y'] = (dataset[DEVIATION_COLUMN] >= DEVIATION_THRESHOLD).astype(int)

# Separate the features and target variable
X = dataset.drop([DEVIATION_COLUMN, 'y'], axis=1)
y = dataset['y']

# Split the data into training and test sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Fit one single-feature polynomial regression model per error column
for error in ERROR_COLUMNS:
    # Create polynomial features of the configured degree from this column only
    poly = PolynomialFeatures(degree=POLY_DEGREE)
    X_poly = poly.fit_transform(X_train[[error]])

    # Fit the model on the polynomial features
    model = LinearRegression()
    model.fit(X_poly, y_train)

    # Make predictions on the test set; the regression output is continuous,
    # so it is thresholded before being compared with the 0/1 labels.
    X_test_poly = poly.transform(X_test[[error]])
    y_pred = model.predict(X_test_poly)
    accuracy = np.mean((y_pred >= DECISION_THRESHOLD) == y_test)
    print(f"Accuracy for {error}: {accuracy}")

    # Visualize the model predictions using Bokeh.
    # scatter() replaces circle(size=...), which is deprecated in recent Bokeh
    # releases; the default scatter marker is a circle, so the plot is the same.
    p = figure(title=f"Polynomial Regression for {error}", x_axis_label=error, y_axis_label="y")
    p.scatter(X_test[error], y_test, size=10, alpha=0.5, color="blue", legend_label="Actual")
    p.scatter(X_test[error], y_pred, size=10, alpha=0.5, color="green", legend_label="Predicted")
    # One HTML file per error column, titled after the plot instead of the
    # default "Bokeh Plot".
    output_file(f"{error}.html", title=f"Polynomial Regression for {error}")
    show(p)


Accuracy for Error 1: 0.85
Accuracy for Error 2: 0.7
Accuracy for Error 3: 0.85
Accuracy for Error 4: 0.85
