Load data from a CSV file into a DataFrame using pandas.

In [None]:
import pandas as pd

# Read the CSV (path is relative to the working directory) into `data`.
data = pd.read_csv(filepath_or_buffer='data.csv')

Select the top 5 features based on ANOVA F-statistic.

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif, f_regression

# NOTE(review): X and y are not defined in any cell shown before this one;
# presumably they are the feature matrix and target taken from `data` -- confirm.
# NOTE(review): the selector is fitted on the full dataset, before the
# train/test split performed in the next cell, so held-out rows influence
# which features are kept. Consider fitting it on the training split only.
#
# The model and metrics used further down are regression ones
# (LinearRegression, MSE, R^2), so features are scored with the univariate
# regression F-test. f_classif (the ANOVA test, which treats y as class
# labels) stays imported only so other cells that may use it keep working.
selector = SelectKBest(score_func=f_regression, k=5)
X_new = selector.fit_transform(X, y)

Split the dataset into training and testing sets with an 80-20 ratio.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_new,
    y,
    test_size=0.2,
    random_state=42,
)

Instantiate a linear regression model.

In [None]:
from sklearn.linear_model import LinearRegression

# Unfitted least-squares regressor; the intercept term is enabled, which is
# the library default spelled out explicitly.
model = LinearRegression(fit_intercept=True)

Train the model using the training data.

In [None]:
# Fit the regressor on the training split only; the test split stays unseen.
model.fit(X=X_train, y=y_train)

Visualize the predictions against the true values from the training set.

In [None]:
import matplotlib.pyplot as plt

# Predicted vs. actual targets on the training split: x is the true value,
# y is what the fitted model returns for the same row.
train_predictions = model.predict(X_train)

plt.scatter(x=y_train, y=train_predictions)
plt.title('Model Training Results')
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.show()

Calculate evaluation metrics: mean squared error and R² score.

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Run inference on the held-out split once and reuse the result for both
# metrics instead of predicting separately for each one.
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)  # mean squared error on the test split
r2 = r2_score(y_test, y_pred)             # R^2 score on the test split

Create a boxplot to compare true values and predicted values.

In [None]:
# Side-by-side box plots: the held-out targets next to the model's
# predictions for the same rows.
test_predictions = model.predict(X_test)
box_series = [y_test, test_predictions]

# NOTE(review): Matplotlib 3.9 deprecates the `labels` keyword of boxplot in
# favour of `tick_labels`; kept as-is so older Matplotlib versions still work.
plt.boxplot(box_series, labels=['True', 'Predicted'])
plt.title('Evaluation Metrics')
plt.show()

Plot the final predictions against sample indices.

In [None]:
# Predict once and reuse the result for both the x positions (sample index)
# and the y values, rather than running inference twice in one call.
y_pred = model.predict(X_test)

plt.plot(range(len(y_pred)), y_pred, marker='o')
plt.title('Final Visualization')
plt.xlabel('Sample Index')
plt.ylabel('Predictions')
plt.show()