Load data from a CSV file into a DataFrame.

In [None]:
import pandas as pd

# Location of the input CSV; a relative path, so it resolves against the
# current working directory.
DATA_PATH = 'data_file.csv'

# Parse the file into the DataFrame that the later cells read from.
df = pd.read_csv(DATA_PATH)

Select the top 10 features based on ANOVA F-value.

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif, f_regression

# Split the frame into predictors and target once, with explicit names.
X_all = df.drop(columns='target')
y_all = df['target']

# The model trained later is a regression (LinearRegression scored with MSE and
# R-squared), so features are ranked with the regression F-test. f_classif would
# treat every distinct target value as its own class.
# k is clamped to the number of available columns so that a frame with fewer
# than 10 features is still handled.
selector = SelectKBest(f_regression, k=min(10, X_all.shape[1]))
X_new = selector.fit_transform(X_all, y_all)

# NOTE(review): the selector is fitted on every row, while the train/test split
# only happens in a later cell. The held-out rows therefore influence which
# features are kept, which can make the test metrics optimistic. Consider
# splitting first and fitting the selector on the training rows only.

Split the dataset into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_new,
    df['target'],
    test_size=0.2,
    random_state=42,
)

Train a linear regression model using the training data.

In [None]:
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training can be
# chained into a single assignment.
model = LinearRegression().fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model

Evaluate the model using Mean Squared Error and R-squared.

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the fitted model on the held-out rows only.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Show both metrics so that the evaluation cell actually reports its result.
print(f"Mean Squared Error: {mse:.4f}")
print(f"R-squared: {r2:.4f}")

Visualize the distribution of residuals from the predictions.

In [None]:
import matplotlib.pyplot as plt

# Residual = true value minus prediction, computed on the test split.
residuals = y_test - y_pred

# Histogram of the residuals; axis labels are set so the figure can be read
# on its own, matching the labelled scatter plot in the following cell.
plt.hist(residuals, bins=20)
plt.xlabel('Residual (True - Predicted)')
plt.ylabel('Frequency')
plt.title('Residuals Distribution')
plt.show()

Plot true values against predicted values to assess model performance.

In [None]:
# Draw on an explicit Axes object instead of the implicit pyplot state.
fig, ax = plt.subplots()

# One point per test row: x is the observed target, y is the model's prediction.
ax.scatter(y_test, y_pred)
ax.set(
    xlabel='True Values',
    ylabel='Predictions',
    title='True vs Predicted',
)
plt.show()