Load the training, testing, and sample submission datasets using pandas.

In [None]:
import pandas as pd
# Read the three CSV files (paths are relative to the working directory) in
# one pass; the names are bound in the same order as the paths.
train, test, sample_submission = map(
    pd.read_csv,
    ('train.csv', 'test.csv', 'sample_submission.csv'),
)

Preprocess the data by filling missing values and encoding categorical variables.

In [None]:
# Handle missing values and encode categorical variables
def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """Impute missing numeric values and one-hot encode categorical columns.

    Missing entries in numeric columns are replaced with that column's mean.
    Non-numeric columns are excluded from the mean computation (taking the
    mean of a text column raises ``TypeError`` on pandas 2.x) and are then
    expanded into indicator columns by ``pd.get_dummies``.

    Args:
        df: Input frame. It is not modified.

    Returns:
        A new frame with numeric gaps filled and categorical columns replaced
        by their indicator columns.
    """
    # Rebind instead of using inplace=True so the caller's frame is untouched.
    filled = df.fillna(df.mean(numeric_only=True))
    return pd.get_dummies(filled)
# Run both splits through the same cleaning step; each frame is processed
# independently of the other.
# NOTE(review): encoding train and test separately can yield different
# indicator columns when a category appears in only one split — confirm the
# column sets match before modelling.
train, test = preprocess_data(train), preprocess_data(test)

Standardize the features (zero mean, unit variance) so that they are on a comparable scale.

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on every column of the preprocessed training frame; the
# resulting array keeps the training column order.
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train)

# The scaler only accepts input with the same columns it was fitted on, so
# transforming ``test`` directly fails whenever its columns differ from
# ``train`` (a column present only in train, or indicator columns that differ
# between the splits). Align test to the training columns first: columns
# missing from test are added as 0 and columns unknown to train are dropped.
test_aligned = test.reindex(columns=train.columns, fill_value=0)

# Drop the last column after scaling so test_scaled has the same layout as the
# feature matrix taken from train_scaled[:, :-1].
# NOTE(review): this relies on the last training column being the target —
# confirm against train.csv.
test_scaled = scaler.transform(test_aligned)[:, :-1]

Split the scaled training data into the feature matrix X and the target column y.

In [None]:
import numpy as np
# Split the scaled training array column-wise: everything except the last
# column is the feature matrix, the last column is the target.
# The -1: slice keeps y two-dimensional, with shape (n_rows, 1).
# NOTE(review): assumes the target is the last column of train after
# pd.get_dummies, and y is in standardized units because the scaler was fitted
# on all columns — confirm both are intended.
X, y = train_scaled[:, :-1], train_scaled[:, -1:]

Define the model using Linear Regression.

In [None]:
from sklearn.linear_model import LinearRegression
# Ordinary least-squares linear regression; no arguments are passed, so the
# estimator uses scikit-learn's default settings.
model = LinearRegression()

Train the model using the training data.

In [None]:
# Fit the regression on X and y. y has shape (n_rows, 1), so later predict()
# calls return a two-dimensional array of shape (n_samples, 1).
model.fit(X, y)

Plot the model's predictions on the training data alongside the ground truth to visualize how well it fits.

In [None]:
import matplotlib.pyplot as plt
# In-sample predictions: the model is evaluated on the same rows it was
# fitted on.
predictions = model.predict(X)

# Draw both series against the row index on the current pyplot figure.
for values, legend_label in ((y, 'Ground Truth'), (predictions, 'Predicted')):
    plt.plot(values, label=legend_label)
plt.legend()
plt.show()

Prepare and save the submission file with predictions.

In [None]:
# model.predict returns shape (n_rows, 1) because the model was fitted on a
# two-dimensional y; pandas rejects 2-D arrays as DataFrame columns, so
# flatten to 1-D first.
scaled_predictions = model.predict(test_scaled).ravel()

# The target was standardized together with the features (it is the last
# column the scaler was fitted on), so the raw model output is in standardized
# units. Undo that scaling so the submission holds values in the target's
# original units.
test_predictions = scaled_predictions * scaler.scale_[-1] + scaler.mean_[-1]

submission = pd.DataFrame({'Id': test['Id'], 'Prediction': test_predictions})
submission.to_csv('submission.csv', index=False)

Compare predictions against ground truth using a scatter plot.

In [None]:
import seaborn as sns
# Both inputs must be one-dimensional: y and predictions have shape
# (n_rows, 1), so flatten each before handing them to seaborn.
sns.scatterplot(x=y.flatten(), y=predictions.ravel())
plt.xlabel('Ground Truth')
plt.ylabel('Predicted')
plt.title('Ground Truth vs Predicted')
plt.show()