# Generate Data

In [None]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG so re-running this cell regenerates identical CSV files.
np.random.seed(42)


def _draw_linear_dataset(n_rows):
    """Draw ``n_rows`` samples of three standard-normal features and a noisy linear target.

    The target is ``5*x1 - 3*x2 + 2*x3`` plus standard-normal noise.
    The features are drawn before the noise, so the order in which the
    global RNG stream is consumed is fixed.

    Returns:
        (X, y): the ``(n_rows, 3)`` feature matrix and the length-``n_rows`` target vector.
    """
    X = np.random.normal(0, 1, (n_rows, 3))
    noise = np.random.normal(0, 1, n_rows)
    y = 5 * X[:, 0] - 3 * X[:, 1] + 2 * X[:, 2] + noise
    return X, y


# --- Training split: features and target together in one file ---
n_train = 10000
X_train, y_train = _draw_linear_dataset(n_train)

train_df = pd.DataFrame(
    X_train, columns=['feature_1', 'feature_2', 'feature_3']
).assign(target=y_train)
train_df.to_csv("train.csv", index=False)

# --- Evaluation split: features are published, targets are kept separately ---
n_eval = 5000
X_eval, y_eval = _draw_linear_dataset(n_eval)
eval_ids = np.arange(1, n_eval + 1)  # 1-based unique row identifiers

# Public file: id + features only (no target column).
eval_df = pd.DataFrame({
    'id': eval_ids,
    'feature_1': X_eval[:, 0],
    'feature_2': X_eval[:, 1],
    'feature_3': X_eval[:, 2],
})
eval_df.to_csv("eval.csv", index=False)

# Ground-truth file: id + true target, used later to score predictions.
true_targets_df = pd.DataFrame({'id': eval_ids}).assign(eval_target=y_eval)
true_targets_df.to_csv("eval_true_targets.csv", index=False)

# Evaluate Predictions

In [2]:
import pandas as pd
from sklearn.metrics import mean_squared_error

# Load the students' predictions and the ground truth.
# NOTE: `eval_df` here holds the true targets (columns: id, eval_target),
# not the feature table written to eval.csv by the data-generation cell.
pred_df = pd.read_csv("predictions.csv")  # submitted file
eval_df = pd.read_csv("eval_true_targets.csv")  # private ground truth

# An inner merge silently drops evaluation ids that have no prediction, so an
# incomplete submission would be scored only on the rows it chose to include.
# Refuse to score such a submission instead of reporting a misleading MSE.
missing_ids = eval_df.loc[~eval_df['id'].isin(pred_df['id']), 'id']
if not missing_ids.empty:
    raise ValueError(
        f"predictions.csv is missing predictions for {len(missing_ids)} of "
        f"{len(eval_df)} evaluation ids (first few: {missing_ids.head().tolist()})"
    )

# Merge both dataframes on the 'id' column to align rows.
# validate='one_to_one' raises pandas.errors.MergeError if an id is duplicated
# on either side, which would otherwise duplicate rows and skew the score.
merged_df = pd.merge(pred_df, eval_df, on='id', validate='one_to_one')

# Compute Mean Squared Error between the true targets and the predictions
mse = mean_squared_error(merged_df['eval_target'], merged_df['prediction'])
print(f"Mean Squared Error: {mse:.4f}")

Mean Squared Error: 1.0289
