Load the dataset from a CSV file.

In [None]:
import pandas as pd

# Read data.csv (resolved against the current working directory) into a
# DataFrame bound to `data`.
data = pd.read_csv(filepath_or_buffer='data.csv')

Split the data into training and testing sets.

In [None]:
# Import here so the notebook runs top-to-bottom: train_test_split was only
# imported in a later cell, so calling it at this point raised NameError.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as test_data; the remaining 80% becomes train_data.
train_data, test_data = train_test_split(data, test_size=0.2)

Separate features and target variable for training data.

In [None]:
# Take the label column first, then keep every remaining column as features.
y = train_data['target']
X = train_data.drop(columns='target')

Select the top 10 features using a univariate statistical test (the ANOVA F-test, `f_classif`).

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

# Impute before selecting: SelectKBest validates its input and rejects NaN,
# so missing values have to be replaced before fit()/transform() see the data.
# The fill values are the column means of the training features only; data
# without missing values passes through unchanged.
X = X.fillna(X.mean(numeric_only=True))

# Score every feature against the target with the ANOVA F-test and keep the
# 10 highest-scoring ones.
selector = SelectKBest(score_func=f_classif, k=10)
selector.fit(X, y)

Transform the feature set to the selected features.

In [None]:
# selector.transform() returns a bare NumPy array. Rebuild a DataFrame so the
# selected columns keep their names and row index (staying aligned with y) and
# DataFrame methods such as fillna()/mean() remain available afterwards.
X_selected = pd.DataFrame(
    selector.transform(X),
    columns=X.columns[selector.get_support()],  # boolean mask of kept features
    index=X.index,
)

Handle missing values by replacing each one with the mean of its column.

In [None]:
# X_selected may be a plain NumPy array (the raw output of selector.transform),
# which has no fillna(); wrap it in a DataFrame first. An existing DataFrame
# passes through with its columns and index intact.
X_selected = pd.DataFrame(X_selected)

# Replace each missing value with the mean of its own column.
X_selected = X_selected.fillna(X_selected.mean())

Split the selected features into training and validation sets.

In [None]:
from sklearn.model_selection import train_test_split

# Carve a 20% validation split out of the selected features and their labels;
# features and labels are split together so rows stay paired.
(X_train, X_val, y_train, y_val) = train_test_split(
    X_selected,
    y,
    test_size=0.2,
)

Train a Random Forest model on the training data.

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest with default hyperparameters and fit it on the
# training split; fit() returns the estimator itself, so the fitted model is
# bound to `model` in one step.
model = RandomForestClassifier().fit(X_train, y_train)

Evaluate the model using AUC on the validation set.

In [None]:
from sklearn.metrics import roc_auc_score

# Hard class labels, kept because a later cell reads y_pred.
y_pred = model.predict(X_val)

# AUC measures how well the scores rank positives above negatives, so it needs
# continuous scores. Hard 0/1 labels collapse the ROC curve to one operating
# point and misstate the AUC. Column 1 of predict_proba is the probability
# of model.classes_[1], the positive class.
# Assumes a binary target.
y_score = model.predict_proba(X_val)[:, 1]
auc = roc_auc_score(y_val, y_score)

Plot the ROC curve based on false positive and true positive rates.

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

# Build the curve from predicted probabilities of the positive class
# (column 1 = model.classes_[1]). Hard class labels would give only a single
# operating point, i.e. a three-point "curve".
# Assumes a binary target, which roc_curve requires.
y_score = model.predict_proba(X_val)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, y_score)
plt.plot(fpr, tpr)

Make predictions on test data and save to CSV for submission.

In [None]:
# test_data still contains the label column and every raw feature, while the
# model was trained on the selector's output. Apply the same preparation
# before predicting, otherwise predict() fails on the feature mismatch.
X_test = test_data.drop(columns='target')

# Fill gaps with the training-set column means so no statistics are taken
# from the test rows; the selector rejects NaN input.
X_test = X_test.fillna(X.mean(numeric_only=True))
X_test_selected = selector.transform(X_test)

# If the model was fitted on a DataFrame it remembers the column names; hand
# it the same names so scikit-learn's feature-name check stays quiet.
if hasattr(model, 'feature_names_in_'):
    X_test_selected = pd.DataFrame(
        X_test_selected,
        columns=model.feature_names_in_,
        index=X_test.index,
    )

predictions = model.predict(X_test_selected)

# Write the predicted labels as a single-column CSV without the row index.
pd.DataFrame(predictions).to_csv('submission.csv', index=False)

Check the format of the submission file.

In [None]:
# Read the written submission file back into a DataFrame so its layout
# (columns, dtypes, row count) can be inspected.
submission_format = pd.read_csv(filepath_or_buffer='submission.csv')

Display the labeled ROC curve.

In [None]:
# Label the current axes and render the figure.
# NOTE(review): if this runs as a separate notebook cell under the inline
# backend, the curve plotted in an earlier cell may already have been rendered
# and closed, leaving these labels on an empty figure — confirm.
ax = plt.gca()
ax.set_title('ROC Curve')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
plt.show()