Load training, test, and sample submission data from CSV files.

In [None]:
import pandas as pd

# Read the three CSV files from the working directory; the resulting frames
# are unpacked in the same order as the file names are listed.
train_data, test_data, sample_submission = (
    pd.read_csv(csv_name)
    for csv_name in ('training.csv', 'test.csv', 'sample_submission.csv')
)

Select the 10 highest-scoring features from the training data, ranked by the ANOVA F-test (`f_classif`) against the target.

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

# Separate the label column from the predictor columns.
y = train_data['target']
X = train_data.drop(columns='target')

# Score every predictor against the label with f_classif and keep the 10
# highest-scoring columns. fit_transform fits the selector on (X, y) and
# returns the reduced matrix in one call.
selector = SelectKBest(score_func=f_classif, k=10)
X_selected = selector.fit_transform(X, y)

Create a DataFrame of the selected features.

In [None]:
# Wrap the selected-feature matrix in a DataFrame whose columns carry the
# names of the features the selector kept.
selected_columns = selector.get_feature_names_out()
feature_data = pd.DataFrame(X_selected, columns=selected_columns)

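Create a new feature as the product of two existing features. Note that it is added to `X` after feature selection has already run, so it is not part of `X_selected`.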

In [None]:
import numpy as np  # not referenced in this cell

# Example of feature engineering: add an interaction column holding the
# element-wise product of two existing columns.
X['new_feature'] = X['feature1'].mul(X['feature2'])

Split the selected features and target into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the
# partition repeatable. train_test_split returns the pieces in the order
# (X train, X test, y train, y test).
split_parts = train_test_split(X_selected, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

Train a Random Forest model using the training data.

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so repeated runs build the same trees. Without a seed the
# fitted model (and every score derived from it) changes from run to run,
# even though the train/test split itself is seeded with 42.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

Make predictions on the hold-out split (`X_test`, carved from the training data) and save them to `predictions.csv`.

In [None]:
# Predict labels for the held-out rows and write them to disk as a single
# 'Predictions' column, omitting the row index.
predictions = model.predict(X_test)
predictions_df = pd.DataFrame({'Predictions': predictions})
predictions_df.to_csv('predictions.csv', index=False)

Evaluate the model's accuracy on the hold-out split (`y_test` versus the predictions).

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label equals the true label.
accuracy = accuracy_score(y_test, predictions)

# Report the score: a bare assignment produces no output, so without this
# line the evaluation result is never shown.
print(f'Accuracy: {accuracy:.4f}')

Save the predictions in a submission file.

In [None]:
# Build the submission from the real test set. `predictions_df` holds
# predictions for the 20% hold-out slice of the training data, so writing it
# out as the submission would score the wrong rows.

# Restrict the test frame to the columns the selector was fitted on, then
# apply the same feature selection the model was trained with.
# NOTE(review): assumes test.csv contains every training feature column — confirm.
test_features = test_data[list(selector.feature_names_in_)]
test_selected = selector.transform(test_features)

# Start from the sample submission so identifier columns and column order
# match the expected layout.
# NOTE(review): assumes the last column of sample_submission.csv is the
# prediction column and that its rows line up with test.csv — confirm.
submission = sample_submission.copy()
submission[submission.columns[-1]] = model.predict(test_selected)
submission.to_csv('submission.csv', index=False)