Load data from a CSV file into a Pandas DataFrame.

In [None]:
import pandas as pd

# Read the whole CSV (path is relative to the notebook's working directory)
# into a single in-memory DataFrame that the rest of the notebook builds on.
df = pd.read_csv(filepath_or_buffer='data_file.csv')

Perform feature selection using statistical tests.

In [None]:
from sklearn.feature_selection import SelectKBest, f_classif

# Predictors are every column except the last; the target is the last column.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Keep the fitted selector around so its scores/p-values can be inspected and
# the same transformation can be re-applied to new data later.
# k='all' scores every feature with the ANOVA F-test but drops none of them.
# NOTE(review): the selector is fitted on the full dataset before the
# train/test split; if k is ever lowered below 'all' this leaks test-set
# information into the selection -- fit on the training split instead.
selector = SelectKBest(score_func=f_classif, k='all')
X_selected = selector.fit_transform(X, y)

# fit_transform returns an unlabeled NumPy array. Re-wrap it with the names of
# the surviving columns (and the original row index) so that later cells can
# still look features up by name, e.g. X_test['feature1'].
X_new = pd.DataFrame(
    X_selected,
    columns=X.columns[selector.get_support()],
    index=X.index,
)

Split the data into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the shuffle
# (and therefore every downstream metric) reproducible across runs.
# NOTE(review): the split is not stratified on y -- confirm that is intended
# if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X_new,
    y,
    test_size=0.2,
    random_state=42,
)

Train a logistic regression ML model using the training data.

In [None]:
from sklearn.linear_model import LogisticRegression

# Build the classifier with scikit-learn's default hyper-parameters and fit it
# on the training split in one step (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
model

Make predictions on the test set using the trained model.

In [None]:
# Predicted class label for every row of the held-out test features.
y_pred = model.predict(X=X_test)

Visualize the results by plotting true vs predicted values.

In [None]:
import matplotlib.pyplot as plt

# Extract the 'feature1' values for the test rows.
# X_test is a plain NumPy array when it comes straight out of
# SelectKBest.fit_transform, and an array cannot be indexed by column name
# (X_test['feature1'] raises IndexError). Handle both container types.
if hasattr(X_test, 'columns'):
    feature1_test = X_test['feature1']
else:
    # SelectKBest(k='all') keeps every column in its original order, so the
    # column's position in X is also its position in the transformed array.
    # Revisit this lookup if k is changed to drop features.
    feature1_test = X_test[:, X.columns.get_loc('feature1')]

fig, ax = plt.subplots()
# True labels as filled blue dots, predictions as red crosses: a correct
# prediction shows both markers instead of the red one hiding the blue one.
ax.scatter(feature1_test, y_test, color='blue', label='True')
ax.scatter(feature1_test, y_pred, color='red', marker='x', label='Predicted')
ax.set_title('Model Predictions')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Outcome')
ax.legend()
plt.show()

Evaluate model accuracy using the test data.

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report it as a percentage with two decimals.
print(f'Accuracy: {accuracy * 100:.2f}%')

Save the trained model for future use.

In [None]:
import joblib

# Serialize the fitted classifier to disk (path is relative to the working
# directory). joblib.dump returns the list of files it wrote, which the
# notebook shows as the cell output.
joblib.dump(value=model, filename='final_model.pkl')

Create a final visualization comparing original and predicted outcomes.

In [None]:
import matplotlib.pyplot as plt

# Compare label distributions on the SAME rows: predictions exist only for the
# test split, so they must be set against y_test, not the full target y
# (which also contains the training rows and would dwarf the predicted bars).
fig, ax = plt.subplots()
ax.hist(y_test, bins=10, alpha=0.5, color='blue', label='Original')
ax.hist(y_pred, bins=10, alpha=0.5, color='red', label='Predicted')
ax.set_title('Final Visualization')
ax.set_xlabel('Outcome')
ax.set_ylabel('Count')
ax.legend()
plt.show()