In [4]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import recall_score
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFECV
from sklearn.model_selection import KFold

In [5]:
# Training split: read the raw CSV, then separate the feature columns
# from the "Unusual" label column.
train_data = pd.read_csv('../../data/raw/train.csv', encoding='unicode_escape')
X_train = train_data.drop(columns=["Unusual"])
y_train = train_data["Unusual"]

# Test split: same treatment as the training split.
test_data = pd.read_csv('../../data/raw/test.csv', encoding='unicode_escape')
X_test = test_data.drop(columns=["Unusual"])
y_test = test_data["Unusual"]

In [6]:
# Define the model pipeline:
#   1. 'preprocessing'     - pre-built preprocessing pipeline loaded from disk
#   2. 'feature_selection' - RFECV (Recursive Feature Elimination with
#                            Cross-Validation) driven by a random forest
#   3. 'randomforest'      - final classifier trained on the selected features

# Fixed seed so that feature selection, the fitted forest and the reported
# score are reproducible across "Restart Kernel -> Run All".
RANDOM_STATE = 42

model_pipeline = Pipeline([
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code - only load artifacts produced by this project.
    ('preprocessing', joblib.load('../pipelines/PreprocessingPipeline.joblib')),
    # NOTE(review): KFold without shuffling splits rows in file order and does
    # not stratify by class; if "Unusual" is rare or the rows are ordered,
    # consider StratifiedKFold - confirm against the data.
    ('feature_selection', RFECV(estimator=RandomForestClassifier(random_state=RANDOM_STATE),
                                cv=KFold(n_splits=5))),
    ('randomforest', RandomForestClassifier(random_state=RANDOM_STATE)),
])

# Fit the model pipeline to the training data
model_pipeline.fit(X_train, y_train)

# Predict the labels of the test set
predict = model_pipeline.predict(X_test)

# Recall: recall_score expects (y_true, y_pred). Passing the predictions first
# would swap the roles and report precision instead of recall.
print(f'Recall: {recall_score(y_test, predict)}')