In [3]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import recall_score
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFECV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold, StratifiedKFold

In [4]:
train_data = pd.read_csv('../../data/raw/train.csv',  encoding= 'unicode_escape')
y_train = train_data["Unusual"]                      #defining the labels
X_train = train_data.drop(["Unusual"], axis=1)
test_data = pd.read_csv('../../data/raw/test.csv',  encoding= 'unicode_escape')
y_test = test_data["Unusual"]                      #defining the labels
X_test = test_data.drop(["Unusual"], axis=1)

In [6]:
# Define the model pipeline
#RFECV (Recursive Feature Elimination with Cross-Validation)

model_pipeline = Pipeline([
    ('preprocessing', joblib.load('../pipelines/PreprocessingPipeline.joblib')),
    ('feature_selection', RFECV(estimator=RandomForestClassifier(), cv=KFold(n_splits=5))),
    ('randomforest', RandomForestClassifier()),    
])

# Fit the model pipeline to the training data
model_pipeline.fit(X_train, y_train)

# predict the values
predict = model_pipeline.predict(X_test)

# recall
print(f'Recall: {recall_score(predict,y_test)}')

Recall: 0.9969656102494943


In [5]:
# Define the model pipeline
model_pipeline = Pipeline([
    ('preprocessing', joblib.load('../pipelines/PreprocessingPipeline.joblib')),
    ('xgboost', RandomForestClassifier()),    
]) 

# KFold cross-validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Perform cross-validation
scores = cross_validate(estimator= model_pipeline, X=X_train, y=y_train, cv=cv, scoring='recall',return_train_score=True)



In [7]:
print(f'Training accuracy: {scores["train_score"].mean()}')
print(f'Testing accuracy: {scores["test_score"].mean()}')

Training accuracy: 0.9999652596838631
Training accuracy: 0.6533279283453015


In [15]:
# Save the trained pipeline to a file
joblib.dump(model_pipeline, '../../models/PickleModel/model_pipeline.pkl')

['../../models/PickleModel/model_pipeline.pkl']

In [None]:
cross_validate.__