In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score
from sklearn.model_selection import ParameterGrid

# Load the dataset
train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')
test_data = pd.read_csv('test_data.csv')

# Ensure the data is sorted by the 'Entry_Date' column
train_data['Entry_Date'] = pd.to_datetime(train_data['Entry_Date'])
val_data['Entry_Date'] = pd.to_datetime(val_data['Entry_Date'])
test_data['Entry_Date'] = pd.to_datetime(test_data['Entry_Date'])

train_data = train_data.sort_values(by='Entry_Date')
val_data = val_data.sort_values(by='Entry_Date')
test_data = test_data.sort_values(by='Entry_Date')

# Define the feature sets manually
selected_features = ['SMA5_At_Entry', 'SMA10_At_Entry', 'EMA5_At_Entry', 'EMA15_At_Entry', 'RSI5_At_Entry', 'RSI10_At_Entry',
                     'ATR5_At_Entry', 'ATR15_At_Entry', 'Stoch7_K_At_Entry', 'Stoch21_K_At_Entry',
                     'BB10_High_At_Entry', 'BB10_Low_At_Entry', 'BB10_MAvg_At_Entry', 
                     'BB15_High_At_Entry', 'BB15_Low_At_Entry', 'BB15_MAvg_At_Entry', 
                     'Open', 'High', 'Low', 'Last', 'MACD_At_Entry', 'Day_Of_Week_At_Entry',
                     'ROC14_At_Entry' , 'ROC15_At_Entry'
                     ]

# Separating features and target variable
X_train = train_data[selected_features]
y_train = train_data['Target']
X_val = val_data[selected_features]
y_val = val_data['Target']
X_test = test_data[selected_features]
y_test = test_data['Target']

# Define parameter grid for Random Forest
param_grid = {
    'n_estimators': [5, 10, 20],
    'max_depth': [None, 10, 20],
    'min_samples_split': [10, 25, 50],
    'min_samples_leaf': [5, 10, 20],
    'bootstrap': [True, False]
}

# Initialize a list to store results
results = []

# Perform grid search
for i, params in enumerate(ParameterGrid(param_grid)):
    rf_clf = RandomForestClassifier(**params, random_state=42)
    
    # Fit the model to the training data
    rf_clf.fit(X_train, y_train)
    
    # Predict on the validation set
    y_val_pred = rf_clf.predict(X_val)
    
    # Calculate evaluation metrics for the validation set
    val_accuracy = accuracy_score(y_val, y_val_pred)
    val_roc_auc = roc_auc_score(y_val, y_val_pred)
    
    # Predict on the test set
    y_test_pred = rf_clf.predict(X_test)
    
    # Calculate evaluation metrics for the test set
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_roc_auc = roc_auc_score(y_test, y_test_pred)
    
    # Store the results
    results.append({
        'params': params,
        'val_accuracy': val_accuracy,
        'val_roc_auc': val_roc_auc,
        'test_accuracy': test_accuracy,
        'test_roc_auc': test_roc_auc
    })
    
    # Save results to a CSV file every ten iterations
    if (i + 1) % 10 == 0:
        results_df = pd.DataFrame(results)
        results_df.to_csv('grid_search_results.csv', index=False)
    
    # Print the current results
    print(f"Iteration: {i + 1}")
    print(f"Params: {params}")
    print(f"Validation Accuracy: {val_accuracy}")
    print(f"Validation ROC AUC: {val_roc_auc}")
    print(f"Test Accuracy: {test_accuracy}")
    print(f"Test ROC AUC: {test_roc_auc}")
    print("-" * 60)

# Final save after all iterations
results_df = pd.DataFrame(results)
results_df.to_csv('grid_search_results_retest.csv', index=False)

# Display the final results
print("Final Results:")
print(results_df)


Iteration: 1
Params: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 5, 'min_samples_split': 10, 'n_estimators': 5}
Validation Accuracy: 0.5226781857451404
Validation ROC AUC: 0.5229177555439409
Test Accuracy: 0.5210355987055016
Test ROC AUC: 0.5209751147440255
------------------------------------------------------------
Iteration: 2
Params: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 5, 'min_samples_split': 10, 'n_estimators': 10}
Validation Accuracy: 0.5431965442764579
Validation ROC AUC: 0.5432968715000374
Test Accuracy: 0.5285868392664509
Test ROC AUC: 0.5284043830821222
------------------------------------------------------------
Iteration: 3
Params: {'bootstrap': True, 'max_depth': None, 'min_samples_leaf': 5, 'min_samples_split': 10, 'n_estimators': 20}
Validation Accuracy: 0.5269978401727862
Validation ROC AUC: 0.5267770477114911
Test Accuracy: 0.5329018338727076
Test ROC AUC: 0.5327893271764125
---------------------------------------------------------