In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Sample synthetic dataset creation
# In a real scenario, you would load historical stock data.
# All random draws come from one seeded generator so that the synthetic data,
# and therefore every metric printed below, is identical on each
# Restart Kernel -> Run All.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

data = {
    'moving_avg': rng.random(N_SAMPLES),  # Simulated moving average values in [0, 1)
    'volume': rng.random(N_SAMPLES),      # Simulated volume data in [0, 1)
    'momentum': rng.random(N_SAMPLES),    # Simulated momentum values in [0, 1)
    # Target variable. It is drawn independently of the three features above,
    # so there is no real signal to learn: accuracy near 0.5 is the expected
    # outcome, not a sign that the model is misconfigured.
    'trend': rng.choice(['bullish', 'bearish'], size=N_SAMPLES),
}
df = pd.DataFrame(data)

# Feature matrix and target variable
X = df[['moving_avg', 'volume', 'momentum']]
y = df['trend']

# Split the data into training and testing sets (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Initialize the Decision Tree Classifier; max_depth=3 keeps the tree shallow
clf = DecisionTreeClassifier(max_depth=3, random_state=SEED)
clf.fit(X_train, y_train)

# Make predictions on the held-out test set
predictions = clf.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, predictions))


Accuracy: 0.55
Classification Report:
               precision    recall  f1-score   support

     bearish       0.64      0.58      0.61        12
     bullish       0.44      0.50      0.47         8

    accuracy                           0.55        20
   macro avg       0.54      0.54      0.54        20
weighted avg       0.56      0.55      0.55        20



In [3]:
# Import additional libraries for advanced tuning
from sklearn.model_selection import GridSearchCV
from sklearn.tree import export_text

# Search space for the tree's size/regularisation hyperparameters
param_grid = dict(
    max_depth=[3, 5, 7, 10],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# 5-fold cross-validated search over every combination in the grid,
# ranking candidates by accuracy
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

# The winning configuration, as exposed by the fitted search object
best_clf = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Score the selected model on the held-out test set
predictions_advanced = best_clf.predict(X_test)
accuracy_advanced = accuracy_score(y_test, predictions_advanced)
report_advanced = classification_report(y_test, predictions_advanced)
print("Advanced Model Accuracy:", accuracy_advanced)
print("Advanced Classification Report:\n", report_advanced)

# Render the fitted tree's split rules as indented text
tree_rules = export_text(best_clf, feature_names=X.columns.tolist())
print("Decision Tree Rules:\n", tree_rules)

# Per-feature importances, labelled by column name, largest first
feature_importances = pd.Series(data=best_clf.feature_importances_, index=X.columns)
ranked_importances = feature_importances.sort_values(ascending=False)
print("Feature Importances:\n", ranked_importances)


Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10}
Advanced Model Accuracy: 0.55
Advanced Classification Report:
               precision    recall  f1-score   support

     bearish       0.64      0.58      0.61        12
     bullish       0.44      0.50      0.47         8

    accuracy                           0.55        20
   macro avg       0.54      0.54      0.54        20
weighted avg       0.56      0.55      0.55        20

Decision Tree Rules:
 |--- volume <= 0.25
|   |--- volume <= 0.05
|   |   |--- class: bullish
|   |--- volume >  0.05
|   |   |--- volume <= 0.17
|   |   |   |--- class: bearish
|   |   |--- volume >  0.17
|   |   |   |--- class: bearish
|--- volume >  0.25
|   |--- volume <= 0.68
|   |   |--- volume <= 0.44
|   |   |   |--- volume <= 0.30
|   |   |   |   |--- class: bullish
|   |   |   |--- volume >  0.30
|   |   |   |   |--- class: bearish
|   |   |--- volume >  0.44
|   |   |   |--- momentum <= 0.38
|   |   |   |   |--