In [24]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier

# Needed for decision tree visualization
import pydotplus
from IPython.display import Image

In [25]:
# Read the MSFT dataset from the working directory and preview its first rows
file_path = Path("MSFT_df.csv")
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Returns,50 Day MA,Upper,Lower,Trading Signal,Entry/Exit,TradingSignals2
0,2011-02-10,27.93,27.940001,27.290001,27.5,21.864422,76672400,-0.016804,27.9144,28.636182,26.778219,1.0,1.0,Buy
1,2011-02-11,27.76,27.809999,27.07,27.25,21.665649,83939700,-0.009091,27.9386,28.268699,26.919901,1.0,0.0,Hold
2,2011-02-14,27.209999,27.27,26.950001,27.23,21.649748,56766200,-0.000734,27.9454,28.224377,26.951022,1.0,0.0,Hold
3,2011-02-15,27.040001,27.33,26.950001,26.959999,21.561775,44116500,-0.009916,27.9442,27.959069,26.94513,1.0,0.0,Hold
4,2011-02-16,27.049999,27.07,26.6,27.02,21.609766,70817900,0.002226,27.9478,28.004014,26.963787,1.0,0.0,Hold


In [49]:
# Features: the four daily price columns
X = df.loc[:, ['Open', 'High', 'Low', 'Close']]

# Target: the 'Entry/Exit' column as an (n_rows, 1) column vector;
# later cells flatten it with .ravel() before fitting and scoring
y = df['Entry/Exit'].to_numpy().reshape(-1, 1)

In [50]:
# Partition the features and target into train and test subsets.
# random_state pins the shuffle so the split is reproducible.
# NOTE(review): test_size=0.70 holds out 70% of the rows for testing and
# trains on the remaining 30% - confirm this is not an inverted 70/30 split.
# NOTE(review): the rows are dated and train_test_split shuffles by default -
# confirm a random (non-chronological) split is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.70,
    random_state=78,
)

In [51]:
# Create the StandardScaler instance (unfitted here; it is fit on the
# training features in the next cell)
scaler = StandardScaler()

In [52]:
# Fit the Standard Scaler with the training data only, so the test split
# does not influence the scaling parameters.
# `fit` returns the fitted estimator itself, so X_scaler and scaler refer to
# the same object.
X_scaler = scaler.fit(X_train)

In [53]:
# Apply the training-fitted scaling to both the training and the test features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [54]:
# Compare candidate learning rates by training and held-out accuracy
learning_rates = [0.05, 0.1, 0.25, 0.5, 0.75, 1]

# Flatten the (n, 1) target columns once; fit and score are given 1-D labels
y_train_flat = y_train.ravel()
y_test_flat = y_test.ravel()

for learning_rate in learning_rates:
    # Only the learning rate varies; the other hyperparameters and the seed
    # are held fixed so the runs are comparable
    model = GradientBoostingClassifier(
        learning_rate=learning_rate,
        n_estimators=100,
        max_depth=3,
        max_features=2,
        random_state=0,
    )
    model.fit(X_train_scaled, y_train_flat)
    print("Learning rate: ", learning_rate)

    # Mean accuracy on the data the model was fit on
    train_accuracy = model.score(X_train_scaled, y_train_flat)
    print("Accuracy score (training): {0:.3f}".format(train_accuracy))

    # "validation" here is the held-out test split produced by train_test_split
    validation_accuracy = model.score(X_test_scaled, y_test_flat)
    print("Accuracy score (validation): {0:.3f}".format(validation_accuracy))
    print()

Learning rate:  0.05
Accuracy score (training): 0.961
Accuracy score (validation): 0.913

Learning rate:  0.1
Accuracy score (training): 0.988
Accuracy score (validation): 0.910

Learning rate:  0.25
Accuracy score (training): 1.000
Accuracy score (validation): 0.891

Learning rate:  0.5
Accuracy score (training): 1.000
Accuracy score (validation): 0.888

Learning rate:  0.75
Accuracy score (training): 1.000
Accuracy score (validation): 0.883

Learning rate:  1
Accuracy score (training): 1.000
Accuracy score (validation): 0.881



In [64]:
# Create GradientBoostingClassifier model
# NOTE(review): in the learning-rate comparison earlier in this notebook,
# learning_rate=1 gave the lowest held-out accuracy (0.881 vs 0.913 at 0.05).
# That comparison used n_estimators=100 and max_features=2, so it is not
# directly comparable - confirm these hyperparameters are the intended choice.
model = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=1,
    max_features=4,
    max_depth=3,
    random_state=0)

# Fit the model on the scaled training features; the target is flattened
# from its (n, 1) column shape to the 1-D label vector
model.fit(X_train_scaled, y_train.ravel())

# Score the model against the same 1-D labels used for fitting, consistent
# with the learning-rate comparison cell
print("Accuracy score (training): {0:.3f}".format(
    model.score(
        X_train_scaled,
        y_train.ravel())))
# "validation" here is the held-out test split
print("Accuracy score (validation): {0:.3f}".format(
    model.score(
        X_test_scaled,
        y_test.ravel())))

Accuracy score (training): 1.000
Accuracy score (validation): 0.885


In [65]:
# Predict a class label for every row of the scaled test features
predictions = model.predict(X_test_scaled)

# Fraction of test rows whose predicted label equals the true label;
# left as the last expression so the notebook displays it
accuracy_score(y_true=y_test, y_pred=predictions)

0.8849045691150954

In [70]:
# Calculating the confusion matrix

# Target values listed explicitly so the matrix is always 3x3 in this order,
# even if a class is missing from y_test or predictions. Without `labels`,
# the row/column order is inferred from the data and the hard-coded names
# below could be misaligned or fail to fit the matrix shape.
# NOTE(review): assumes -1.0 = Sell, 0.0 = Hold, 1.0 = Buy, as the original
# row/column names imply - confirm against how 'Entry/Exit' was built.
class_labels = [-1.0, 0.0, 1.0]
class_names = ["Sell", "Hold", "Buy"]

# Rows are actual classes, columns are predicted classes
cm = confusion_matrix(y_test, predictions, labels=class_labels)

cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {name}" for name in class_names],
    columns=[f"Predicted {name}" for name in class_names],
)



In [68]:
# Build the per-class precision / recall / F1 summary, then print it
report_text = classification_report(y_test, predictions)
print(report_text)

              precision    recall  f1-score   support

        -1.0       0.16      0.10      0.12        71
         0.0       0.92      0.95      0.94      1588
         1.0       0.17      0.11      0.14        70

    accuracy                           0.88      1729
   macro avg       0.42      0.39      0.40      1729
weighted avg       0.86      0.88      0.87      1729



In [71]:
# Displaying results
# Compute the accuracy here: no earlier cell assigns `acc_score`, so on a
# fresh kernel (Restart & Run All) this cell would otherwise raise NameError.
acc_score = accuracy_score(y_test, predictions)

print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted Sell,Predicted Hold,Predicted Buy
Actual Sell,7,63,1
Actual Hold,34,1515,39
Actual Buy,2,60,8


Accuracy Score : 0.8849045691150954
Classification Report
              precision    recall  f1-score   support

        -1.0       0.16      0.10      0.12        71
         0.0       0.92      0.95      0.94      1588
         1.0       0.17      0.11      0.14        70

    accuracy                           0.88      1729
   macro avg       0.42      0.39      0.40      1729
weighted avg       0.86      0.88      0.87      1729

