In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier

In [2]:
def test_labels():
    """Check the forward-5-row return labelling rule on a six-row price series.

    Computes ``Forward_5d_Return`` as ``(Price[t+5] - Price[t]) / Price[t]``
    and maps it to a label: ``1`` when the return is above +2%, ``-1`` when
    it is below -2%, and ``0`` otherwise. The frame and one PASS/FAIL line
    per check are printed, and every check is then asserted so that a
    regression raises instead of only printing "FAIL".

    Returns:
        None. Results are reported on stdout.

    Raises:
        AssertionError: if any checked label differs from its expected value.
    """
    df = pd.DataFrame({'Price': [100, 102, 101, 99, 97, 105]})
    # With six rows only index 0 has a price five rows ahead; shift(-5)
    # leaves NaN in every other row.
    df['Forward_5d_Return'] = (df['Price'].shift(-5) - df['Price']) / df['Price']

    def label_return(r):
        # NOTE: comparisons against NaN are False, so a missing forward
        # return falls through both branches and is labelled 0.
        if r > 0.02:
            return 1
        elif r < -0.02:
            return -1
        else:
            return 0

    df['Label'] = df['Forward_5d_Return'].apply(label_return)

    print("Test DataFrame:")
    print(df)

    # Row index -> expected label.
    expected_labels = {
        0: 1,  # From 100 to 105 = +5%
        1: 0,  # Will be NaN return, so label = 0
    }

    failures = []
    for idx, expected in expected_labels.items():
        actual = df['Label'].iloc[idx]
        passed = actual == expected

        print(f"\nExpected Label at index {idx}:", expected)
        print(f"Actual Label at index {idx}:", actual)
        print("PASS" if passed else "FAIL")

        if not passed:
            failures.append(f"index {idx}: expected {expected}, got {actual}")

    # Assert only after every check has been printed, so the full report is
    # visible even when something fails.
    assert not failures, "Label mismatch -> " + "; ".join(failures)


In [3]:
# Run the label check; it prints the test frame and a PASS/FAIL line per check.
test_labels()


Test DataFrame:
   Price  Forward_5d_Return  Label
0    100               0.05      1
1    102                NaN      0
2    101                NaN      0
3     99                NaN      0
4     97                NaN      0
5    105                NaN      0

Expected Label at index 0: 1
Actual Label at index 0: 1
PASS

Expected Label at index 1: 0
Actual Label at index 1: 0
PASS


In [17]:
def test_technical_indicators():
    """Check the SMA_20 and Momentum_10 formulas on the price series 1..20.

    ``SMA_20`` is the 20-row rolling mean of ``Price`` and ``Momentum_10`` is
    ``Price`` minus ``Price`` ten rows earlier. The frame and one PASS/FAIL
    line per check are printed, and the checks are then asserted so that a
    regression raises instead of only printing "FAIL".

    Returns:
        None. Results are reported on stdout.

    Raises:
        AssertionError: if the last-row SMA or momentum is wrong, or if a
            warm-up row unexpectedly holds a value.
    """
    data = list(range(1, 21))  # 1 to 20
    df = pd.DataFrame({'Price': data})

    df['SMA_20'] = df['Price'].rolling(window=20).mean()
    df['Momentum_10'] = df['Price'] - df['Price'].shift(10)

    print("\nTechnical Indicators DataFrame:")
    print(df)

    # The 20-row window is only full on the last row, where it spans 1..20.
    expected_sma = sum(range(1, 21)) / 20
    actual_sma = df['SMA_20'].iloc[-1]
    sma_ok = bool(np.isclose(actual_sma, expected_sma))
    print("\nExpected SMA_20 at last row:", expected_sma)
    print("Actual SMA_20 at last row:", actual_sma)
    print("PASS" if sma_ok else "FAIL")

    # Last price (20) minus the price ten rows earlier (10).
    expected_momentum = 20 - 10
    actual_momentum = df['Momentum_10'].iloc[-1]
    momentum_ok = bool(actual_momentum == expected_momentum)
    print("\nExpected Momentum_10 at last row:", expected_momentum)
    print("Actual Momentum_10 at last row:", actual_momentum)
    print("PASS" if momentum_ok else "FAIL")

    # Assert after printing so the whole report is visible on failure.
    assert sma_ok, f"SMA_20: expected {expected_sma}, got {actual_sma}"
    assert momentum_ok, (
        f"Momentum_10: expected {expected_momentum}, got {actual_momentum}"
    )
    # Rows without enough history must stay NaN rather than hold partial values.
    assert df['SMA_20'].iloc[:-1].isna().all(), "SMA_20 warm-up rows should be NaN"
    assert df['Momentum_10'].iloc[:10].isna().all(), (
        "Momentum_10 warm-up rows should be NaN"
    )



In [18]:
# Run the indicator check; it prints the frame and a PASS/FAIL line per check.
test_technical_indicators()


Technical Indicators DataFrame:
    Price  SMA_20  Momentum_10
0       1     NaN          NaN
1       2     NaN          NaN
2       3     NaN          NaN
3       4     NaN          NaN
4       5     NaN          NaN
5       6     NaN          NaN
6       7     NaN          NaN
7       8     NaN          NaN
8       9     NaN          NaN
9      10     NaN          NaN
10     11     NaN         10.0
11     12     NaN         10.0
12     13     NaN         10.0
13     14     NaN         10.0
14     15     NaN         10.0
15     16     NaN         10.0
16     17     NaN         10.0
17     18     NaN         10.0
18     19     NaN         10.0
19     20    10.5         10.0

Expected SMA_20 at last row: 10.5
Actual SMA_20 at last row: 10.5
PASS

Expected Momentum_10 at last row: 10
Actual Momentum_10 at last row: 10.0
PASS


In [19]:
def test_random_forest_predict():
    """Smoke-test fitting a RandomForestClassifier and predicting with it.

    Fits a 10-tree forest (``random_state=42``) on a five-row synthetic frame
    with three feature columns and a ``Label`` column, then predicts on the
    same five rows. Accuracy is therefore measured on the training data: the
    check exercises the fit/predict path and says nothing about
    generalisation. The frame, predictions, accuracy and a PASS/FAIL line are
    printed, and the checks are then asserted so that a regression raises
    instead of only printing "FAIL".

    Returns:
        None. Results are reported on stdout.

    Raises:
        AssertionError: if the prediction count or label set is inconsistent
            with the training labels, or accuracy is below the threshold.
    """
    # Kept local: accuracy_score is not imported at file level.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score

    min_accuracy = 0.6  # pass threshold for the training-set accuracy

    # Sample synthetic dataset
    df = pd.DataFrame({
        'SMA_20': [1.2, 1.5, 1.3, 1.4, 1.6],
        'Momentum_10': [0.1, 0.2, -0.1, 0.05, -0.2],
        'Volatility_10': [0.5, 0.6, 0.4, 0.55, 0.45],
        'Label': [1, 0, -1, 1, 0]
    })

    features = ['SMA_20', 'Momentum_10', 'Volatility_10']
    X = df[features]
    y = df['Label']

    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X, y)
    predictions = clf.predict(X)

    accuracy = accuracy_score(y, predictions)
    accuracy_ok = accuracy >= min_accuracy

    print("\nRandom Forest Test DataFrame:")
    print(df)
    print("\nPredictions:", predictions)
    print("Accuracy:", accuracy)
    print("PASS" if accuracy_ok else "FAIL")

    # Assert after printing so the whole report is visible on failure.
    assert len(predictions) == len(y), (
        f"expected {len(y)} predictions, got {len(predictions)}"
    )
    assert set(predictions) <= set(y), "predicted a label absent from the training labels"
    assert accuracy_ok, f"accuracy {accuracy} is below {min_accuracy}"


In [20]:
# Run the random-forest check; it prints the frame, predictions, accuracy and PASS/FAIL.
test_random_forest_predict()


Random Forest Test DataFrame:
   SMA_20  Momentum_10  Volatility_10  Label
0     1.2         0.10           0.50      1
1     1.5         0.20           0.60      0
2     1.3        -0.10           0.40     -1
3     1.4         0.05           0.55      1
4     1.6        -0.20           0.45      0

Predictions: [ 1  0 -1  1  0]
Accuracy: 1.0
PASS
