In [1]:
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
import os

# Adjust the path as needed
data_path = '../../data/nifty/train/dow_theory_trend.csv'

df = pd.read_csv(data_path)

# Ensure date is datetime and sort
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date').reset_index(drop=True)

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Optional: preview data
print("Data preview:")
print(df.head())
# date,open,high,low,close,volume,dow_trend_spec

df['date'] = pd.to_datetime(df['date'], errors='coerce')

# --- Step 3: Create useful time features ---
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['hour'] = df['date'].dt.hour
df['dayofweek'] = df['date'].dt.dayofweek

# --- Step 4: Drop the original date column ---
df = df.drop(columns=['date'])

# --- Step 5: Encode target labels (Uptrend, Downtrend, Sideways) ---
label_encoder = LabelEncoder()
df['dow_trend_spec_encoded'] = label_encoder.fit_transform(df['final_trend'])

# --- Step 6: Prepare features (X) and target (y) ---
X = df.drop(columns=['final_trend', 'dow_trend_spec_encoded'])
y = df['dow_trend_spec_encoded']

# --- Step 7: Split data ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Step 8: Train Random Forest model ---
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

import joblib
from pathlib import Path

# Define save path
model_dir = Path("models")  # or your own directory path
model_dir.mkdir(parents=True, exist_ok=True)

model_file_path = '../../model/dow_theory_trend_regression_model.pkl'

# Save model
joblib.dump(model, model_file_path)

print(f"✅ Model saved successfully at: {model_file_path}")


from sklearn.metrics import classification_report, confusion_matrix

# --- Model evaluation ---
y_pred = model.predict(X_test)

print("\n✅ Model Evaluation:")

# Ensure labels and target names match the encoder’s full set
labels = list(range(len(label_encoder.classes_)))
target_names = label_encoder.classes_

print(confusion_matrix(y_test, y_pred, labels=labels))
print(classification_report(y_test, y_pred, labels=labels, target_names=target_names, zero_division=0))


# --- Step 11: Add predictions back into DataFrame ---
results = X_test.copy()
results['Actual_Label'] = label_encoder.inverse_transform(y_test)
results['Predicted_Label'] = label_encoder.inverse_transform(y_pred)

print("\n🔍 Sample Predictions:")
print(results.head())

# --- Optional: Save results ---
results.to_csv("predicted_trends.csv", index=False)
print("\n📁 Results saved to 'predicted_trends.csv'")

Data preview:
                       date     open     high      low    close  volume  \
0 2001-01-01 00:00:00+05:30  1263.50  1276.15  1250.65  1254.30       0   
1 2001-01-02 00:00:00+05:30  1254.25  1279.59  1248.55  1271.80       0   
2 2001-01-03 00:00:00+05:30  1271.80  1293.55  1263.95  1291.25       0   
3 2001-01-04 00:00:00+05:30  1291.30  1331.35  1291.30  1307.65       0   
4 2001-01-05 00:00:00+05:30  1307.55  1330.30  1306.25  1327.25       0   

  final_trend  
0    Sideways  
1     Uptrend  
2     Uptrend  
3     Uptrend  
4    Sideways  
✅ Model saved successfully at: ../../model/dow_theory_trend_regression_model.pkl

✅ Model Evaluation:
[[223  28 189]
 [ 57  18 103]
 [126  23 451]]
              precision    recall  f1-score   support

   Downtrend       0.55      0.51      0.53       440
    Sideways       0.26      0.10      0.15       178
     Uptrend       0.61      0.75      0.67       600

    accuracy                           0.57      1218
   macro avg       