
# Modeling Walkthrough

This notebook demonstrates training and evaluating a simple classifier on the feature-engineered dataset. It splits the data into training and test sets, trains a model, evaluates its performance, and discusses the results.


In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Load feature data
features = pd.read_csv('../data/processed/features.csv', parse_dates=['date'])

# Construct target (next-day return direction).
# The shifted series is NaN wherever there is no following observation (at
# least the final row). `NaN > 0` evaluates to False, so casting the
# comparison straight to int would silently label those rows 0 and a later
# dropna on 'target' would never remove them. Filter on the shifted series
# first, then label only the rows that actually have a next return.
next_return = features['returns'].shift(-1)
has_next = next_return.notna()
features = features.loc[has_next].copy()
features['target'] = (next_return.loc[has_next] > 0).astype(int)

# Everything except identifiers, the raw return, sentiment and the label is
# used as a model input. errors='ignore' keeps the original tolerance for a
# listed column being absent from the file.
X = features.drop(columns=['date', 'returns', 'sentiment', 'target'], errors='ignore')
y = features['target']

# Train/test split.
# shuffle=False holds out the last 20% of rows instead of a random sample, so
# the model is never trained on rows that come after the ones it is tested on.
# NOTE(review): assumes the CSV rows are already in chronological order --
# confirm, or sort by 'date' upstream.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions and evaluation on the held-out rows
preds = model.predict(X_test)
print('Accuracy:', accuracy_score(y_test, preds))
print('Precision:', precision_score(y_test, preds))
print('Recall:', recall_score(y_test, preds))
