# ML Pipeline for Trading Bot (fixed)

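This notebook includes an improved data-file selector that works in Codespaces, local Jupyter, and Colab (with the data available under `/content/data`, e.g. after mounting or uploading it); set the `DATA_DIR` environment variable to point it at a different folder. It then performs a quick EDA and basic feature engineering as a skeleton pipeline.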

In [None]:
# Minimal imports
import os, glob, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import classification_report, accuracy_score
# Let wide frames show up to 200 columns before pandas truncates the display
pd.options.display.max_columns = 200

In [None]:
# Locate a data directory and load the first CSV it contains.
# Probe order: $DATA_DIR (when set), the Codespaces checkout, ./data, then /content/data.
candidates = [
    '/workspaces/POC_Safem0de_IS/data',
    os.path.join(os.getcwd(), 'data'),
    '/content/data'
]
env_data_dir = os.environ.get('DATA_DIR')
if env_data_dir is not None:
    candidates.insert(0, env_data_dir)

# The first existing directory that holds at least one *.csv wins
csv_files, data_dir = [], None
for folder in candidates:
    if not (folder and os.path.isdir(folder)):
        continue
    matches = sorted(glob.glob(os.path.join(folder, '*.csv')))
    if matches:
        csv_files, data_dir = matches, folder
        break

# Fallback: CSVs lying directly in the current working directory
if not csv_files:
    csv_files = sorted(glob.glob(os.path.join(os.getcwd(), '*.csv')))
    if not csv_files:
        raise FileNotFoundError(f'No CSV files found. Checked candidates: {candidates} and cwd={os.getcwd()}.')
    data_dir = os.getcwd()

print('Found files in', data_dir)
for idx, csv_path in enumerate(csv_files):
    print(idx, '-', csv_path)

# Default: load the first file (pick another index from the listing above if needed)
DATA_PATH = csv_files[0]
print('Loading', DATA_PATH)
df = pd.read_csv(DATA_PATH)

# With six or more columns, the first six are renamed by position; any extras keep their names
ohlcv_names = ['datetime','open','high','low','close','volume']
if df.shape[1] >= len(ohlcv_names):
    df.columns = ohlcv_names + list(df.columns[len(ohlcv_names):])

# Parse the timestamps, order rows chronologically, and index the frame by time
df['datetime'] = pd.to_datetime(df['datetime'])
df = df.sort_values('datetime').reset_index(drop=True).set_index('datetime')
print('shape', df.shape)
display(df.head())

## Quick EDA
Print the DataFrame summary and the fraction of missing values per column, then plot the last 500 closing prices.

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()
# isna().mean() is the fraction (0-1) of missing values in each column
print('Missing per column:', df.isna().mean())
# Plot only the most recent 500 rows of the close column
_ = df['close'].iloc[-500:].plot(title='Close (last 500 points)', figsize=(12,4))

## Feature Engineering
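Adds simple technical features computed from `close`: 1- and 3-period returns, 5- and 20-period moving averages, a 20-period rolling standard deviation, and a 14-period RSI. Rows containing any missing value (including the indicators' warm-up period) are dropped.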

In [None]:
def add_basic_features(df):
    """Return a copy of ``df`` extended with basic indicators derived from ``close``.

    Columns added, in this order:
        ret_1, ret_3 -- percentage change of ``close`` over 1 and 3 rows
        ma_5, ma_20  -- rolling mean of ``close`` over 5 and 20 rows
        std_20       -- rolling standard deviation of ``close`` over 20 rows
        rsi14        -- RSI built from 14-row simple rolling means of gains and losses

    The input frame is not modified. Every row that holds a NaN in any column
    is removed from the result, which discards the indicators' warm-up rows.
    A 14-row window without any gain or loss yields 0/0 = NaN for ``rsi14``,
    so such rows are removed as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a ``close`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with the feature columns appended and NaN rows dropped.
    """
    close = df['close']
    window_20 = close.rolling(20)

    # Split the row-to-row price change into gains and (sign-flipped) losses
    change = close.diff()
    avg_gain = change.clip(lower=0).rolling(14).mean()
    avg_loss = (-change.clip(upper=0)).rolling(14).mean()
    rel_strength = avg_gain / avg_loss

    enriched = df.assign(
        ret_1=close.pct_change(1),
        ret_3=close.pct_change(3),
        ma_5=close.rolling(5).mean(),
        ma_20=window_20.mean(),
        std_20=window_20.std(),
        rsi14=100 - (100 / (1 + rel_strength)),
    )
    return enriched.dropna()

# Build the feature table from the loaded price frame, then report its size and preview it
df_feat = df.pipe(add_basic_features)
print('after features:', df_feat.shape)
display(df_feat.head())