<a href="https://colab.research.google.com/github/shardwick/ML-Projects/blob/main/Metrics_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import datetime as datetime

# Load the dataset
data = pd.read_csv('synthetic_market_trends_data.csv')

# Preview the data
print(data.head())

              datetime        open        high         low       close  \
0  2023-01-01 20:00:00  159.165661  161.322600  153.589315  163.916731   
1  2023-01-01 21:00:00  134.928652  137.046453  133.213415  131.552938   
2  2023-01-01 22:00:00  191.273149  196.505729  185.699559  191.627306   
3  2023-01-01 23:00:00  168.238367  172.105449  159.481558  170.222485   
4  2023-01-02 00:00:00  138.211453  144.232933  132.108881  141.089945   

   volume      SMA_20      EMA_20   returns  volatility        RSI      MACD  \
0     328  143.333865  138.427180  0.638015    0.347630  51.105085  5.377478   
1     400  140.382191  137.772490 -0.197440    0.317039  47.278460  4.198134   
2     523  144.533882  142.901520  0.456655    0.311268  53.835299  8.018562   
3     378  145.595745  145.503517 -0.111700    0.305658  48.562288  9.212889   
4     303  146.721784  145.083177 -0.171144    0.304299  54.212253  7.719661   

   Signal_Line  
0     6.633276  
1     6.146248  
2     6.520711  
3     

In [3]:
def compute_rsi(data, window):
    delta = data.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))


In [6]:
# Data preprocessing steps
data['datetime'] = pd.to_datetime(data['datetime'])
data.set_index('datetime', inplace=True)

# Handle missing values
data.fillna(method='ffill', inplace=True)

# Feature engineering: Create technical indicators
data['SMA_20'] = data['close'].rolling(window=20).mean()
data['EMA_20'] = data['close'].ewm(span=20, adjust=False).mean()
data['RSI'] = compute_rsi(data['close'], window=14)  # Implement compute_rsi function

# Additional features
data['returns'] = data['close'].pct_change()
data['volatility'] = data['returns'].rolling(window=20).std()

# Drop rows with NaN values created by rolling windows
data.dropna(inplace=True)

print(data.head())

                           open        high         low       close  volume  \
datetime                                                                      
2023-01-02 16:00:00  119.647659  126.215544  115.826681  122.472679     318   
2023-01-02 17:00:00  185.026920  192.250515  180.366843  180.745588     508   
2023-01-02 18:00:00  132.824402  141.636976  126.793746  128.728243     440   
2023-01-02 19:00:00  169.095356  178.111154  164.505310  170.749094     615   
2023-01-02 20:00:00  109.180313  111.247166  107.252961  112.134486     502   

                         SMA_20      EMA_20   returns  volatility        RSI  \
datetime                                                                       
2023-01-02 16:00:00  142.415759  141.570011 -0.012607    0.164552  45.394039   
2023-01-02 17:00:00  144.875392  145.301018  0.475803    0.189578  59.897653   
2023-01-02 18:00:00  141.730439  143.722658 -0.287793    0.174043  50.593811   
2023-01-02 19:00:00  141.756769  146.296605  0

  data.fillna(method='ffill', inplace=True)


# Feature Engineering 

In [8]:
# create technical indicators 

def compute_rsi(data, window):
    delta = data.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

# Compute additional technical indicators
data['MACD'] = data['close'].ewm(span=12, adjust=False).mean() - data['close'].ewm(span=26, adjust=False).mean()
data['Signal_Line'] = data['MACD'].ewm(span=9, adjust=False).mean()


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define features and target
features = ['SMA_20', 'EMA_20', 'RSI', 'volatility', 'MACD', 'Signal_Line']
target = 'close'

X = data[features]
y = (data[target].shift(-1) > data[target]).astype(int)  # Binary classification: 1 if next close price is higher, else 0

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# XGBoost model
import xgboost as xgb

xgb_model = xgb.XGBClassifier()
xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_test)

# Evaluation
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("XGBoost F1 Score:", f1_score(y_test, y_pred_xgb))

# LightGBM model
import lightgbm as lgb

lgb_model = lgb.LGBMClassifier()
lgb_model.fit(X_train, y_train)

y_pred_lgb = lgb_model.predict(X_test)

# Evaluation
print("LightGBM Accuracy:", accuracy_score(y_test, y_pred_lgb))
print("LightGBM F1 Score:", f1_score(y_test, y_pred_lgb))

# CatBoost model
import catboost as cb

cb_model = cb.CatBoostClassifier(verbose=0)
cb_model.fit(X_train, y_train)

y_pred_cb = cb_model.predict(X_test)

# Evaluation
print("CatBoost Accuracy:", accuracy_score(y_test, y_pred_cb))
print("CatBoost F1 Score:", f1_score(y_test, y_pred_cb))


XGBoost Accuracy: 0.6885057471264368
XGBoost F1 Score: 0.6978818283166109


AttributeError: module 'pandas.core.strings' has no attribute 'StringMethods'