<a href="https://colab.research.google.com/github/khaled-wsa/openai-cookbook/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Print the NVIDIA driver and GPU status so the reader can confirm that a GPU
# runtime is attached before any of the setup cells below are run.
!nvidia-smi

In [None]:
# Clone the RAPIDS community helper scripts into ./rapidsai-csp-utils; the
# following setup cells invoke scripts from its colab/ sub-directory.
!git clone https://github.com/rapidsai-community/rapidsai-csp-utils.git

In [None]:
# Run the Colab setup script from the cloned helper repo. The path is relative,
# so the working directory must be the one the repo was cloned into.
# NOTE(review): `sys` and `os` are imported here but not used in this cell.
!bash rapidsai-csp-utils/colab/rapids-colab.sh
import sys, os 


In [None]:
# Runs the same setup script as the previous cell, this time through an
# absolute /content path.
# NOTE(review): this looks like a duplicate of the cell above — confirm whether
# both are needed before keeping them.
!bash /content/rapidsai-csp-utils/colab/rapids-colab.sh
import sys, os

In [None]:
# This will update the Colab environment and restart the kernel.
# Run this cell on its own: everything after it must be executed again once the
# kernel has come back up.
!bash rapidsai-csp-utils/colab/update_gcc.sh
import os
# os._exit terminates the Python process immediately, which is what forces the
# notebook kernel to restart; `00` is simply the integer 0 (exit status).
os._exit(00)

In [None]:
# Install CondaColab. This will restart your kernel again, so run the cells
# below only after the restart has finished.
# NOTE(review): assumes the `condacolab` package is already importable at this
# point — no visible cell installs it explicitly; confirm.
import condacolab
condacolab.install()

In [None]:
# The module is imported again because the install cell above restarts the
# kernel; check() is then called to verify the CondaColab setup.
import condacolab
condacolab.check()

In [None]:
    # Installing RAPIDS is now 'python rapidsai-csp-utils/colab/install_rapids.py <release> <packages>'.
    # The <release> options are 'stable' and 'nightly'. Leaving it blank or passing any other word defaults to stable.
    # The <packages> option is blank by default, or 'core'. By default both RAPIDSAI and BlazingSQL are installed;
    # the 'core' option installs only RAPIDSAI and does not include BlazingSQL.
    # This cell passes 'nightly' and leaves <packages> blank.
    !python rapidsai-csp-utils/colab/install_rapids.py nightly
    import os
    # Point CUDA-related environment variables at the system CUDA install under /usr/local/cuda.
    # NOTE(review): the NUMBAPRO_* names are presumably read by Numba to locate NVVM/libdevice —
    # confirm they are still honoured by the installed version.
    os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'
    os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'
    os.environ['CONDA_PREFIX'] = '/usr/local'

In [None]:
# Install the CPU-side analysis stack, then the CUDA 11 ("-cu11") RAPIDS wheels
# from NVIDIA's package index.
# NOTE(review): no versions are pinned, so a re-run may resolve to different
# releases. These wheels may also overlap with what the install_rapids.py cell
# already installed — confirm which install path is actually required.
!pip install pandas
!pip install numpy
!pip install scikit-learn
!pip install joblib
!pip install cudf-cu11 dask-cudf-cu11 --extra-index-url=https://pypi.nvidia.com
!pip install cuml-cu11 --extra-index-url=https://pypi.nvidia.com
!pip install cugraph-cu11 --extra-index-url=https://pypi.nvidia.com


In [None]:
# Re-check the GPU status now that the installation cells above have run.
!nvidia-smi

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib
from cuml.cluster import KMeans as KMeansGPU

# Backtest configuration (tune these before running the analysis below).
INITIAL_CAPITAL = 10_000  # starting cash handed to the trading strategy
TARGET_GAIN = 2.0  # fractional gain relative to INITIAL_CAPITAL (2.0 == +200%)
BEST_MODEL_FILE = "best_model.pkl"  # joblib file the best-scoring model is written to

def load_data(file_path):
    """Read a tab-separated price file and derive per-row returns.

    The file must provide ``<DATE>``, ``<TIME>`` and ``<CLOSE>`` columns.
    ``<DATE>`` and ``<TIME>`` are joined with a space and parsed into a
    ``datetime`` index, and a ``returns`` column holding the percentage change
    of ``<CLOSE>`` from the previous row is appended.

    Args:
        file_path: Path (or buffer) accepted by ``pandas.read_csv``.

    Returns:
        DataFrame indexed by ``datetime``. Every row containing a missing value
        in any column is dropped, which always includes the first row because
        its return is undefined.
    """
    frame = pd.read_csv(file_path, sep="\t")
    timestamps = pd.to_datetime(frame["<DATE>"] + " " + frame["<TIME>"])
    frame = frame.assign(datetime=timestamps).set_index("datetime")
    frame["returns"] = frame["<CLOSE>"].pct_change()
    return frame.dropna()

def split_data(data):
    """Split ``data`` into a train part and a test part without shuffling.

    Row order is preserved (``shuffle=False``) and 20% of the rows are
    requested for the test part, so the test rows come after the train rows.

    Returns:
        Two-element list ``[train, test]``.
    """
    train_part, test_part = train_test_split(data, shuffle=False, test_size=0.2)
    return [train_part, test_part]

def preprocess_data(train_data, test_data):
    """Standardise the ``<CLOSE>`` and ``returns`` columns of both splits.

    The scaler is fitted on the training rows only and then applied to both
    splits, so no statistics from the test rows leak into the scaling.

    Returns:
        Tuple ``(train_features, test_features)`` of scaled feature arrays.
    """
    feature_columns = ["<CLOSE>", "returns"]
    scaler = StandardScaler()
    scaler.fit(train_data[feature_columns])
    scaled_train = scaler.transform(train_data[feature_columns])
    scaled_test = scaler.transform(test_data[feature_columns])
    return scaled_train, scaled_test

def create_model(train_features, n_clusters):
    """Fit a GPU K-Means model with ``n_clusters`` clusters.

    Args:
        train_features: Feature matrix to cluster.
        n_clusters: Number of clusters to fit.

    Returns:
        The fitted ``KMeansGPU`` estimator (built with ``n_init=10``).
    """
    model = KMeansGPU(n_init=10, n_clusters=n_clusters)
    model.fit(train_features)
    return model

def trading_strategy_with_position_sizing(data, initial_capital):
    """Backtest a long-only, all-in strategy driven by cluster labels.

    Rules, evaluated bar by bar at the ``<CLOSE>`` price:

    * flat and ``cluster == 0``  -> buy as many whole units as cash allows;
    * long and ``cluster == 1``  -> sell every unit;
    * any other combination      -> hold the current state.

    Only the ``cluster`` and ``<CLOSE>`` columns are read.

    Args:
        data: DataFrame with ``cluster`` and ``<CLOSE>`` columns, in
            chronological order.
        initial_capital: Cash available before the first bar.

    Returns:
        Tuple ``(pnl, capital)``:

        * ``pnl`` - list with one entry per bar: the monetary profit or loss
          earned on that bar by the units held coming into it
          (``units * (close - previous_close)``). Its sum equals
          ``capital - initial_capital``.
        * ``capital`` - final equity. A position still open after the last bar
          is valued at the last close, so an unfinished trade is not reported
          as if the invested cash had been lost.
    """
    position = 0
    units = 0
    capital = initial_capital
    pnl = []
    prev_close = None

    # Iterating the two columns directly avoids building a Series per row.
    for cluster, close in zip(data["cluster"], data["<CLOSE>"]):
        # P&L belongs to the units held *before* this bar's signal is acted on:
        # the entry bar earns nothing, the exit bar is still counted.
        pnl.append(units * (close - prev_close) if units else 0.0)

        if cluster == 0 and position == 0:
            position = 1
            units = capital // close
            capital -= units * close
        elif cluster == 1 and position == 1:
            position = 0
            capital += units * close
            units = 0

        prev_close = close

    # Mark any still-open position to market at the final close.
    if position == 1:
        capital += units * prev_close

    return pnl, capital

def find_best_strategy(train_features, test_features, test_data, initial_capital, target_gain, max_rounds=1000):
    """Pick the cluster count whose backtest gain is highest, then reinvest.

    For every ``n_clusters`` from 2 to 10 a K-Means model is fitted on
    ``train_features``, its labels for ``test_features`` drive the trading
    strategy on ``test_data``, and the model with the largest positive gain is
    written to ``BEST_MODEL_FILE``.

    Afterwards the strategy is re-run on the best model's labels with the
    starting capital raised by the best run's total P&L each round, until the
    gain relative to ``initial_capital`` reaches ``target_gain``.

    NOTE(review): the model is selected on the same data it is scored on, and
    the reinvestment rounds inject additional capital while the gain is still
    measured against ``initial_capital`` - treat the returned gain as an
    optimistic figure.

    Args:
        train_features: Scaled features used to fit each model.
        test_features: Scaled features of ``test_data``, row-aligned with it.
        test_data: DataFrame backtested on; it is not modified.
        initial_capital: Starting cash for the backtest.
        target_gain: Fractional gain at which reinvestment stops.
        max_rounds: Upper bound on reinvestment rounds, so the loop always
            terminates even if the target is unreachable.

    Returns:
        Tuple ``(best_n_clusters, final_gain)``.

    Raises:
        RuntimeError: If no cluster count produced a positive gain (nothing
            was saved to ``BEST_MODEL_FILE`` in that case).
    """
    best_gain = 0
    best_parameters = None
    best_pnl = None
    best_labelled = None

    for n_clusters in range(2, 11):
        kmeans = create_model(train_features, n_clusters)
        # Work on a labelled copy so the caller's frame is left untouched and
        # the best model's labels can be kept for the reinvestment rounds.
        labelled = test_data.assign(cluster=kmeans.predict(test_features))

        pnl, remaining_capital = trading_strategy_with_position_sizing(labelled, initial_capital)
        gain = (remaining_capital - initial_capital) / initial_capital

        print(f"Trying n_clusters={n_clusters}, gain={gain:.2%}")

        if gain > best_gain:
            best_gain = gain
            best_parameters = n_clusters
            best_pnl = pnl
            best_labelled = labelled

            # Save the best model
            joblib.dump(kmeans, BEST_MODEL_FILE)

    if best_parameters is None:
        raise RuntimeError("No cluster count produced a positive gain; no model was saved.")

    # Iteratively reinvest the profits until the target gain is reached.
    # The per-round increment is loop-invariant; a non-positive (or NaN)
    # increment can never move the gain towards the target, so skip the loop.
    reinvest_step = sum(best_pnl)
    current_gain = best_gain
    current_capital = initial_capital
    rounds = 0
    while current_gain < target_gain and reinvest_step > 0 and rounds < max_rounds:
        current_capital += reinvest_step
        _, remaining_capital = trading_strategy_with_position_sizing(best_labelled, current_capital)
        current_gain = (remaining_capital - initial_capital) / initial_capital
        rounds += 1

    if current_gain < target_gain:
        print(f"Target gain {target_gain:.2%} not reached; stopping at {current_gain:.2%}")

    return best_parameters, current_gain

# Entry point: run the whole pipeline (load -> split -> scale -> search).
# The names assigned here are module-level, so they stay available to any
# later notebook cell.
if __name__ == "__main__":
    # "XAUUSD.csv" is resolved relative to the current working directory and
    # must be tab-separated with <DATE>, <TIME> and <CLOSE> columns.
    data = load_data("XAUUSD.csv")
    train_data, test_data = split_data(data)
    train_features, test_features = preprocess_data(train_data, test_data)

    best_parameters, best_gain = find_best_strategy(train_features, test_features, test_data, INITIAL_CAPITAL, TARGET_GAIN)
    print("Best parameters:", best_parameters)
    print("Best gain:", best_gain)

    # Reload the model that find_best_strategy saved to disk.
    # NOTE(review): `best_model` is not used again in the visible code.
    best_model = joblib.load(BEST_MODEL_FILE)
