In [1]:
import turicreate as tc
import pandas as pd
import random
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot

In [14]:
# Relative path to the gzip-compressed CSV holding the backtest results.
location = "../../symphony/backtest/results/demark/demark.csv.gz"
# NOTE(review): this first `data_columns` list is dead code - it is reassigned
# immediately after the closing bracket and is never read in between.
data_columns = [
    "IsPerfect", 
    "DWaveUp", 
    "DWaveDown", 
    "DerivativeOscillator", 
    "DerivativeOscillatorSignal", 
    "DerivativeOscillatorRule", 
    "CandlestickPattern",
    "CandlestickPatternDirection",
    "MassIndex",
    "NATR",
    "ADX",
    "TDREI",
    "TDPOQ",
    "DemarkerI",
    "DemarkerIOversold",
    "DemarkerIOverbought",
    "TDPressure",
    "TDPressureOversold",
    "TDPressureOverbought",
    "IsZigZag",
    "HarmonicsPattern"
]
# Effective feature column list: this is the value apply_transformations() selects with.
data_columns = ['IsPerfect', 'dwave_up', 'dwave_down', 'derivative_oscillator', 'derivative_oscillator_signal', 'DerivativeOscillatorRule', 'candlestick_pattern', 'candlestick_pattern_direction', 'mass_index', 'natr', 'adx', 'td_rei', 'td_poq', 'td_demarker_i', 'DemarkerIOversold', 'DemarkerIOverbought', 'td_pressure', 'TDPressureOversold', 'TDPressureOverbought', 'zigzag', 'harmonic']
# NOTE(review): the two lists below use the naming of the overwritten
# `data_columns` list above, not the names in the effective list, and no code
# visible in this notebook reads them - confirm whether they are still needed.
numeric_features = [
    "DerivativeOscillator",
    "DerivativeOscillatorSignal", 
    "MassIndex",
    "TDREI",
    "DemarkerI",
    "TDPressure",
]
categorical_features = [
    "IsPerfect", 
    "DWaveUp", 
    "DWaveDown",
    "DerivativeOscillatorRule",
    "CandlestickPattern",
    "CandlestickPatternDirection",
    "TDPOQ",
    "DemarkerIOversold",
    "DemarkerIOverbought",
    "TDPressureOversold",
    "TDPressureOverbought",
    "IsZigZag",
    "HarmonicsPattern"
]
# Target column name, kept as a one-element list; training code reads label_column[0].
label_column = [
    "Profitable"
]
# pandas copy of the results, used by the exploratory cells below.
# get_data() reads the same `location` again into a turicreate SFrame.
results_df = pd.read_csv(location, compression='gzip')

def apply_transformations(sf):
    """
    Build the model-ready SFrame: the feature columns plus a binary ``Profitable`` label.

    The label is 1 when the row is marked profitable ("True" or 1) and its
    ``PNLPerc`` is not negative, otherwise 0. The previous implementation tried
    to apply the negative-PNL relabelling by assigning into the dicts yielded
    while iterating the SFrame; those dicts are per-row copies, so the
    assignment never reached the frame. The rule is now applied through
    ``SFrame.apply``.

    The input frame is no longer modified: the label column is computed into a
    separate SArray and attached to the projected copy. Labels that are already
    0/1 are accepted, so applying the function to an already-transformed frame
    does not zero every label.

    Parameters
    ----------
    sf : tc.SFrame
        Frame containing ``Profitable``, every name in ``data_columns`` and
        (for the relabelling rule) ``PNLPerc``.

    Returns
    -------
    tc.SFrame
        Columns ``data_columns + label_column``; ``dwave_up``, ``dwave_down``
        and ``harmonic`` cast to ``str``; rows with a missing ``td_pressure``
        dropped.
    """
    def to_binary_label(row):
        marked_profitable = row["Profitable"] in ("True", 1)
        # .get(): a frame that was already projected no longer carries PNLPerc.
        pnl = row.get("PNLPerc")
        if marked_profitable and pnl is not None and pnl < 0:
            # Marked profitable but actually lost money: treat as a loss.
            return 0
        return 1 if marked_profitable else 0

    binary_labels = sf.apply(to_binary_label, dtype=int)

    # Column selection yields a new SFrame, so the assignments below do not
    # touch the caller's frame.
    transformed = sf[data_columns + label_column]
    transformed["Profitable"] = binary_labels
    transformed['dwave_up'] = transformed['dwave_up'].astype(str)
    transformed['dwave_down'] = transformed['dwave_down'].astype(str)

    if 'harmonic' in transformed.column_names():
        transformed['harmonic'] = transformed['harmonic'].astype(str)
    if 'td_pressure' in transformed.column_names():
        transformed = transformed.dropna('td_pressure')
    return transformed

def get_data(results_df):
    """
    Load the backtest results as a turicreate SFrame.

    ``results_df`` is accepted but not used: the frame is always read from the
    module-level ``location`` path.
    """
    sframe = tc.SFrame(location)
    return sframe

In [15]:
# Load the backtest results into an SFrame (get_data reads from `location`).
data = get_data(results_df)


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,str,int,str,str,str,str,int,int,float,float,str,str,str,float,float,float,float,float,str,float,str,str,float,str,str,str,str,str,float,float,int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [18]:
# Show the Profitable values recorded for the AIONUSDT rows of the pandas frame.
list(results_df[results_df["Symbol"] == "AIONUSDT"]["Profitable"])

[True,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 True]

In [12]:
# List every distinct value of the Symbol column.
list(results_df["Symbol"].unique())

['ONTUSDT',
 'XMRUSDT',
 'EOSUSDT',
 'BTTUSDT',
 'ETCUSDT',
 'ATOMUSDT',
 'HOTUSDT',
 'ZRXUSDT',
 'COSUSDT',
 'ALGOUSDT',
 'OMGUSDT',
 'WAVESUSDT',
 'GTOUSDT',
 'ONEUSDT',
 'DASHUSDT',
 'LTCUSDT',
 'LINKUSDT',
 'NANOUSDT',
 'VETUSDT',
 'DOGEUSDT',
 'NULSUSDT',
 'FETUSDT',
 'IOTAUSDT',
 'BNBUSDT',
 'QTUMUSDT',
 'KEYUSDT',
 'TOMOUSDT',
 'ONGUSDT',
 'ICXUSDT',
 'ETHUSDT',
 'MATICUSDT',
 'TRXUSDT',
 'ZECUSDT',
 'TFUELUSDT',
 'ADAUSDT',
 'ZILUSDT',
 'XLMUSDT',
 'ENJUSDT',
 'FTMUSDT',
 'CELRUSDT',
 'MTLUSDT',
 'BATUSDT',
 'NEOUSDT',
 'MITHUSDT',
 'XRPUSDT',
 'DUSKUSDT',
 'STRAXBUSD',
 'SYSBUSD',
 'WINGBUSD',
 'PONDBUSD',
 'AXSBUSD',
 'EGLDBUSD',
 'AUDIOBUSD',
 'SKLBUSD',
 'BAKEBUSD',
 'YFIIBUSD',
 'TRBBUSD',
 'CHZBUSD',
 'RSRBUSD',
 'SFPBUSD',
 'DEGOBUSD',
 'CTKBUSD',
 'FLMBUSD',
 'OMBUSD',
 'BELBUSD',
 'ROSEBUSD',
 '1INCHBUSD',
 'ONEBUSD',
 'XVSBUSD',
 'UNFIBUSD',
 'DIABUSD',
 'AERGOBUSD',
 'CFXBUSD',
 'BTGBUSD',
 'ALPHABUSD',
 'KSMBUSD',
 'DOTBUSD',
 'TLMBUSD',
 'TKOBUSD',
 'AVAXBUSD',
 'O

In [None]:
# Number of result rows whose Symbol is XLMUSDT.
len(results_df[results_df["Symbol"] == "XLMUSDT"])

In [3]:
def train_ensemble(train_data, num_models=10, verbose=False, row_subsample=False, max_iterations=10, bagging=True, bagging_split=0.9):
    """
    Train ``num_models`` classifiers of each of three kinds on ``train_data``.

    The returned list holds, in this order, the logistic classifiers, the
    boosted-trees classifiers and the random-forest classifiers. Every model
    targets ``label_column[0]`` with ``class_weights='auto'``.

    When ``bagging`` is true each model is fitted on its own
    ``train_data.random_split(bagging_split)`` sample; otherwise every model
    sees the full ``train_data``. ``row_subsample`` is accepted but not read
    by this function.
    """
    def draw_training_sample():
        # A fresh random subset per model when bagging, else the full frame.
        if not bagging:
            return train_data
        sampled, _ = train_data.random_split(bagging_split)
        return sampled

    ensemble = []

    for _ in range(num_models):
        ensemble.append(
            tc.logistic_classifier.create(
                draw_training_sample(),
                target=label_column[0],
                class_weights='auto',
                verbose=verbose,
            )
        )

    for _ in range(num_models):
        ensemble.append(
            tc.boosted_trees_classifier.create(
                draw_training_sample(),
                target=label_column[0],
                column_subsample=True,
                max_iterations=max_iterations,
                class_weights='auto',
                verbose=verbose,
            )
        )

    for _ in range(num_models):
        ensemble.append(
            tc.random_forest_classifier.create(
                draw_training_sample(),
                target=label_column[0],
                column_subsample=True,
                max_iterations=max_iterations,
                class_weights='auto',
                verbose=verbose,
            )
        )

    return ensemble
    
def train_svm_models(train_data, num_models=10, verbose=False, max_iterations=13):
    """
    Train ``num_models`` SVM classifiers on ``train_data`` and return them as a list.

    Each model targets ``label_column[0]`` with ``class_weights='auto'`` and the
    given ``max_iterations``.
    """
    return [
        tc.svm_classifier.create(
            train_data,
            target=label_column[0],
            max_iterations=max_iterations,
            class_weights='auto',
            verbose=verbose,
        )
        for _ in range(num_models)
    ]
        
    
def train_ensemble_on_symbols(symbols, data, split_perc=0.8, num_models=10, verbose=False, row_subsample=False):
    """
    Train an ensemble using only the rows of ``data`` whose Symbol is in ``symbols``.

    The filtered rows go through ``apply_transformations`` and are then split
    with ``random_split(split_perc)``; the ensemble is fitted on the first part.

    Returns ``(models, train_data, test_data)``.
    Raises ``Exception`` when ``symbols`` is empty.
    """
    if len(symbols) == 0:
        raise Exception("Symbols array cannot be empty")

    prepared = apply_transformations(data.filter_by(symbols, 'Symbol'))
    train_data, test_data = prepared.random_split(split_perc)
    models = train_ensemble(
        train_data,
        num_models=num_models,
        verbose=verbose,
        row_subsample=row_subsample,
    )
    return models, train_data, test_data
        
def get_probs(models, test_data):
    """
    Return one probability prediction per model, in the order of ``models``.

    Each entry is whatever ``model.predict(test_data, output_type='probability')``
    returns.
    """
    return [model.predict(test_data, output_type='probability') for model in models]

def average_probabilities(probabilities):
    """
    Average several per-model probability sequences element by element.

    ``probabilities`` is a sequence of equally indexed sequences (one per
    model). The result is a list with as many entries as ``probabilities[0]``,
    where entry ``i`` is the mean of every model's ``i``-th value.
    """
    model_count = len(probabilities)
    row_count = len(probabilities[0])
    return [
        sum([per_model[row] for per_model in probabilities]) / model_count
        for row in range(row_count)
    ]
    

In [4]:
def evaluate_model(averaged_probs, test_data, threshold=0.6):
    """
    Score thresholded probabilities against the ``Profitable`` labels of ``test_data``.

    Parameters
    ----------
    averaged_probs : sequence of float
        One probability per row of ``test_data``, in row order.
    test_data : mapping-like
        Anything where ``test_data["Profitable"]`` is iterable; labels may be
        ``"True"``/``"False"`` strings or 1/0.
    threshold : float
        A row is predicted positive when its probability is strictly greater
        than this value.

    Returns
    -------
    dict
        ``precision``, ``recall``, ``specificity``, ``auc``, ``f1_score``,
        ``num_targets`` and ``labels``. ``labels`` is the confusion-count dict
        whose keys read ``"<actual>/<predicted>"``. A ratio whose denominator
        is zero is reported as 0. ``auc`` is 1.0 when every label is positive
        and 0.0 when none is (``roc_auc_score`` is undefined for a single class).

    Raises
    ------
    ValueError
        If ``averaged_probs`` and the label column differ in length.
    """
    # Normalise labels to 0/1 once. The previous code tested truthiness of the
    # raw labels for the AUC shortcut, which is always true for "False" strings.
    target_labels = [
        1 if (label == "True" or label == 1) else 0
        for label in test_data["Profitable"]
    ]
    if len(averaged_probs) != len(target_labels):
        raise ValueError(
            f"Expected one probability per row: got {len(averaged_probs)} "
            f"probabilities for {len(target_labels)} rows"
        )

    labels = {
        "False/False": 0,
        "False/True": 0,
        "True/True": 0,
        "True/False": 0
    }
    for prob, actual in zip(averaged_probs, target_labels):
        actual_name = "True" if actual else "False"
        predicted_name = "True" if prob > threshold else "False"
        labels[f"{actual_name}/{predicted_name}"] += 1

    total_predicted_true = labels["True/True"] + labels["False/True"]
    total_actually_true = labels["True/True"] + labels["True/False"]
    total_actually_false = labels["False/False"] + labels["False/True"]

    precision = 0 if total_predicted_true == 0 else labels["True/True"] / total_predicted_true
    recall = 0 if total_actually_true == 0 else labels["True/True"] / total_actually_true
    # Previously unguarded: raised ZeroDivisionError when there were no negative rows.
    specificity = 0 if total_actually_false == 0 else labels["False/False"] / total_actually_false

    if all(target_labels):
        auc_score = 1.0
    elif not any(target_labels):
        auc_score = 0.0
    else:
        auc_score = roc_auc_score(target_labels, list(averaged_probs))

    if (precision + recall) == 0:
        f1_score = 0
    else:
        f1_score = 2 * ((precision * recall) / (precision + recall))

    return {
        "precision": precision,
        "recall": recall,
        "specificity": specificity,
        "auc": auc_score,
        "f1_score": f1_score,
        "num_targets": len(target_labels),
        "labels": labels,
    }

In [5]:
def evaluate_generalization(data, num_models=10, row_subsample=False, verbose=False, split_perc = 0.8):
    """
    Leave-one-symbol-out evaluation of the ensemble.

    For every distinct Symbol in ``data``: the symbol's rows become the test
    set, a ``random_split(split_perc)`` sample of the remaining rows becomes
    the training set, an ensemble is trained and its averaged probabilities
    are scored with ``evaluate_model`` (default threshold).

    Returns a dict mapping each symbol to its precision, recall, auc,
    f1_score, specificity, num_targets and real_success_rate (the share of
    test rows whose Profitable label is 1). A progress line is printed per
    symbol.
    """
    symbols = data["Symbol"].unique()
    total = len(symbols)
    results = {symbol: None for symbol in symbols}

    # The global RNG is reseeded and advanced once; the drawn value is not
    # used (the seed argument to random_split below is disabled).
    random.seed()
    random.random()

    reported_metrics = ("precision", "recall", "auc", "f1_score", "specificity", "num_targets")

    for position, symbol in enumerate(symbols, start=1):
        held_out = data[data["Symbol"] == symbol]
        remainder = data[data["Symbol"] != symbol]
        assert(len(held_out) > 0)

        train_data, _ = remainder.random_split(split_perc)
        # The held-out symbol must not leak into the training rows.
        assert(len(train_data[train_data["Symbol"] == symbol]) == 0)

        test_data = apply_transformations(held_out)
        train_data = apply_transformations(train_data)

        models = train_ensemble(train_data, num_models=num_models, row_subsample=row_subsample, verbose=verbose)
        averaged_probs = average_probabilities(get_probs(models, test_data))
        eval_results = evaluate_model(averaged_probs, test_data)

        summary = {metric: eval_results[metric] for metric in reported_metrics}
        summary["real_success_rate"] = len(test_data[test_data["Profitable"] == 1]) / len(test_data)
        results[symbol] = summary

        print(f"[{position}/{total}] Evaluated {symbol}, Results: {results[symbol]}")
    return results
        


In [6]:
def select_best_symbols(generalization_results, min_precision = 0.62, min_baseline_perc = 0.65, min_model_improvement_perc=0.15, min_recall=0.1, verbose=False):
    """
    Pick the symbols on which the ensemble is worth using.

    Parameters
    ----------
    generalization_results : dict
        ``symbol -> {"precision", "recall", "real_success_rate", ...}``.
    min_precision, min_recall : float
        Minimum precision / recall for a symbol that is not already above the
        baseline threshold.
    min_baseline_perc : float
        A symbol whose real success rate AND precision both reach this value is
        selected without the other checks.
    min_model_improvement_perc : float
        Minimum relative gain of precision over the real success rate.
    verbose : bool
        Print a line per considered symbol and per selection.

    Returns
    -------
    list
        Selected symbols, in the iteration order of ``generalization_results``.

    A symbol whose precision does not exceed its real success rate is always
    skipped. When the real success rate is 0 (and precision is positive) the
    relative improvement is treated as infinite instead of dividing by zero.
    """
    best_symbols = []
    for symbol, results in generalization_results.items():
        precision = results["precision"]
        recall = results["recall"]
        real_success_rate = results["real_success_rate"]

        # Skip any symbols where the model performs no better than real life
        if precision <= real_success_rate:
            if verbose:
                print(f"[{symbol}] Precision less than success rate, skipping")
            continue

        if real_success_rate > 0:
            model_improvement_perc = (precision - real_success_rate) / real_success_rate
        else:
            # Any positive precision over a zero baseline is an unbounded relative gain.
            model_improvement_perc = float("inf")

        if verbose:
            print(f"[{symbol}] <> Precision: {round(precision,2)} <> Recall: {round(recall,2)} <> Orig Accuracy: {round(real_success_rate * 100.0, 2)}% <> Baseline Improvement: {round(model_improvement_perc * 100.0, 2)}%")

        already_strong = real_success_rate >= min_baseline_perc and precision >= min_baseline_perc
        if not already_strong:
            if precision < min_precision:
                continue
            if recall < min_recall:
                continue
            if model_improvement_perc < min_model_improvement_perc:
                continue

        if verbose:
            print(f"Selected [{symbol}]")
        best_symbols.append(symbol)
    return best_symbols

In [None]:
# Reload the data and run one leave-one-symbol-out evaluation pass.
data = get_data(results_df)
results = evaluate_generalization(data, verbose=True)

In [None]:
# Filter the per-symbol results with select_best_symbols' default thresholds.
best_symbols = select_best_symbols(results)

In [None]:
# Display the selected symbols.
best_symbols

In [None]:
# Train an ensemble on the selected symbols only, then collect each model's
# probability predictions for the held-back test split.
models, train_data, test_data = train_ensemble_on_symbols(best_symbols, data, row_subsample=True)
probs = get_probs(models, test_data)

In [None]:
# Average the per-model probabilities and score them at a 0.65 threshold;
# the cell's displayed value is the number of training rows.
averaged_probs = average_probabilities(probs)
model_results = evaluate_model(averaged_probs, test_data, threshold=0.65)
len(train_data)

In [10]:
def run_evaluation_pipeline(results_df, generalization_cycles = 3, min_precision = 0.62, min_model_improvement_perc=0.15, min_recall=0.1, split_perc=0.8, num_models=10, row_subsample=False, threshold=0.6, verbose=False):
    """
    End-to-end run: repeated generalization passes, symbol selection, final ensemble.

    1. Load the data with ``get_data`` and run ``evaluate_generalization``
       ``generalization_cycles`` times.
    2. Average every per-symbol metric across the cycles.
    3. Select symbols with ``select_best_symbols`` and train an ensemble on
       them via ``train_ensemble_on_symbols`` (which uses ``split_perc``).
    4. Score the ensemble's averaged probabilities on its test split at
       ``threshold``.

    Returns ``(models, best_symbols, model_results, train_data, test_data)``.
    """
    data = get_data(results_df)

    all_generalization_results = [
        evaluate_generalization(data, num_models=num_models, row_subsample=row_subsample, verbose=verbose)
        for _ in range(generalization_cycles)
    ]
    cycle_count = len(all_generalization_results)

    # Metrics to average, in the order they are stored per symbol.
    metric_names = (
        "precision",
        "recall",
        "specificity",
        "real_success_rate",
        "auc",
        "f1_score",
        "num_targets",
    )

    averaged_generalization_results = {}
    for symbol in all_generalization_results[0].keys():
        averaged_generalization_results[symbol] = {
            metric: sum(cycle[symbol][metric] for cycle in all_generalization_results) / cycle_count
            for metric in metric_names
        }

    best_symbols = select_best_symbols(
        averaged_generalization_results,
        min_precision=min_precision,
        min_model_improvement_perc=min_model_improvement_perc,
        min_recall=min_recall,
        verbose=verbose,
    )
    models_trained_on_best_symbols, train_data, test_data = train_ensemble_on_symbols(
        best_symbols,
        data,
        split_perc=split_perc,
        num_models=num_models,
        row_subsample=row_subsample,
        verbose=verbose,
    )

    averaged_probs = average_probabilities(get_probs(models_trained_on_best_symbols, test_data))
    model_results = evaluate_model(averaged_probs, test_data, threshold=threshold)
    return models_trained_on_best_symbols, best_symbols, model_results, train_data, test_data
    
    

In [13]:
# Run the full pipeline with its default settings (the captured run below was interrupted).
models_trained_on_best_symbols, best_symbols, model_results, train_data, test_data = run_evaluation_pipeline(results_df)

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,str,int,str,str,str,str,int,int,float,float,str,str,str,float,float,float,float,float,str,float,str,str,float,str,str,str,str,str,float,float,int,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[1/146] Evaluated ADAUSDT, Results: {'precision': 0.625, 'recall': 0.2777777777777778, 'auc': 0.6458333333333333, 'f1_score': 0.3846153846153846, 'specificity': 0.85, 'num_targets': 76, 'real_success_rate': 0.47368421052631576}
[2/146] Evaluated SANDBUSD, Results: {'precision': 0.8461538461538461, 'recall': 0.5238095238095238, 'auc': 0.7651888341543513, 'f1_score': 0.6470588235294118, 'specificity': 0.9310344827586207, 'num_targets': 50, 'real_success_rate': 0.42}


KeyboardInterrupt: 

In [None]:
# Display the selected symbols together with the final ensemble's metrics.
best_symbols, model_results

In [None]:
# evaluate_model() expects one probability per test row, so the per-model
# predictions returned by get_probs() are averaged before scoring.
evaluate_model(
    average_probabilities(get_probs(models_trained_on_best_symbols, test_data)),
    test_data,
    threshold=0.625,
)

In [None]:
# Train the SVM models on the pipeline's training split.
svm_models = train_svm_models(train_data, verbose=True)

In [None]:
def create_pred_sframe_row(models, row, label_column = "Profitable", svm_models=None):
    """
    Build a one-row SFrame holding every base model's output for ``row``.

    Parameters
    ----------
    models : list
        Classifiers queried with ``predict(row, output_type='probability')``.
    row : dict-like
        A single input row; must contain ``label_column``.
    label_column : str
        Name of the label copied from ``row`` into the result.
    svm_models : list or None
        Optional SVM classifiers appended after ``models``. They are queried
        with plain ``predict`` and the class is stored as 1 (``'True'`` or 1)
        or 0.

    Returns
    -------
    tc.SFrame
        One column per model named ``Model<index>`` (zero-padded index) plus
        the label column.
    """
    # None instead of a shared mutable default list.
    all_models = list(models) + list(svm_models or [])
    # Pad to the width of the total model count so the names stay equally
    # wide when SVM models are appended (the width used to ignore them).
    max_digits = len(str(len(all_models)))

    sf_row = {}
    for i, model in enumerate(all_models):
        key = "Model" + format(i, f'0{max_digits}d')
        if isinstance(model, tc.toolkits.classifier.svm_classifier.SVMClassifier):
            pred = model.predict(row)[0]
            sf_row[key] = [1 if pred == 'True' or pred == 1 else 0]
        else:
            sf_row[key] = [model.predict(row, output_type='probability')[0]]
    sf_row[label_column] = [row[label_column]]
    return tc.SFrame(sf_row)

In [None]:
#meta_model = tc.logistic_classifier.create(train_sf, target=label_column[0], max_iterations=10)

In [None]:
def create_meta_model_sframe(models, target_sf, svm_models=[]):
    """
    Build the meta-model dataset: one row of base-model outputs per row of ``target_sf``.

    Each input row is converted with ``create_pred_sframe_row`` and the
    resulting one-row frames are appended, in order, to an initially empty
    SFrame, which is returned.
    """
    stacked = tc.SFrame()
    for source_row in target_sf:
        stacked = stacked.append(
            create_pred_sframe_row(models, source_row, svm_models=svm_models)
        )
    return stacked
        
        

def meta_model_predict(models, meta_model, target_sf):
    """
    Return the meta-model's probability for every row of ``target_sf``.

    Rows are fetched by index and passed one at a time to
    ``meta_model.predict(..., output_type='probability')``; the first element
    of each prediction is collected. ``models`` is accepted but not used.
    """
    return [
        meta_model.predict(target_sf[position], output_type='probability')[0]
        for position in range(len(target_sf))
    ]
        

In [None]:
#probabilities = meta_model_predict(models_trained_on_best_symbols, meta_model, test_data)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.autograd import Variable
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
#meta_model_train_data = create_meta_model_sframe(models_trained_on_best_symbols, train_data, svm_models=svm_models)
#meta_model_test_data = create_meta_model_sframe(models_trained_on_best_symbols, test_data, svm_models=svm_models)
# Build the meta-model inputs (base-model outputs plus label) for both splits,
# without the SVM models.
meta_model_train_data = create_meta_model_sframe(models_trained_on_best_symbols, train_data)
meta_model_test_data = create_meta_model_sframe(models_trained_on_best_symbols, test_data)
#meta_model_train_data["Profitable"] = meta_model_train_data["Profitable"].apply(lambda l: 1 if l == 'True' else 0).to_numpy()
#meta_model_test_data["Profitable"] = meta_model_test_data["Profitable"].apply(lambda l: 1 if l == 'True' else 0).to_numpy()



In [None]:
#meta_model = tc.logistic_classifier.create(meta_model_train_data, target=label_column[0], max_iterations=10)


In [None]:
#probabilities = meta_model_predict(models_trained_on_best_symbols, meta_model, meta_model_test_data)
#results = evaluate_model([probabilities], meta_model_test_data, threshold=0.7)

In [None]:
class MetaDataset(Dataset):
    """
    Torch dataset over a meta-model SFrame.

    The SFrame is converted to a pandas DataFrame (kept as ``self.df``);
    ``self.labels`` holds the label column as a tensor and ``self.X`` holds
    every other column as a tensor.
    """

    def __init__(self, sf: tc.SFrame, label_column: str = "Profitable"):
        frame = sf.to_dataframe()
        self.df = frame

        label_values = frame[label_column].to_numpy()
        self.labels = torch.from_numpy(label_values)

        feature_values = frame.drop(columns = [label_column], axis = 1).to_numpy()
        self.X = torch.from_numpy(feature_values)

    def __len__(self):
        """Number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``[features as float32 tensor, label]`` for sample ``idx``."""
        return [self.X[idx].float(), self.labels[idx]]

        

In [None]:
BATCH_SIZE = 32
md = MetaDataset(meta_model_train_data)

# Labels in dataset order, so that weights[i] belongs to sample i of `md`.
# (They used to be regrouped as all 1s followed by all 0s, which no longer
# lined up with the dataset's row order.)
labels = [int(label) for label in meta_model_train_data["Profitable"]]

# class_counts[c] / class_weights[c] are indexed by the label value c
# (0 = not profitable, 1 = profitable). The counts used to be stored as
# [count of 1s, count of 0s], so each label picked up the other class's weight.
class_counts = [labels.count(0), labels.count(1)]
num_samples = sum(class_counts)
# Inverse-frequency weights; an absent class gets weight 0 instead of dividing by zero.
class_weights = [num_samples / count if count else 0.0 for count in class_counts]
weights = [class_weights[label] for label in labels]
sampler = WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))

# The weighted sampler is prepared but not used: the loader shuffles uniformly.
#loader = DataLoader(md, batch_size=BATCH_SIZE, shuffle=False, sampler=sampler)
loader = DataLoader(md, batch_size=BATCH_SIZE, shuffle=True)


In [None]:
def train_epoch(model, opt, criterion, loader, batch_size=50):
    """
    Run one training pass over ``loader`` and return the per-batch losses.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; put into training mode here.
    opt : torch.optim.Optimizer
        Optimizer stepping ``model``'s parameters.
    criterion : callable
        Loss taking ``(model_output, target)`` where the target is the float
        label batch with a trailing dimension of size 1 added.
    loader : iterable
        Yields ``(x_batch, y_batch)`` pairs.
    batch_size : int
        Accepted for compatibility; not used (batching is done by ``loader``).

    Returns
    -------
    list
        One 0-d numpy array per batch holding that batch's loss.

    Batches are moved to the device of the model's parameters, so a model that
    was sent to a GPU no longer fails on CPU batches, and the loss is brought
    back to the CPU before conversion to numpy.
    """
    model.train()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    losses = []
    for x_batch, y_batch in loader:
        y_batch = y_batch.float()
        if device is not None:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

        opt.zero_grad()
        # (1) Forward
        y_hat = model(x_batch)
        # (2) Compute diff
        loss = criterion(y_hat, y_batch.unsqueeze(1))
        # (3) Compute gradients
        loss.backward()
        # (4) update weights
        opt.step()
        losses.append(loss.detach().cpu().numpy())
    return losses

In [None]:
class DNNBinaryClassifier(nn.Module):
    """
    Small feed-forward binary classifier that outputs a single raw logit.

    Layers, in order: Linear(input_features -> batch_size), LeakyReLU,
    BatchNorm1d, Dropout(0.25), Linear(batch_size -> 32), LeakyReLU,
    Dropout(0.25), PReLU, Linear(32 -> 1). No sigmoid is applied in
    ``forward``.

    Parameters
    ----------
    input_features : int
        Number of values in each input vector.
    batch_size : int
        Despite its name, this is the width of the first hidden layer (it is
        the output size of ``fc1`` and the feature count of the batch norm).
    """

    def __init__(self, input_features = 30, batch_size = 64):
        super(DNNBinaryClassifier, self).__init__()

        hidden_nodes2 = 32
        self.fc1 = nn.Linear(input_features, batch_size)
        self.relu1 = nn.LeakyReLU()
        # BatchNorm1d's second positional parameter is `eps`; passing
        # `input_features` there set a huge epsilon. Only the feature count
        # is given now, so the default eps applies.
        self.bn1d1 = nn.BatchNorm1d(batch_size)
        self.dout1 = nn.Dropout(0.25)
        self.fc2 = nn.Linear(batch_size, hidden_nodes2)
        self.relu2 = nn.LeakyReLU()
        self.dout2 = nn.Dropout(0.25)
        self.prelu = nn.PReLU(1)
        self.out = nn.Linear(hidden_nodes2, 1)

    def forward(self, inputs):
        """Map a ``(N, input_features)`` float batch to ``(N, 1)`` logits."""
        x = self.fc1(inputs)
        x = self.relu1(x)
        x = self.bn1d1(x)
        x = self.dout1(x)
        x = self.fc2(x)
        # relu2 is a default LeakyReLU like relu1, so the output is unchanged.
        x = self.relu2(x)
        x = self.dout2(x)
        x = self.prelu(x)
        x = self.out(x)
        return x

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# One input value per base model in the ensemble (SVM models excluded).
input_features = len(models_trained_on_best_symbols)# + len(svm_models)
model = DNNBinaryClassifier(input_features=input_features )
model.to(device)
EPOCHS = 20

LEARNING_RATE = 3e-4
# Per-batch losses accumulated over all epochs.
e_losses = []
# Loss that takes raw logits, matching the classifier's un-activated output.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-3)

for e in range(EPOCHS):
    e_losses += train_epoch(model, optimizer, criterion, loader, batch_size=BATCH_SIZE)
# Plot the loss of every batch in training order.
plt.plot(e_losses)

In [None]:
# Spot-check: sigmoid of the model's output for row 10 of the training meta-data.
# NOTE(review): `[:-1]` presumes the label is the last column of the SFrame, and
# the tensor is created on the CPU while `model` may have been moved to `device`
# - confirm both.
model.eval()
test = torch.from_numpy(meta_model_train_data.to_numpy()[10][:-1].astype(float)).float()
float(torch.sigmoid(model(test.unsqueeze(0))))

In [None]:
def predictions_for_nn_meta_model(data, meta_model, verbose=False):
    """
    Return the meta-model's sigmoid probability for every row of ``data``.

    Parameters
    ----------
    data : tc.SFrame
        Meta-model frame: one column per base model plus ``Profitable``.
    meta_model : torch.nn.Module
        Network producing one logit per input row; switched to eval mode.
    verbose : bool
        Print each row's label and predicted confidence.

    Returns
    -------
    list of float
        One probability per row, in row order.

    Changes from the previous version: the label shown in verbose mode is
    read from ``data`` itself rather than from the global ``train_data``; the
    frame is converted to numpy once instead of once per row; features are
    selected by dropping the ``Profitable`` column by name rather than assuming
    it is the last column; inputs are moved to the model's device and inference
    runs without gradient tracking.
    """
    meta_model.eval()

    feature_columns = [name for name in data.column_names() if name != "Profitable"]
    features = data[feature_columns].to_numpy().astype(float)
    labels = list(data["Profitable"])

    first_param = next(meta_model.parameters(), None)
    device = first_param.device if first_param is not None else None

    predictions = []
    with torch.no_grad():
        for index in range(len(labels)):
            tensor = torch.from_numpy(features[index]).float()
            if device is not None:
                tensor = tensor.to(device)
            pred = float(torch.sigmoid(meta_model(tensor.unsqueeze(0))))
            if verbose:
                print(f"Label {'True' if labels[index] else 'False'}, Confidence: {pred}")
            predictions.append(pred)
    return predictions

In [None]:
train_predictions = predictions_for_nn_meta_model(meta_model_train_data, model, verbose=True)
# evaluate_model() takes a flat sequence with one probability per row, so the
# predictions are passed as-is (not wrapped in another list).
train_results = evaluate_model(train_predictions, meta_model_train_data)
train_results

In [None]:
# Meta-model probabilities for the test split.
test_predictions = predictions_for_nn_meta_model(meta_model_test_data, model)


In [None]:
# evaluate_model() takes a flat sequence with one probability per row, so the
# predictions are passed as-is (not wrapped in another list).
test_results = evaluate_model(test_predictions, meta_model_test_data, threshold=0.9)
# Share of test rows that are actually profitable, for comparison with precision.
real_success_rate = len(meta_model_test_data[meta_model_test_data["Profitable"] == 1]) / len(meta_model_test_data)
test_results, real_success_rate