# Setup

In [2]:
import os
import json
import pandas as pd
import glob
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import requests
import time
import torch
import sys
from nixtlats import TimeGPT
print("success")

success


In [3]:
# Run everything relative to the project root: when the kernel was started
# inside the "notebooks" folder, step one directory up.
# The directory *name* is compared (not just the last nine characters of the
# path), so folders such as "old_notebooks" no longer trigger the chdir.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir(os.path.dirname(os.getcwd()))
os.getcwd()

'/home/koos/Documents/timeseries_transfer_learning'

In [4]:
from utils.metric import *

In [5]:
# Seed NumPy's global RNG so the random windows drawn with np.random.randint
# in get_features_targets are repeatable.
np.random.seed(42)

In [6]:
def get_features_targets(df, features_length, targets_length):
    """Cut one random, contiguous (features, targets) window pair out of ``df``.

    The targets window directly follows the features window. Its start is
    drawn from NumPy's global RNG, so results are repeatable only after
    ``np.random.seed`` has been called.

    Args:
        df: Any sliceable sequence supporting ``len`` (DataFrame, list, array).
        features_length: Number of rows in the features window.
        targets_length: Number of rows in the targets window.

    Returns:
        Tuple ``(features, targets)`` of slices of ``df``.

    Raises:
        ValueError: If ``df`` is shorter than both windows combined.
    """
    if len(df) < features_length + targets_length:
        raise ValueError("features and targets can´t be longer than data")

    min_start_targets = features_length
    max_start_targets = len(df) - targets_length
    # np.random.randint excludes `high`; add 1 so the last valid start position
    # can be drawn and an exact-length input (min == max) does not raise.
    targets_start = np.random.randint(min_start_targets, max_start_targets + 1)
    targets_end = targets_start + targets_length
    features_start = targets_start - features_length
    features, targets = df[features_start:targets_start], df[targets_start:targets_end]
    assert len(targets) == targets_length
    assert len(features) == features_length
    return features, targets

# Make calls

In [7]:
# Length of the feature window passed to get_features_targets; it is also
# part of the results file name, so each window size gets its own JSON file.
WINDOW_SIZE = 504

In [None]:
# Query the TimeGPT HTTP API once per test series and checkpoint every
# successful answer to disk, so an interrupted run can be resumed.
FORECAST_HORIZON = 14  # steps requested from the API and held out as targets
RESULTS_PATH = f"results/timegpt/timegpt_results_{WINDOW_SIZE}.json"

# Resume from an earlier run if a results file for this window size exists.
try:
    with open(RESULTS_PATH, "r") as results_file:
        results = json.load(results_file)
except FileNotFoundError:
    input("FILE NOUT FOUND, WRITING NEW ONE PRESS ENTER TO CONTINUE ")
    results = {}

done = list(results.keys())
print(f"{len(done)} calls were already done, skipping")

# Prefer a key from the environment so it never has to be pasted into the
# notebook; the placeholder is kept as fallback for backward compatibility.
api_key = os.environ.get("TIMEGPT_TOKEN", "insert api key here")
timegpt = TimeGPT(token=api_key)
assert timegpt.validate_token()

# Endpoint and headers do not change between requests, so build them once.
url = "https://dashboard.nixtla.io/api/timegpt"
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "authorization": f"Bearer {api_key}"
}

skipped = 0
files = os.listdir("data/processed/test")
for file in tqdm(files):
    # Strip the 8-character extension to get the result key.
    key = file.split("\\")[-1][:-8]
    if key in done:
        continue
    ticker = key.replace("-High", "").replace("-Low", "").replace("-Close", "")

    dates = pd.read_csv(f"data/raw_download/stocks/{ticker}.csv")

    df = pd.read_parquet("data/processed/test/" + file)
    col = df.columns[0]

    # NOTE(review): the Date column is assigned by index alignment; this
    # presumably relies on both frames sharing a default index - confirm.
    assert len(df) == len(dates)
    df["Date"] = pd.to_datetime(dates["Date"], utc=True)
    df.set_index("Date", inplace=True)
    if len(df) <= WINDOW_SIZE + FORECAST_HORIZON:
        skipped += 1
        continue

    features, targets = get_features_targets(df, WINDOW_SIZE, FORECAST_HORIZON)

    # The API payload maps "YYYY-MM-DD" strings to one value per day.
    data_for_timegpt = {}
    for i, row in features.iterrows():
        data_for_timegpt[i.strftime("%Y-%m-%d")] = row.sum()

    payload = {
        "model": "timegpt-1",
        "freq": "D",
        "fh": len(targets),
        "y": data_for_timegpt,
        "clean_ex_first": True,
        "finetune_steps": 100
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=180)
        output = json.loads(response.text)
    except json.JSONDecodeError:
        print("FAILED DECODE:", response.text)
        continue
    except (ConnectionError, requests.ConnectionError) as e:
        print(f"ConnectionError: {e}")
        continue
    except (TimeoutError, requests.ReadTimeout) as e:
        print(f"Timeout error: {e}")
        # Without this the loop fell through and inspected a stale (or, on the
        # first iteration, undefined) `output`.
        continue

    # Tolerate responses that lack a "message" field instead of raising.
    message = output.get("message") if isinstance(output, dict) else None
    if message != "success":
        print("FAILED:", message if message is not None else output)
        continue

    output_df = pd.DataFrame(output["data"])
    output_df["timestamp"] = pd.to_datetime(output_df["timestamp"])
    output_df.set_index("timestamp", inplace=True)

    #plt.plot(list(np.array(features).flatten()) + list(targets.iloc[:, 0]))
    #plt.plot([None for _ in range(len(features))] + list(output_df.loc[:,"value"]))
    #plt.show()

    results[key] = {"features": list(features.iloc[:, 0]),
                   "targets": list(targets.iloc[:, 0]),
                   "output": output}

    # Checkpoint after every successful call; a separate handle name keeps the
    # loop variable `file` from being shadowed.
    with open(RESULTS_PATH, "w") as results_file:
        json.dump(results, results_file)
    time.sleep(1)

print("skipped", skipped, "files because of insufficient length")


# Analyse prediction

In [8]:
def normalize_list(lst):
    """Min-max scale ``lst`` into the range [0, 1].

    Args:
        lst: Non-empty sequence of numbers.

    Returns:
        Tuple ``(values, (min_value, max_value))``. ``values`` is always a
        list: the scaled values, or the unchanged values when every element
        is equal (no spread to scale by). The factors can be passed to
        ``reverse_normalize_list`` to undo the scaling.

    Raises:
        ValueError: If ``lst`` is empty (raised by ``min``).
    """
    values = np.array(lst)
    min_value = min(values)
    max_value = max(values)
    if min_value == max_value:
        # Constant input: scaling would divide by zero, so return the values
        # untouched, as a list to match the type of the regular branch.
        return list(values), (min_value, max_value)
    scaled = (values - min_value) / (max_value - min_value)
    return list(scaled), (min_value, max_value)

def reverse_normalize_list(lst, factors):
    """Map min-max scaled values back to their original range.

    Args:
        lst: Iterable of scaled values.
        factors: Tuple ``(min_value, max_value)`` used for the scaling.

    Returns:
        List with one rescaled value per element of ``lst``.
    """
    lower, upper = factors
    restored = []
    for scaled in lst:
        restored.append((scaled * (upper - lower)) + lower)
    return restored

In [9]:
# Reload the stored results for the current window size from disk and show
# how many series they contain.
results_path = f"results/timegpt/timegpt_results_{WINDOW_SIZE}.json"
with open(results_path, "r") as results_file:
    results = json.load(results_file)
len(results)

7574

In [10]:
def plot_prediction(features, targets, prediction, title=None):
    """Plot a feature window followed by its true targets and the prediction.

    Args:
        features: Sequence of observed values, drawn in black.
        targets: Sequence of true future values, drawn in grey after the
            features.
        prediction: Sequence of predicted future values, drawn in red after
            the features.
        title: Plot title. When omitted, the notebook-level variable ``key``
            is used if it exists (the previous behaviour); otherwise the
            title is left empty instead of raising ``NameError``.
    """
    if title is None:
        title = globals().get("key", "")
    # Pad with None so targets and prediction start where the features end.
    padding = [None for _ in list(features)]
    plt.plot(features, c="black", label="features")
    plt.plot(padding + list(targets), c="grey", label="targets")
    plt.plot(padding + list(prediction), c="red", label="prediction")
    plt.legend()
    plt.title(title)
    plt.show()

In [11]:
# Scale every series with the min/max of its own feature window, so targets
# and predictions are expressed relative to the observed history.
all_targets = []
all_preds = []
all_features = []
all_factors = []
skipped_constant = 0  # series whose feature window has no spread

for key in results:
    features = np.array(results[key]["features"])
    targets = np.array(results[key]["targets"])
    response = results[key]["output"]
    prediction = np.array(response["data"]["value"])

    _, (min_factor, max_factor) = normalize_list(list(features))
    if max_factor == min_factor:
        # A constant feature window would divide by zero below and put
        # inf/NaN into the tensors, poisoning every aggregated metric.
        skipped_constant += 1
        continue

    features_norm = ((features - min_factor) / (max_factor - min_factor))
    targets_norm = ((targets - min_factor) / (max_factor - min_factor))
    prediction_norm = ((prediction - min_factor) / (max_factor - min_factor))

    #plot_prediction(features_norm, targets_norm, prediction_norm)

    all_targets.append(targets_norm)
    all_preds.append(prediction_norm)
    # NOTE(review): `darts` is not imported in the visible cells; presumably
    # it comes in through `from utils.metric import *` - confirm.
    all_features.append(darts.TimeSeries.from_series(features_norm))
    all_factors.append((min_factor, max_factor))

if skipped_constant:
    print("skipped", skipped_constant, "series with a constant feature window")

In [12]:
def list_of_timeseries_to_tensor(lst):
    """Stack a list of equally long series into one ``torch.Tensor``.

    Args:
        lst: Iterable of array-likes, one per series.

    Returns:
        Tensor built with ``torch.Tensor`` from the stacked series, one row
        per element of ``lst``.
    """
    stacked = np.array([np.array(series) for series in lst])
    return torch.Tensor(stacked)

# Stack the normalised predictions and targets into tensors, one row per
# series, as input for the metric functions.
preds_tensor = list_of_timeseries_to_tensor(all_preds)
targets_tensor = list_of_timeseries_to_tensor(all_targets)

In [11]:
# Report the window length actually used, then evaluate every metric.
print("WINDOW_SIZE:", len(all_features[0]))

loss_functions = all_metrices

for fn in loss_functions:
    # Metrics flagged with `pass_features_and_normal_factors` additionally
    # receive the feature series and the normalisation factors.
    if hasattr(fn, "pass_features_and_normal_factors"):
        extra_args = (all_features, all_factors)
    else:
        extra_args = ()
    loss = fn(preds_tensor, targets_tensor, *extra_args)

    print(f"Loss for {fn}: {loss}")

WINDOW_SIZE: 504
Loss for L1Loss(): 0.061544425785541534
Loss for MedianAbsoluteError(): 0.045115046203136444
Loss for MSELoss(): 0.010171704925596714
Loss for MedianSquaredError(): 0.002819785615429282
Loss for HuberLoss(): 0.004435580223798752
Loss for MeanLastValueError(): 0.08869222551584244
Loss for MedianLastValueError(): 0.06078791618347168
Loss for MeanTotalReturnError(): 0.04512231796979904
Loss for MedianTotalReturnError(): 0.02182612195611
Loss for GeometricMeanDailyReturnError(): 0.008478716491269063
Loss for MeanFinalReturnError(): 0.06402437539038958
Loss for MedianFinalReturnError(): 0.02703884319274051
Market Outperformance Analysis (top 5.0%):
Market Benchmark: 0.004320308811488409
Portfolio Return: 0.008543210457652654
Outperformance: 0.004222901646164246
Loss for BackTestingProfitError(): 0.004222901646164246
