In [75]:
import { TrainingData } from "./trainingdata.ts";
import { Assets } from "../assets/mod.ts";
import { Community, Investors } from "../repository/mod.ts";
import { DataFrame, Series } from "jsr:@sauber/dataframe";

// Minimum count of future bars to calculate score
const min_bars = 30;

// Assets
// const path = "../../testdata";
const path = "../../../etoro-data/data";
if (!Deno.statSync(path)) throw new Error(`${path} does not exist.`);
const assets: Assets = Assets.disk(path);
const community: Community = assets.community;
const investors: Investors = await community.all();

// Training Data
const td = new TrainingData(min_bars);
const generated: DataFrame = td.generate(investors);

console.log(generated.length, "samples generated");


Data length trimmed 9039 to 8892
Data length trimmed 8892 to 8869
Data length trimmed 8869 to 8849
Data length trimmed 8849 to 8834
Data length trimmed 8834 to 8829
8829 samples generated


In [76]:
// Save data to CSV file
import { parse, stringify } from "jsr:@std/csv";
import { writeFileStr } from "jsr:@std/fs";

const csv = stringify(generated.records, {
  header: true,
  columns: generated.names,
});
console.log(csv.length);
Deno.writeTextFileSync("training_data.csv", csv);

1121204


In [77]:
// Read CSV file
const csvData = Deno.readTextFileSync("training_data.csv");
// const parsedData = parse(csvData, { skipFirstRow: true,  strip: true, });
const parsedData = parse(csvData);
const names = parsedData.shift() as string[];
const series: number[][] = names.map(_=>[]);
for (const row of parsedData) {
  for (let i = 0; i < names.length; i++) {
    series[i].push(parseFloat(row[i]));
  }
}
const records = Object.fromEntries(names.map((name, i) => [name, new Series(series[i])]));
const features = new DataFrame(records);



[33m8829[39m

In [None]:
// Generate functions for parabolic regression line for each input features

import { parabolic, ParabolicResult } from "../math/parabolic.ts";

// Column names
const names: string[] = features.names.filter((name) => name !== "Score");
const ys: number[] = features.values("Score");

// Predict function for each feature
const pfn: Record<string, (x: number) => number> = {};

names.map((name) => {
  const values: number[] = features.values(name);
  const pairs: [number, number][] = values.map((v, i) => [v, ys[i]]);
  const par: ParabolicResult = parabolic(pairs);

  if (isFinite(par.coefficients[0])) pfn[name] = par.predict;
});


[
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m, [90mundefined[39m,
  [90mundefined[39m, [90mundefined[39m
]

In [79]:
// Create functions for predicting score from each parameter
function predict(row: Record<string, number>): number {
  const scores: number[] = names.map((name) => {
    const fn: (x: number) => number = pfn[name];
    if (fn) return fn(row[name]);
    return 0;
  });
  return scores.reduce((a, b) => a + b, 0) / names.length;
}

// Predict score for each row
const scores = features.sort("Score").records.map((row) => {
  const score: number = predict(row);
  return {
    "Original": row["Score"],
    "Predicted": score,
  };
});
console.log("Scores", scores);


Scores [
  { Original: -9.58233800893368, Predicted: 0.2467775586720677 },
  { Original: -4.727970159137685, Predicted: 0.37159619522596593 },
  { Original: -4.5228517876383805, Predicted: 0.2852368144274037 },
  { Original: -4.513781700593579, Predicted: 0.19954215553880492 },
  { Original: -4.30406592812079, Predicted: 0.19954215553880492 },
  { Original: -4.298414091363026, Predicted: 0.19954215553880492 },
  { Original: -4.291339169201427, Predicted: 0.19954215553880492 },
  { Original: -4.179296226678445, Predicted: 0.19954215553880492 },
  { Original: -4.176667569715544, Predicted: 0.22785065722817596 },
  { Original: -4.160107315846742, Predicted: 0.23321028500588087 },
  { Original: -4.1494081134953005, Predicted: 0.2261993274288094 },
  { Original: -4.1362982155665, Predicted: 0.22760241310161733 },
  { Original: -4.135769088550326, Predicted: 0.19954215553880492 },
  { Original: -4.1091266062943, Predicted: 0.19954215553880492 },
  { Original: -4.100953095135496, Predicted: 0

In [80]:
// Calculate correlation coefficient between original and predicted scores
import { correlation } from "jsr:@sauber/statistics";

const original: number[] = scores.map((s) => s["Original"]);
const predicted: number[] = scores.map((s) => s["Predicted"]);
const corr: number = correlation(original, predicted);
corr;


[33m0.30611262118059607[39m

In [81]:
// Calculate mean squared error
function MSE(original: number[], predicted: number[]): number {
  const sum: number = original.reduce((acc, val, i) => acc + Math.pow(val - predicted[i], 2), 0);
  return sum / original.length;
};

const error = MSE(original, predicted);
error;

[33m3.792777627234277[39m

In [82]:
// Display Scatter plot of original vs predicted scores
import vl from "npm:vega-lite-api";

const plot = vl
  .markPoint()
  .data(scores)
  .encode(
    vl.x().fieldQ("Original"),
    vl.y().fieldQ("Predicted"),
  );

await Deno.jupyter.display(plot);


In [59]:
import vl from "npm:vega-lite-api";
import { parabolic, ParabolicResult } from "../math/parabolic.ts";

const plots = [];
names
  .map((name) => {
  // const name = "Gain";
  // console.log(features.names);
  const xs: number[] = features.values(name);
  const ys: number[] = features.values("Score");
  const pairs: [number, number][] = xs
    .map((x, i) => [x, ys[i]])
    .sort((a, b) => a[0] - b[0]);
  const par: ParabolicResult = parabolic(pairs);
  // console.log(name, par.coefficients);
  if (isFinite(par.coefficients[0])) {
    // console.log("Adding plot for", name);
    const df = features
      .include([name, "Score"])
      .sort(name)
      .amend("Regression", (row) => par.predict(row[name]));
    // df.print("Parabolic regression");
    const records = df.records;

    const points = vl
      .markPoint()
      .data(records)
      .encode(
        vl.x().fieldQ(name),
        vl.y().fieldQ("Score"),
        vl.fill("#ccc"),
      );

    const line = vl
      // .markLine({ interpolate: "monotone" })
      .markLine()
      .data(records)
      .encode(
        vl.x().fieldQ(name),
        vl.y().fieldQ("Regression"),
      );

    const layered = vl.layer(line, points);
    plots.push(layered);
  } else {
    console.log("Not enough data for parabolic regression", name);
  }
});

const all = vl.concat(plots);

await Deno.jupyter.display(all);


Not enough data for parabolic regression PopularInvestor
