In [91]:
import pandas as pd
import bz2
import csv
import networkx as nx

from io import StringIO
from decimal import Decimal

filename = "data.everef.net/market-orders/history/2023/2023-01-01/market-orders-2023-01-01_00-15-03.v3.csv.bz2"

with bz2.open(filename, mode="r") as data_csv:
    contents = data_csv.read()

contents_f = StringIO(bytes.decode(contents, "utf-8"))

reader = csv.DictReader(contents_f)

In [50]:
type_id_to_buys_and_sells = {}

for row in reader:
    type_id = row["type_id"]
    if type_id not in type_id_to_buys_and_sells:
        buys_and_sells = {"buys":[], "sells":[]}
        type_id_to_buys_and_sells[type_id] = buys_and_sells
    else:
        buys_and_sells = type_id_to_buys_and_sells[type_id]
    
    if row["is_buy_order"] == 'true':
        buys_and_sells["buys"].append(row)
    else:
        buys_and_sells["sells"].append(row)
        
arbitrages = []
        
for type_id, buys_and_sells in type_id_to_buys_and_sells.items():
    for buy in buys_and_sells["buys"]:
        for sell in buys_and_sells["sells"]:
            # conditions for valid arbitrage:
            # 1. sell order lower than buy order
            # (i.e. we buy from the sell order low, and sell into the buy order high)
            # 2. sell volume remain is higher than buy min_volume
    
            # TODO: smarter, non O(n^2) way to do this
            sell_price = Decimal(sell["price"])
            buy_price = Decimal(buy["price"])
            
            if sell_price < buy_price and int(sell["volume_remain"]) > int(buy["min_volume"]):
                # arbitrage found!
                arbitrages.append((sell, buy))

In [51]:
len(arbitrages)

1817195

In [85]:
def arbitrage_stats(arbitrage):
    wallet_amount = 20_000_000

    max_quantity = min(int(arbitrage[0]["volume_remain"]), int(arbitrage[1]["volume_remain"]))
    sell_price = float(arbitrage[0]["price"])
    buy_price = float(arbitrage[1]["price"])
    
    # you can only invest as much as you can afford
    quantity = min(max_quantity, wallet_amount/sell_price)
    real_investment = sell_price * quantity
    profit = quantity * (buy_price - sell_price)
    total_return = 100 * profit/real_investment
    
    # If you have money in your wallet you can't invest because the opportunity is too small
    # then it's like you did invest it, but with no return.
    # The return considering ALL the money we had available may be much, much lower.
    # This captures the idea that there's no point pursuing opportunities to make 600% on like $5
    adj_return = 100 * profit/wallet_amount
    
    # TODO: this considers each investment to take the same amount of time, which isn't true
    # We should be calculating the return per jump (with compounding).
    # We definitely need to filter out completely SHIT investments that wouldn't even be worth
    # doing if they took only one jump.
    
    return {
        "total_return": total_return,
        "adj_return": adj_return,
        "type_id": arbitrage[0]["type_id"],
        "from_system":arbitrage[0]["system_id"],
        "to_system":arbitrage[1]["system_id"],
        "real_investment": real_investment,
        "profit": profit
    }

In [86]:
arb_stats = sorted([arbitrage_stats(arb) for arb in arbitrages], key=lambda arb: arb["adj_return"], reverse=True)
arb_stats

[{'total_return': 4225.714285714285,
  'adj_return': 1120.37208,
  'type_id': '1824',
  'from_system': '30004967',
  'to_system': '30002187',
  'real_investment': 5302640.0,
  'profit': 224074416.0},
 {'total_return': 3937.3333333333335,
  'adj_return': 1118.47828,
  'type_id': '1824',
  'from_system': '30002803',
  'to_system': '30002187',
  'real_investment': 5681400.0,
  'profit': 223695656.0},
 {'total_return': 3340.1272438082256,
  'adj_return': 1113.5468248,
  'type_id': '1824',
  'from_system': '30003327',
  'to_system': '30002187',
  'real_investment': 6667691.04,
  'profit': 222709364.96},
 {'total_return': 3087.032943900642,
  'adj_return': 1110.8992924,
  'type_id': '1824',
  'from_system': '30003018',
  'to_system': '30002187',
  'real_investment': 7197197.5200000005,
  'profit': 222179858.48},
 {'total_return': 3070.6806282722514,
  'adj_return': 1110.7137,
  'type_id': '1824',
  'from_system': '30002801',
  'to_system': '30002187',
  'real_investment': 7234316.0,
  'profi

In [87]:
arb_stats = [arb for arb in arb_stats if arb["adj_return"] > 50]
len(arb_stats)

5386

In [94]:
# We need to annotate each arbitrage with its distance

with bz2.open("mapSolarSystemJumps.csv.bz2", mode="r") as data_csv:
    route_contents = data_csv.read()

route_contents_f = StringIO(bytes.decode(route_contents, "utf-8"))
route_reader = csv.DictReader(route_contents_f)

G = nx.Graph()

for row in route_reader:
    G.add_edge(row["fromSolarSystemID"], row["toSolarSystemID"])

In [103]:
valid_arbitrages = []

for arb in arb_stats:
    if not nx.has_path(G, arb["from_system"], arb["to_system"]):
        # There are a lot of arbitrages in this list between
        # different universes and stuff like that
        break
    
    arb["jumps"] = len(nx.shortest_path(G, arb["from_system"], arb["to_system"]))
    
    # return per jump, considering compounding
    # i.e. a 100% return per jump with 2 jumps means you double your money
    # twice for a total of 300% return for the whole trip
    
    arb["adj_return_per_jump"] = 100*((1+(arb["adj_return"]/100))**(1/arb["jumps"])-1)
    
    valid_arbitrages.append(arb)
    
valid_arbitrages = sorted(valid_arbitrages, key=lambda a: a["adj_return_per_jump"], reverse=True)

In [106]:
len(valid_arbitrages)

194

In [107]:
valid_arbitrages

[{'total_return': 1945.945945945946,
  'adj_return': 1090.8288,
  'type_id': '1824',
  'from_system': '30003522',
  'to_system': '30002187',
  'real_investment': 11211296.0,
  'profit': 218165760.0,
  'jumps': 2,
  'adj_return_per_jump': 245.08387386257274},
 {'total_return': 1945.945945945946,
  'adj_return': 1090.8288,
  'type_id': '1824',
  'from_system': '30003563',
  'to_system': '30002187',
  'real_investment': 11211296.0,
  'profit': 218165760.0,
  'jumps': 11,
  'adj_return_per_jump': 25.257714892206874},
 {'total_return': 1925.4180602006688,
  'adj_return': 1090.26066,
  'type_id': '1824',
  'from_system': '30000142',
  'to_system': '30002187',
  'real_investment': 11324924.0,
  'profit': 218052132.0,
  'jumps': 12,
  'adj_return_per_jump': 22.924044163799806},
 {'total_return': 1924.0641711229948,
  'adj_return': 1090.222784,
  'type_id': '1824',
  'from_system': '30000142',
  'to_system': '30002187',
  'real_investment': 11332499.2,
  'profit': 218044556.8,
  'jumps': 12,
  