In [1]:
import random
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

random.seed(17)
%matplotlib inline

In [2]:
def gen(n, density=0.5):
    """Build a random directed graph as a list of adjacency sets.

    Vertex ``i`` may only point to lower-numbered vertices ``j < i``; each such
    edge is included independently when ``random.random() < density``.

    Args:
        n: number of vertices.
        density: threshold compared against each ``random.random()`` draw.

    Returns:
        A list of ``n`` sets, where entry ``i`` holds the targets of ``i``.
    """
    # One draw per (i, j) pair with j < i, taken in row-major order.
    return [
        {target for target in range(source) if random.random() < density}
        for source in range(n)
    ]

In [3]:
# Number of vertices in the generated graph.
N = 1000
# g[i] is the set of lower-numbered vertices that vertex i points to
# (gen is called with its default density of 0.5).
g = gen(N)
# Peek at the adjacency sets of the first three vertices.
g[:3]

[set(), set(), set()]

In [4]:
def dfs(g, v, visited):
    """Add every vertex reachable from ``v`` to ``visited`` and return it.

    The traversal uses an explicit stack instead of recursion, so long paths
    (deeper than the interpreter's recursion limit) do not raise
    ``RecursionError``.

    Args:
        g: adjacency structure; ``g[x]`` is an iterable of the successors of ``x``.
        v: start vertex.
        visited: set of already-seen vertices. It is mutated in place; vertices
            already present are not expanded again (except ``v`` itself).

    Returns:
        The same ``visited`` object, now also containing ``v`` and everything
        newly reached from it.
    """
    visited.add(v)
    pending = [v]
    while pending:
        node = pending.pop()
        for u in g[node]:
            if u not in visited:
                # Mark on push so a vertex is never queued twice.
                visited.add(u)
                pending.append(u)
    return visited

In [5]:
def union(g, v, u):
    """Return the set of vertices reachable from ``v`` or from ``u`` in ``g``.

    Each start vertex is explored independently with a fresh visited set, and
    the two results are merged.
    """
    reach_from_v = dfs(g, v, set())
    reach_from_u = dfs(g, u, set())
    return reach_from_v | reach_from_u

In [6]:
# Number of (vertex, vertex) examples to generate.
EXAMPLE_COUNT = 1000

# Features: pairs of vertex ids drawn uniformly from range(N).
X = [(random.randrange(N), random.randrange(N)) for i in range(EXAMPLE_COUNT)]
# Target: how many distinct vertices are reachable from either vertex of the pair.
y = [len(union(g, x[0], x[1])) for x in X]
# Show the first three examples next to their targets.
X[:3], y[:3]

([(907, 275), (421, 696), (453, 74)], [907, 693, 452])

In [7]:
def shuffle(X, n=None):
    """Relabel the vertex ids in ``X`` through a random permutation.

    The original implementation sized the permutation by ``len(X)`` (the number
    of examples) although it is indexed by vertex labels, so it raised
    ``IndexError`` whenever a label was ``>= len(X)``. The permutation is now
    large enough to cover every label that occurs in ``X``.

    Args:
        X: sequence of pairs ``(a, b)`` of non-negative integer vertex labels.
        n: size of the permutation (number of distinct labels). When omitted,
            it is ``max(len(X), largest label + 1)``, which is exactly
            ``len(X)`` in every case where the previous behaviour worked, so
            results under a fixed seed are unchanged there.

    Returns:
        ``(m, relabelled)`` where ``m`` is a list holding a permutation of
        ``range(n)`` and ``relabelled`` is ``[(m[a], m[b]) for (a, b) in X]``.
    """
    if n is None:
        largest = max((max(x[0], x[1]) for x in X), default=-1)
        n = max(len(X), largest + 1)
    m = random.sample(range(n), n)
    return m, [(m[x[0]], m[x[1]]) for x in X]

In [8]:
# Relabel both vertices of every pair through a random permutation m.
# NOTE(review): this rebinds X, so re-running the cell permutes the already
# relabelled pairs again; y is left unchanged by this step.
m, X = shuffle(X)
X[:3]

[(753, 54), (222, 447), (616, 727)]

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [10]:
# Hold out 33% of the examples for testing; random_state=17 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=17)

In [11]:
def score(model):
    """Fit ``model`` on the training split and print its train and test MSE.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``. The printed "score" is a mean squared error, so lower is
    better.

    Args:
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns:
        None; the two error values are printed.
    """
    model.fit(X_train, y_train)
    # mean_squared_error is documented as (y_true, y_pred); pass the targets first.
    train_mse = mean_squared_error(y_train, model.predict(X_train))
    print('Train score: {}'.format(train_mse))
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    print('Test score: {}'.format(test_mse))

In [12]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor

In [13]:
# Baseline: linear regression on the two relabelled vertex ids.
score(LinearRegression())

Train score: 54802.05299100932
Test score: 61629.49386981678


In [14]:
# Decision tree with default settings. In the recorded output it reaches zero
# training error but the highest test error of the models tried here.
score(DecisionTreeRegressor())

Train score: 0.0
Test score: 111404.56363636363


In [15]:
# AdaBoost with default settings.
# NOTE(review): no random_state is passed, so a rerun may not reproduce the
# recorded numbers exactly — confirm.
score(AdaBoostRegressor())

Train score: 52264.61756817563
Test score: 62056.5210300031


In [16]:
# Multi-layer perceptron with alpha=1e-2.
# NOTE(review): no random_state is passed, so a rerun may not reproduce the
# recorded numbers exactly — confirm.
score(MLPRegressor(alpha=1e-2))

Train score: 107065.02152349374
Test score: 96164.22137514855


In [17]:
#score(SVR(kernel='rbf', C=1e1, gamma=0.1))

In [18]:
#score(SVR(kernel='poly', C=1e1, degree=2))