In [4]:
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt

from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch.nn import functional as F

from torch.utils.data import TensorDataset, DataLoader
from scipy.special import softmax
from random import uniform
from math import cos, sin, pi

sns.set(style="darkgrid", font_scale=1.4)

In [5]:
BATCH_SIZE = 100
NUM_PARAMETERS = 1000
NUM_TEST = 100000

In [6]:
def generate1():
    a = uniform(0, 1)
    b = uniform(0, 1)
    return a * cos(2 * pi * b), a * sin(2 * pi * b)


def generate2():
    while True:
        x = uniform(-1, 1)
        y = uniform(-1, 1)
        if x ** 2 + y ** 2 > 1:
            continue
        return (x, y)

In [7]:
def gen1():
    return torch.flatten(torch.stack([torch.tensor(generate1()) for _ in range(NUM_PARAMETERS)]))#, torch.tensor(0)

def gen2():
    return torch.flatten(torch.stack([torch.tensor(generate2()) for _ in range(NUM_PARAMETERS)]))#, torch.tensor(1)

In [8]:
gen1().dtype

torch.float32

In [558]:
X1_=torch.stack([gen1() for _ in range(1000)])
X2_=torch.stack([gen2() for _ in range(1000)])
# X_.shape

In [559]:
torch.mean(torch.std(X1_, axis=1)), torch.min(torch.std(X1_, axis=1)), torch.max(torch.std(X1_, axis=1))

(tensor(0.4079), tensor(0.3863), tensor(0.4264))

In [560]:
torch.mean(torch.std(X2_, axis=1)), torch.min(torch.std(X2_, axis=1)), torch.max(torch.std(X2_, axis=1))

(tensor(0.5000), tensor(0.4875), tensor(0.5145))

In [561]:
def predictor(X):
    if (torch.std(X) < 0.45):
        return 1
    else:
        return 2

In [574]:
predictor(gen1())

1

In [374]:
torch.cat((torch.stack([gen1() for _ in range(NUM_TEST)]), torch.stack([gen2() for _ in range(NUM_TEST)])), dim=0).shape, torch.cat((torch.zeros(NUM_TEST, 1), torch.ones(NUM_TEST, 1)), dim=0).shape

(torch.Size([200000, 200]), torch.Size([200000, 1]))

In [375]:
X = torch.cat((torch.stack([gen1() for _ in range(NUM_TEST)]), torch.stack([gen2() for _ in range(NUM_TEST)])), dim=0)

y = torch.cat((torch.zeros(NUM_TEST, 1, dtype=int), torch.ones(NUM_TEST, 1, dtype=int)), dim=0)

train_dataset = TensorDataset(X, y)

train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True, num_workers=2)

In [376]:
X = torch.cat((torch.stack([gen1() for _ in range(NUM_TEST)]), torch.stack([gen2() for _ in range(NUM_TEST)])), dim=0)

y = torch.cat((torch.zeros(NUM_TEST, dtype=int), torch.ones(NUM_TEST, dtype=int)), dim=0)

test_dataset = TensorDataset(X, y)

valid_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, num_workers=2)

In [377]:
train_dataloader.dataset[0][0].shape

torch.Size([200])

In [378]:
in_features = NUM_PARAMETERS * 2
out_features = 2
loaders = {"train": train_dataloader, "valid": valid_dataloader}

def create_model(activation):
    return nn.Sequential(
        nn.Linear(in_features, 128),
        activation(),
        nn.Linear(128, 128),
        activation(),
        nn.Linear(128, 128),
        activation(),
        nn.Linear(128, out_features)
    )

In [379]:
max_epochs = 10
def train_model(model, criterion, optimizer, max_epochs):
    accuracy = {"train": [], "valid": []}
    for epoch in range(max_epochs):
        for k, dataloader in loaders.items():
            epoch_correct = 0
            epoch_all = 0
            i = 0
            
            for x_batch, y_batch in dataloader:
                # x_batch, y_batch = x_batch.to(device), y_batch.to(device)
                y_batch = y_batch.view(-1)
                if k == "train":
                    model.train()
                    outp = model(x_batch)
                    optimizer.zero_grad()
                    # print (outp, y_batch)
                    loss = criterion(outp, y_batch)
                    loss.backward()
                    optimizer.step()
                else:
                    model.eval()
                    with torch.no_grad():
                        outp = model(x_batch)
                preds = outp.argmax(-1)
                correct = (y_batch == preds).sum().item()
                epoch_correct += correct
                epoch_all += BATCH_SIZE

            if k == "train":
                print(f"Epoch: {epoch+1}")
            print(f"Loader: {k}. Accuracy: {epoch_correct/epoch_all}")
            print(epoch_correct, epoch_all)
            accuracy[k].append(epoch_correct/epoch_all)
    return accuracy["valid"]

In [380]:
model = create_model(nn.ReLU)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
train_model(model, criterion, optimizer, max_epochs)

Epoch: 1
Loader: train. Accuracy: 0.8836
176720 200000
Loader: valid. Accuracy: 0.92019
184038 200000
Epoch: 2
Loader: train. Accuracy: 0.94313
188626 200000
Loader: valid. Accuracy: 0.92476
184952 200000
Epoch: 3
Loader: train. Accuracy: 0.95477
190954 200000
Loader: valid. Accuracy: 0.91801
183602 200000
Epoch: 4
Loader: train. Accuracy: 0.963045
192609 200000
Loader: valid. Accuracy: 0.930755
186151 200000
Epoch: 5
Loader: train. Accuracy: 0.968995
193799 200000
Loader: valid. Accuracy: 0.931835
186367 200000
Epoch: 6
Loader: train. Accuracy: 0.972655
194531 200000
Loader: valid. Accuracy: 0.931835
186367 200000
Epoch: 7
Loader: train. Accuracy: 0.976225
195245 200000
Loader: valid. Accuracy: 0.933925
186785 200000
Epoch: 8
Loader: train. Accuracy: 0.978395
195679 200000
Loader: valid. Accuracy: 0.933095
186619 200000
Epoch: 9
Loader: train. Accuracy: 0.981015
196203 200000
Loader: valid. Accuracy: 0.933355
186671 200000
Epoch: 10
Loader: train. Accuracy: 0.982585
196517 200000
Load

[0.92019,
 0.92476,
 0.91801,
 0.930755,
 0.931835,
 0.931835,
 0.933925,
 0.933095,
 0.933355,
 0.9352]

In [336]:
model.eval()
with torch.no_grad():
    outp = model(gen2())
outp.argmax(-1)

tensor(1)

In [661]:
import sklearn.linear_model as lin

In [666]:
def foo(x):
    return x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3] + x[4]*x[4]

In [675]:
def generate():
    X = np.random.randn(10, 5) * 100
    y = [foo(v) for v in X]
    return X, y

X, y = generate()
X, y

(array([[  -5.51749971,   82.12078766,   30.6838519 ,   84.40676924,
         -182.16614372],
        [  80.82767499,    3.00898762,  -29.45809247,  140.97015499,
          -81.93509822],
        [-104.4668926 ,   35.55487603,   63.37091791,  -86.0555033 ,
          144.9415766 ],
        [ -70.0884038 ,  -72.83505531,  122.38079532,   46.9537485 ,
          155.18128515],
        [ -48.05606149, -232.7991452 ,  -84.49278978, -124.75921645,
           73.67610309],
        [  48.95024955, -282.1526352 ,   25.91570826,   85.88944604,
          -37.13355185],
        [ -27.58595703,  -59.67418045,   95.5381413 ,  -87.36663048,
          -40.33462062],
        [-193.36434599,  -10.22894243,   65.21890893,   11.42312535,
          125.07454579],
        [  68.97292171,  164.00271585, -119.91182875,   12.8841102 ,
          128.70877033],
        [  11.4216428 ,  -82.18455894,  105.16407936,  -56.7715209 ,
          -28.7316129 ]]),
 [48024.771945100394,
  33995.89118090534,
  44606.9643717

In [681]:
for i in range(10):
    for x in X[i]:
        print(x, end=" ")
    print()

-5.517499710389823 82.12078765504765 30.6838518981288 84.40676923682304 -182.16614371681752 
80.82767499299834 3.0089876164360234 -29.45809246656234 140.97015498595022 -81.93509822490206 
-104.46689259802034 35.55487603237243 63.370917912033796 -86.05550329909313 144.94157660201444 
-70.08840380298463 -72.83505530973987 122.38079532393171 46.953748496960266 155.18128514541502 
-48.05606149286283 -232.79914520459357 -84.49278978468145 -124.75921645131214 73.67610308779304 
48.95024954647355 -282.1526352011628 25.91570825833906 85.8894460426809 -37.13355185058781 
-27.585957029890896 -59.674180449562996 95.53814130249643 -87.36663047968814 -40.33462061689295 
-193.36434598592825 -10.22894243408599 65.21890893181343 11.423125352099383 125.07454578784689 
68.97292171293195 164.00271585100458 -119.9118287466338 12.884110195995193 128.70877033273536 
11.42164280074466 -82.18455893618484 105.16407936129397 -56.77152090169376 -28.73161289647704 


In [673]:
for i in range(5):
    for j in range(i, 5):
        print(i, j, X.shape)
        X_ = (X[:,i] * X[:,j]).reshape(10, 1)
        X = np.append(X, X_, axis=1)
        
X

0 0 (10, 5)
0 1 (10, 6)
0 2 (10, 7)
0 3 (10, 8)
0 4 (10, 9)
1 1 (10, 10)
1 2 (10, 11)
1 3 (10, 12)
1 4 (10, 13)
2 2 (10, 14)
2 3 (10, 15)
2 4 (10, 16)
3 3 (10, 17)
3 4 (10, 18)
4 4 (10, 19)


array([[-2.44168967e+01,  1.15808089e+02,  3.93170838e+01,
         7.14673958e+01, -5.47585759e+01,  5.96184842e+02,
        -2.82767414e+03, -9.60001171e+02, -1.74501202e+03,
         1.33703449e+03,  1.34115135e+04,  4.55323634e+03,
         8.27650254e+03, -6.34148604e+03,  1.54583308e+03,
         2.80988959e+03, -2.15294752e+03,  5.10758867e+03,
        -3.91345282e+03,  2.99850164e+03],
       [ 6.12946838e+01, -8.28879151e+01,  2.02463660e+02,
         5.36989059e+00,  2.72802949e+01,  3.75703827e+03,
        -5.08058855e+03,  1.24099461e+04,  3.29145746e+02,
         1.67213705e+03,  6.87040647e+03, -1.67817907e+04,
        -4.45099035e+02, -2.26120677e+03,  4.09915338e+04,
         1.08720770e+03,  5.52326836e+03,  2.88357249e+01,
         1.46492199e+02,  7.44214490e+02],
       [-1.82039323e+01,  1.69557803e+02, -9.33035947e+01,
         2.57478587e+02, -8.05223779e+01,  3.31383151e+02,
        -3.08661876e+03,  1.69849232e+03, -4.68712277e+03,
         1.46582392e+03,  2.8

In [669]:
model = lin.LinearRegression()

model.fit(X, y)

model.predict(X)

array([113747.48858743, 127088.26871452,  67852.50401179,  48298.1001524 ,
        80225.03199408,  93084.38633936, 107248.18074213,  20045.92816685,
        35991.19983103,  83177.13440568])