In [96]:
import torch
import torch.utils.data
from torch.optim import SGD, Adam
from torch.nn import Linear, Sigmoid, Dropout, Softmax, Sequential, ReLU
from torch.nn.functional import one_hot
import torch.nn as nn
import torch.nn.functional as F
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

# Задание 1

In [2]:
import numpy as np
def sample_data():
    """Return 10000 integer bin indices drawn from a fixed two-component mixture.

    A private ``RandomState(0)`` is used, so every call returns the same array.
    Each draw is ``0.3 + 0.1 * z`` or ``0.8 + 0.05 * z`` (``z`` standard normal)
    with equal probability, clipped to [0, 1] and then binned by ``np.digitize``
    against 100 evenly spaced edges on [0, 1].
    """
    n_samples = 10000
    rng = np.random.RandomState(0)
    # The draw order (randn, randn, rand) fixes the random stream; keep it.
    left = 0.3 + 0.1 * rng.randn(n_samples)
    right = 0.8 + 0.05 * rng.randn(n_samples)
    pick_left = rng.rand(n_samples) < 0.5
    mixture = np.where(pick_left, left, right)
    edges = np.linspace(0.0, 1.0, 100)
    return np.digitize(np.clip(mixture, 0.0, 1.0), edges)

In [3]:
def p(theta, x):
    """Return softmax probabilities of ``theta`` at index (or indices) ``x``.

    Args:
        theta: tensor of unnormalized log-probabilities (logits); the softmax
            is taken over all of its elements.
        x: index or collection of indices into ``theta``.

    Returns:
        ``softmax(theta)[x]``.
    """
    # torch.softmax is numerically stable, whereas the explicit
    # exp(theta) / sum(exp(theta)) form turns into inf / inf = nan once any
    # logit is large enough for exp() to overflow.
    probs = torch.softmax(theta.reshape(-1), dim=0).reshape(theta.shape)
    return probs[x]

def J(output):
    """Negative log-likelihood: minus the sum of the logs of ``output``."""
    log_probs = torch.log(output)
    return -log_probs.sum()

In [4]:
# Fix the torch RNG so the train/test split and the batch order are reproducible.
torch.manual_seed(0)

theta = torch.zeros(100, requires_grad=True)
sgd = SGD([theta], lr=0.03)

num_epochs = 20
losses = []  # loss of every mini-batch, as returned by J
sample = sample_data()
size_train = int(len(sample) * 0.8)
size_test = len(sample) - size_train
train, test = torch.utils.data.random_split(sample, (size_train, size_test))

x_train = torch.tensor(train, dtype=torch.long)
x_test = torch.tensor(test, dtype=torch.long)
dataset = torch.utils.data.DataLoader(x_train, batch_size=1000, shuffle=True)
train_losses = []  # J over the whole train split / size_train, once per epoch
test_losses = []  # J over the whole test split / size_test, once per epoch
for i in range(num_epochs):
    for x1 in dataset:
        sgd.zero_grad()
        output = p(theta, x1)
        loss = J(output)
        losses.append(float(loss))
        loss.backward()
        sgd.step()
    # Evaluation only reads theta; no_grad avoids building an autograd graph
    # over the full splits on every epoch.
    with torch.no_grad():
        train_losses.append(float(J(p(theta, x_train))) / size_train)
        test_losses.append(float(J(p(theta, x_test))) / size_test)

In [5]:
# Long-format table of the per-epoch losses: train rows first, then test rows.
df = pd.DataFrame({
    'loss': train_losses + test_losses,
    'data': ['train'] * num_epochs + ['test'] * num_epochs,
    'iter': list(range(num_epochs)) * 2,
})

In [6]:
# Loss per epoch, one line per split ('train' / 'test').
px.line(
    data_frame=df,
    x='iter',
    y='loss',
    color='data',
)

In [7]:
# Model probability of every index 0..99.
predicted = p(theta, range(100)).tolist()

# Relative frequency of each value among the first 1000 samples.
expected = np.zeros(100)
for x in sample[:1000]:
    expected[x] += 1
expected = list(expected / 1000)

# Long-format table: the 100 empirical rows first, then the 100 model rows.
df = pd.DataFrame({
    'prob': expected + predicted,
    'data': ['expected'] * 100 + ['predicted'] * 100,
    'val': list(range(100)) * 2,
})

In [8]:
# Empirical ('expected') versus model ('predicted') probability per value.
px.line(
    data_frame=df,
    x='val',
    y='prob',
    color='data',
)

# Задание 2
## 2.1

In [10]:
# Load the array stored in 'distribution.npy'. gen_pair below indexes it as a
# 2-D table [x1, x2] of joint probabilities with 200 entries per axis
# (assumption based on that usage — TODO confirm against the file).
dist = np.load('distribution.npy')

In [11]:
def gen_pair(joint=None):
    """Sample one ``(x1, x2)`` pair from a 2-D table of joint weights.

    ``x1`` is drawn from the row marginal of the table and ``x2`` from the
    conditional distribution given by row ``x1``.

    Args:
        joint: 2-D array of non-negative weights indexed ``[x1, x2]``. It does
            not have to be normalized. Defaults to the module-level ``dist``.

    Returns:
        Tuple ``(x1, x2)`` of sampled indices.
    """
    if joint is None:
        joint = dist
    joint = np.asarray(joint)
    # Take the sizes from the table instead of assuming a 200 x 200 grid.
    n_x1, n_x2 = joint.shape
    # Normalize explicitly: np.random.choice raises when the probabilities do
    # not sum to 1, which round-off in a stored table can trigger.
    marginal = joint.sum(axis=1)
    x1 = np.random.choice(n_x1, p=marginal / marginal.sum())
    row = joint[x1]
    x2 = np.random.choice(n_x2, p=row / row.sum())
    return (x1, x2)

In [12]:
# Draw 100000 (x1, x2) pairs; each row of the resulting array is one pair.
sample = np.array([gen_pair() for _ in range(100000)])

In [13]:
# Random 80/20 split of the sampled pairs into train and test subsets.
size_train = int(0.8 * len(sample))
size_test = len(sample) - size_train
train, test = torch.utils.data.random_split(sample, [size_train, size_test])

In [14]:
# Materialize both splits as tensors; training batches are 1000 shuffled rows.
x_train, x_test = torch.tensor(train), torch.tensor(test)
dataset = torch.utils.data.DataLoader(
    dataset=x_train,
    shuffle=True,
    batch_size=1000,
)

In [15]:
def p(theta, x):
    """Return softmax probabilities of ``theta`` at index (or indices) ``x``.

    Args:
        theta: tensor of unnormalized log-probabilities (logits); the softmax
            is taken over all of its elements.
        x: index or collection of indices into ``theta``.

    Returns:
        ``softmax(theta)[x]``.
    """
    # torch.softmax is numerically stable, whereas the explicit
    # exp(theta) / sum(exp(theta)) form turns into inf / inf = nan once any
    # logit is large enough for exp() to overflow.
    probs = torch.softmax(theta.reshape(-1), dim=0).reshape(theta.shape)
    return probs[x]

def J(output):
    """Mean negative log-likelihood: minus the average log of ``output``."""
    log_probs = torch.log(output)
    return -log_probs.mean()

In [69]:
class MLP(nn.Module):
    """Fully connected network mapping ``nin`` features to ``nout`` probabilities.

    Hidden layers have width 20 and are each followed by a sigmoid and
    dropout(0.2); the last linear layer is followed by a softmax over
    dimension 1, so ``forward`` expects a 2-D ``(batch, nin)`` input.

    Args:
        nin: number of input features.
        nout: number of output classes.
        n_hidden: total number of linear layers (for ``n_hidden >= 2``;
            smaller values still build the input and output layers).
    """

    def __init__(self, nin, nout, n_hidden):
        super(MLP, self).__init__()
        self.layers = []
        self.layers.extend([
            Linear(nin, 20),
            Sigmoid(),
            Dropout(0.2)
        ])
        # Depth is driven by n_hidden. The loop used to run over
        # range(1, nin - 1), which ignored n_hidden and stacked nin - 2 extra
        # hidden layers (198 of them for nin=200).
        for i in range(1, n_hidden - 1):
            self.layers.extend([
                Linear(20, 20),
                Sigmoid(),
                Dropout(0.2)
            ])
        self.layers.extend([
            Linear(20, nout),
            Softmax(dim=1)
        ])
        # Only the first layer gets the standard-normal weight init; the other
        # layers keep the default Linear initialization.
        torch.nn.init.normal_(self.layers[0].weight)
        self.model = nn.Sequential(*self.layers)

    def forward(self, x):
        """Return the ``(batch, nout)`` matrix of class probabilities for ``x``."""
        return self.model(x)
        
class LikelihoodModel(nn.Module):
    """Joint model p(x1, x2) = p(x1) * p(x2 | x1) over pairs of values in 0..199.

    p(x1) is the softmax of the learnable vector ``theta``; p(x2 | x1) is the
    output of an ``MLP`` fed with the one-hot encoding of x1.
    """

    def __init__(self, n_hidden):
        super().__init__()
        self.theta = nn.Parameter(torch.zeros(200))
        self.mlp = MLP(200, 200, n_hidden)

    def forward(self, batch):
        """Return ``J`` applied to the joint probabilities of the pairs in ``batch``.

        ``batch`` is read as ``batch[:, 0]`` (x1) and ``batch[:, 1]`` (x2).
        """
        first, second = batch[:, 0], batch[:, 1]
        marginal = p(self.theta, first)
        # One row of conditional probabilities per pair; pick the entry for x2.
        conditional_table = self.mlp(one_hot(first, 200).float())
        conditional = conditional_table[torch.arange(len(batch)), second]
        return J(marginal * conditional)

In [136]:
num_epochs = 20

model = LikelihoodModel(3)
sgd = Adam(model.parameters())

train_losses = []  # batch-size-weighted average of the batch losses, per epoch
test_losses = []  # loss on the held-out split, per epoch
for i in range(num_epochs):
    model.train()  # training mode: the MLP's dropout layers are active
    avg_loss = 0
    for batch in dataset:
        sgd.zero_grad()
        loss = model(batch)
        avg_loss += float(loss) * len(batch)
        loss.backward()
        sgd.step()
    avg_loss /= len(x_train)
    train_losses.append(avg_loss)
    # Held-out loss. This used to be evaluated on x_train, so the curve
    # labelled 'test' was a second train curve. Evaluate on x_test, with
    # dropout disabled and without building an autograd graph.
    model.eval()
    with torch.no_grad():
        test_losses.append(float(model(x_test)))

In [137]:
# Long-format table of the per-epoch losses: train rows first, then test rows.
df = pd.DataFrame({
    'loss': train_losses + test_losses,
    'data': ['train'] * num_epochs + ['test'] * num_epochs,
    'iter': list(range(num_epochs)) * 2,
})

In [138]:
# Loss per epoch, one line per split ('train' / 'test').
px.line(
    data_frame=df,
    x='iter',
    y='loss',
    color='data',
)

In [21]:
# 2-D histogram of the sampled pairs: second column (x2) on the horizontal
# axis, first column (x1) on the vertical axis, normalized to probabilities.
x = sample[:, 1]
y = sample[:, 0]
fig = go.Figure(
    data=go.Histogram2d(
        x=x,
        y=y,
        histnorm='probability',
        nbinsx=200,
        nbinsy=200,
    )
)
fig.show()

In [139]:
# Joint table predicted by the trained LikelihoodModel:
# predicted[i][j] = p(x1=i) * p(x2=j | x1=i).
# The parameters are read from `model`: the bare name `theta` is the
# 100-element vector from Task 1, and no global `mlp` is defined.
model.eval()  # disable dropout so the table is deterministic
with torch.no_grad():
    px1 = p(model.theta, torch.arange(200))
    # Row i of the identity matrix is the one-hot encoding of x1 = i, so a
    # single forward pass yields all 200 conditional distributions.
    predicted = (model.mlp(torch.eye(200)) * px1.unsqueeze(1)).tolist()

In [140]:
# Heatmap of the predicted joint table (rows follow the first index of `predicted`).
heatmap = go.Heatmap(z=predicted)
fig = go.Figure(data=heatmap)
fig.show()

## 2.2

In [202]:
class MaskedLayer(nn.Linear):
    """Linear layer whose weight matrix is multiplied by a fixed 0/1 mask.

    Every input unit ``i`` has a degree ``m_in[i]`` and every output unit ``o``
    a degree ``m_out[o]``. The weight from ``i`` to ``o`` is kept when
    ``m_in[i] <= m_out[o]``, or ``m_in[i] < m_out[o]`` when ``strict`` is set.

    Args:
        m_in: 1-D tensor of input degrees; its length is the input width.
        m_out: 1-D tensor of output degrees; its length is the output width.
        strict: use the strict comparison. Required for the output layer of an
            autoregressive network so that an output never depends on inputs of
            its own degree.
    """

    def __init__(self, m_in, m_out, strict=False):
        super(MaskedLayer, self).__init__(len(m_in), len(m_out))
        a = m_in.reshape((1, len(m_in)))
        b = m_out.reshape((len(m_out), 1))
        mask = (a < b) if strict else (a <= b)
        # Registered as a buffer (not a plain attribute) so the mask follows
        # the module through .to(device) and is stored in the state dict.
        self.register_buffer('mask', mask.float())

    def forward(self, x):
        """Apply the linear map using only the unmasked weights."""
        return F.linear(x, self.weight * self.mask, self.bias)


class MADE(nn.Module):
    """Masked autoregressive network over ``n_blocks`` one-hot blocks of width ``nin``.

    The input is the concatenation of ``n_blocks`` one-hot vectors. Block ``k``
    of the output is a softmax distribution that depends only on input blocks
    with index smaller than ``k`` (block 0 depends on no input at all).

    Args:
        nin: width of each block (number of values one variable can take).
        n_blocks: number of variables; must be at least 2, because hidden
            degrees are drawn from ``[0, n_blocks - 2]``.
        n_hidden: total number of masked linear layers (for ``n_hidden >= 2``).
        hidden_size: number of units in every hidden layer.
    """

    def __init__(self, nin, n_blocks, n_hidden, hidden_size=100):
        super(MADE, self).__init__()
        self.nin = nin
        self.n_blocks = n_blocks
        # Degree of each input/output unit is the index of its block:
        # [0]*nin + [1]*nin + ...
        start_m = torch.arange(n_blocks).repeat_interleave(nin)
        m1 = start_m
        # randint's upper bound is exclusive, so hidden degrees are at most
        # n_blocks - 2: no hidden unit may see the last block.
        m2 = torch.randint(int(m1.min()), n_blocks - 1, (hidden_size,))
        self.layers = [
            MaskedLayer(m1, m2),
            Sigmoid(),
        ]
        for i in range(1, n_hidden - 1):
            m1 = m2
            m2 = torch.randint(int(m1.min()), n_blocks - 1, (hidden_size,))
            self.layers.extend([
                MaskedLayer(m1, m2),
                Sigmoid(),
            ])
        m1 = m2
        m2 = start_m
        # The output mask must be strict. With '<=' a degree-0 hidden unit
        # (which sees block 0 of the input) feeds block 0 of the output, so
        # the network could copy x1 into its own prediction of x1.
        self.layers.append(MaskedLayer(m1, m2, strict=True))
        torch.nn.init.normal_(self.layers[0].weight)
        self.seq = nn.Sequential(*self.layers)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return per-block distributions with shape ``(n_blocks, *batch_dims, nin)``.

        ``x`` has shape ``(*batch_dims, n_blocks * nin)``.
        """
        z = self.seq(x)
        z = z.reshape((*x.shape[:-1], self.n_blocks, self.nin))
        # Move the block axis to the front and keep the batch axes in order.
        # For 1-D and 2-D inputs this equals the former transpose(0, -2); for
        # more batch axes the transpose would have swapped two of them.
        axes = list(range(z.dim()))
        block_axis = axes.pop(-2)
        return self.softmax(z).permute(block_axis, *axes)

class MADELikelihoodModel(nn.Module):
    """Likelihood model for ``(x1, x2)`` pairs backed by a two-block ``MADE``.

    Args:
        n_hidden: forwarded to ``MADE`` as its ``n_hidden`` argument.
        nin: number of values each of x1 and x2 can take.
    """

    def __init__(self, n_hidden, nin=200):
        super(MADELikelihoodModel, self).__init__()
        self.nin = nin
        self.made = MADE(nin, 2, n_hidden)

    def forward(self, batch):
        """Return ``J`` applied to the joint probabilities of the pairs in ``batch``.

        ``batch`` is an integer tensor read as ``batch[:, 0]`` (x1) and
        ``batch[:, 1]`` (x2).
        """
        x1 = batch[:, 0]
        x2 = batch[:, 1]
        # num_classes must be passed explicitly: one_hot otherwise infers the
        # width from the largest value present in the batch, so any batch that
        # lacks the value nin - 1 would be encoded with the wrong width and
        # break (or silently corrupt) the reshape below.
        x = one_hot(batch, self.nin).reshape(-1, 2 * self.nin)
        px1, px2 = self.made(x.float())
        rows = torch.arange(len(batch))
        ox1 = px1[rows, x1]
        ox2 = px2[rows, x2]
        return J(ox1 * ox2)

In [None]:
num_epochs = 20

model = MADELikelihoodModel(5)
sgd = Adam(model.parameters())

train_losses = []  # batch-size-weighted average of the batch losses, per epoch
test_losses = []  # loss on the held-out split, per epoch
for i in range(num_epochs):
    model.train()
    avg_loss = 0
    for batch in dataset:
        sgd.zero_grad()
        loss = model(batch)
        avg_loss += float(loss) * len(batch)
        loss.backward()
        sgd.step()
    avg_loss /= len(x_train)
    train_losses.append(avg_loss)
    # Held-out loss. This used to be evaluated on x_train, so the curve
    # labelled 'test' was a second train curve. Evaluate on x_test, in eval
    # mode and without building an autograd graph.
    model.eval()
    with torch.no_grad():
        test_losses.append(float(model(x_test)))

In [None]:
# Long-format table of the per-epoch losses: train rows first, then test rows.
df = pd.DataFrame({
    'loss': train_losses + test_losses,
    'data': ['train'] * num_epochs + ['test'] * num_epochs,
    'iter': list(range(num_epochs)) * 2,
})

In [None]:
# Loss per epoch, one line per split ('train' / 'test').
px.line(
    data_frame=df,
    x='iter',
    y='loss',
    color='data',
)

In [None]:
# Joint table predicted by the trained MADE model:
# predicted[i][j] = p(x1=i) * p(x2=j | x1=i).
# The table is read from `model.made`: `theta` and `mlp` are not part of
# MADELikelihoodModel (bare `theta` is the Task 1 vector, `mlp` is undefined).
model.eval()
with torch.no_grad():
    values = torch.arange(200)
    # One input row per x1 value. The x2 slot is a placeholder (value 0): with
    # two blocks every hidden unit has degree 0, so the masks cut every
    # connection from the x2 inputs and the placeholder cannot affect outputs.
    pairs = torch.stack([values, torch.zeros_like(values)], dim=1)
    px1, px2 = model.made(one_hot(pairs, 200).reshape(-1, 400).float())
    # px1[i, i] is the probability of x1=i for the row that encodes x1=i,
    # matching how MADELikelihoodModel.forward scores a pair.
    predicted = (px1[values, values].unsqueeze(1) * px2).tolist()

In [None]:
# Heatmap of the predicted joint table (rows follow the first index of `predicted`).
heatmap = go.Heatmap(z=predicted)
fig = go.Figure(data=heatmap)
fig.show()