In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F

import pandas as pd
import numpy as np

import os, sys
sys.path.append("../../../../")

from src.core.module import Module
from src.core.losses import CrossEntropy, MSE, BCE
from src.core.optim import AdamW, Standard
from src.core.tensor import Tensor
from src.utils.backend import xp

In [2]:
# Read the raw table; the "Quality" column carries the label as text.
df = pd.read_csv("../../data.csv")

# Encode the label in place: 1 where the text is exactly "Good", 0 otherwise.
df['Quality'] = df['Quality'].eq("Good").astype("int64")

# Features are every column except the label; the target is a column vector.
x = df.drop(columns='Quality').to_numpy(copy=True)
y = df['Quality'].to_numpy(copy=True).reshape(-1, 1)

In [3]:
class Net(Module):
    """Two bias-free linear layers followed by a sigmoid on the output.

    NOTE(review): there is no activation between ``fc1`` and ``fc2``; if
    ``self.linear`` builds plain linear maps the pair collapses to a single
    linear transform -- confirm this is intended.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created through the base class factory, without bias terms.
        self.fc1 = self.linear(input_size, hidden_size, bias=False)
        self.fc2 = self.linear(hidden_size, output_size, bias=False)

    def forward(self, x):
        """Return ``sigmoid(fc2(fc1(x)))``."""
        hidden = self.fc1(x)
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

    def train(self, x: Tensor, y: Tensor, optimizer, num_epochs=100):
        """Fit the network on the whole of ``x``/``y`` for ``num_epochs`` passes.

        Every epoch runs one forward pass over all of ``x``, computes the
        ``BCE`` loss against ``y``, back-propagates, lets ``optimizer`` take a
        step and then clears the gradients. The loss is printed on every
        100th epoch, starting with epoch 0. Nothing is returned.
        """
        for epoch in range(num_epochs):
            loss = BCE(self.forward(x), y)
            loss.backward()

            # Apply the update first, then reset gradients for the next epoch.
            optimizer.step()
            optimizer.zero_grad()

            if epoch % 100 != 0:
                continue
            print(f"Epoch {epoch}, Loss: {loss.data}")

In [4]:
# Fit a Net with 7 inputs, 4 hidden units and 1 output on the CSV data.
# NOTE(review): the recorded output below shows the loss rising after the
# first ~100 epochs instead of falling -- investigate before trusting this run.
net = Net(input_size=7, hidden_size=4, output_size=1)
net._build(x.shape)
optimizer = Standard(net.parameters(), lr=0.001)
net.train(x=Tensor(x), y=Tensor(y), optimizer=optimizer, num_epochs=50000)

Epoch 0, Loss: 0.8684522718552586
Epoch 100, Loss: 0.8464026275243832
Epoch 200, Loss: 0.8524561470133099
Epoch 300, Loss: 0.8743869109518382
Epoch 400, Loss: 0.9037543175717707
Epoch 500, Loss: 0.9357752855061997
Epoch 600, Loss: 0.9680390195527392
Epoch 700, Loss: 0.9994278790775899
Epoch 800, Loss: 1.0294772122652744
Epoch 900, Loss: 1.0580402322558484
Epoch 1000, Loss: 1.0851201894589877
Epoch 1100, Loss: 1.1107874168153935
Epoch 1200, Loss: 1.1351385431189434
Epoch 1300, Loss: 1.1582766725773816
Epoch 1400, Loss: 1.180302057405773
Epoch 1500, Loss: 1.2013080425378992
Epoch 1600, Loss: 1.2213796462177566
Epoch 1700, Loss: 1.240593430356249
Epoch 1800, Loss: 1.2590179690923262
Epoch 1900, Loss: 1.2767145610902135
Epoch 2000, Loss: 1.293738006703213
Epoch 2100, Loss: 1.3101373631768958
Epoch 2200, Loss: 1.325956639254902
Epoch 2300, Loss: 1.3412354154828094
Epoch 2400, Loss: 1.3560093890314553
Epoch 2500, Loss: 1.3703108477726438
Epoch 2600, Loss: 1.3841690807581415
Epoch 2700, Loss:

KeyboardInterrupt: 

In [8]:
# Sanity check of the training loop on synthetic data.
# NOTE: this cell rebinds `x` and `y` (the CSV arrays from the data-loading
# cell) to Tensors holding random data.

# Seed the backend's global RNG so the synthetic inputs are the same on every
# run. NOTE(review): whether this also fixes the weight initialisation depends
# on how the framework draws its initial weights -- confirm.
xp.random.seed(0)

# Inputs: standard-normal features and coin-flip 0/1 labels. The labels are
# independent of the features, so the loss should plateau near ln(2) ~= 0.693.
num_samples = 1000
x = Tensor(xp.random.randn(num_samples, 7), requires_grad=False)
y = Tensor((xp.random.rand(num_samples, 1) > 0.5).astype(xp.float32), requires_grad=False)

# Model
model = Net(7, 4, 1)
optimizer = Standard(model.parameters(), lr=0.01)  # Use a big LR for fast check

# Train for 5000 steps, reporting the loss every 100 steps.
num_steps = 5000
for step in range(num_steps):
    y_hat = model.forward(x)
    loss = BCE(y_hat, y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if step % 100 == 0:
        print("step", step, "loss", loss.data)


step 0 loss 0.8076825604811081
step 100 loss 0.7622999420352848
step 200 loss 0.736710288784231
step 300 loss 0.7218319802758465
step 400 loss 0.7128380315685587
step 500 loss 0.7071656087227384
step 600 loss 0.7034308205133043
step 700 loss 0.7008667903180094
step 800 loss 0.699035743618891
step 900 loss 0.6976799498494384
step 1000 loss 0.69664295388873
step 1100 loss 0.6958269009008714
step 1200 loss 0.695168796062181
step 1300 loss 0.6946269431904931
step 1400 loss 0.6941729955782154
step 1500 loss 0.6937871799086663
step 1600 loss 0.693455358478106
step 1700 loss 0.6931671815583634
step 1800 loss 0.6929149006823363
step 1900 loss 0.6926925910738568
step 2000 loss 0.6924956323783472
step 2100 loss 0.6923203555053383
step 2200 loss 0.692163798178007
step 2300 loss 0.6920235328123987
step 2400 loss 0.6918975432916381
step 2500 loss 0.6917841353023197
step 2600 loss 0.691681870052673
step 2700 loss 0.6915895145175944
step 2800 loss 0.691506003532092
step 2900 loss 0.691430410497989
st

In [1]:
import nltk
# Fetch the "punkt" data package used for sentence tokenization.
# NOTE(review): newer NLTK releases may look for "punkt_tab" instead --
# confirm against the installed NLTK version.
nltk.download("punkt")
from nltk.tokenize import sent_tokenize

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\luequ\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
from nltk.tokenize import sent_tokenize

def split_text(text, max_tok, tokenizer, *, sentence_splitter=None, split_long=True):
    """Pack the sentences of ``text`` into chunks of at most ``max_tok`` tokens.

    The text is split into sentences, each sentence is encoded with
    ``tokenizer.encode``, and consecutive sentences are concatenated greedily:
    a new chunk is started whenever the next sentence would push the current
    chunk past ``max_tok`` tokens.

    A sentence that is longer than ``max_tok`` on its own used to be dropped
    silently, which lost text. It is now cut into consecutive windows of at
    most ``max_tok`` tokens so that nothing is discarded; pass
    ``split_long=False`` to get the old skipping behaviour back.

    Args:
        text: The string to split.
        max_tok: Maximum number of tokens per chunk. A value of zero or less
            can hold no token at all and yields an empty list.
        tokenizer: Object with an ``encode(str)`` method returning a sequence
            of tokens that supports ``len()`` and iteration (and slicing when
            an oversize sentence has to be split).
        sentence_splitter: Optional callable mapping ``text`` to an iterable
            of sentences. Defaults to the module-level ``sent_tokenize``.
        split_long: When true (default) oversize sentences are split into
            windows; when false they are skipped.

    Returns:
        A list of chunks, each a list of tokens no longer than ``max_tok``.

    NOTE(review): if ``tokenizer.encode`` adds special tokens on every call
    they will end up inside the chunks -- confirm against the tokenizer used.
    """
    if max_tok <= 0:
        return []

    if sentence_splitter is None:
        sentence_splitter = sent_tokenize

    chunks = []
    curr = []

    for sentence in sentence_splitter(text):
        tokens = tokenizer.encode(sentence)

        if len(tokens) <= max_tok:
            # Common case: the sentence fits and is used as-is.
            pieces = [tokens]
        elif split_long:
            # Oversize sentence: hard-split into windows of max_tok tokens.
            pieces = [tokens[start:start + max_tok]
                      for start in range(0, len(tokens), max_tok)]
        else:
            continue

        for piece in pieces:
            # Flush the current chunk when this piece would overflow it.
            if len(curr) + len(piece) > max_tok:
                chunks.append(curr)
                curr = []
            curr.extend(piece)

    if curr:
        chunks.append(curr)

    return chunks
