In [1]:
from __future__ import division # no need for python3, but just in case used w/ python2

import sys
import time
from svector import svector

def read_from(textfile):
    """Yield ``(label, words)`` pairs parsed from a labelled text file.

    Each non-blank line is expected to have the form ``label<TAB>text``.
    The label becomes ``1`` when it is exactly ``"+"`` and ``-1`` for
    anything else; the text is split on whitespace into a list of tokens.

    Blank (whitespace-only) lines are skipped, and only the first tab
    separates the label from the text, so any later tabs are treated as
    ordinary whitespace between tokens.

    Raises:
        ValueError: if a non-blank line contains no tab character.
    """
    # The context manager closes the file once iteration finishes or the
    # generator is closed, instead of leaving the handle to the garbage collector.
    with open(textfile) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            label, words = line.split("\t", 1)
            yield (1 if label == "+" else -1, words.split())

def make_vector(words):
    """Build a sparse count vector from an iterable of tokens.

    Starts from an empty ``svector`` and adds 1 to the entry keyed by each
    token, so a token that occurs several times accumulates its count.
    """
    counts = svector()
    for token in words:
        counts[token] += 1
    return counts
    
def test(devfile, model):
    """Return the fraction of examples in ``devfile`` that ``model`` gets wrong.

    An example counts as an error when ``label * model.dot(features) <= 0``,
    so a score of exactly zero is always treated as a mistake.

    Args:
        devfile: path passed to ``read_from`` to obtain ``(label, words)`` pairs.
        model: weight vector exposing ``dot``; it is applied to the vector
            built by ``make_vector`` for each example.

    Raises:
        ValueError: if ``devfile`` yields no examples, since the error rate
            would be undefined.
    """
    total, errors = 0, 0
    for label, words in read_from(devfile):
        total += 1
        if label * model.dot(make_vector(words)) <= 0:
            errors += 1
    if total == 0:
        raise ValueError(f"no examples found in {devfile!r}")
    return errors / total
            

In [2]:
import time

def _average_snapshots(cumulative_model, model, steps):
    """Return the mean of the ``steps`` snapshots summed in ``cumulative_model``.

    Falls back to a copy of ``model`` when no snapshot has been accumulated.
    """
    if steps > 0:
        return cumulative_model * (1 / steps)
    return model.copy()


def train(trainfile, devfile, epochs=5):
    """Train an averaged perceptron and return the averaged weight vector.

    For every training example the current weights are updated when
    ``label * model.dot(features) <= 0``, and the weights after that example
    (updated or not) are added to a running sum.  At the end of each epoch
    the sum is divided by the number of accumulated snapshots, the resulting
    averaged model is scored on ``devfile`` with ``test``, and a progress
    line is printed.

    Args:
        trainfile: path of the training data, read once per epoch via ``read_from``.
        devfile: path of the development data passed to ``test``.
        epochs: number of passes over the training data.

    Returns:
        The averaged model computed after the last epoch (not necessarily
        the one that achieved the best dev error).
    """
    t = time.time()
    best_err = 1.0
    model = svector()
    cumulative_model = svector()
    steps = 0  # number of model snapshots added to cumulative_model
    averaged_model = None

    for it in range(1, epochs + 1):
        # Counted per epoch so the reported update rate is a true percentage
        # of this epoch's examples rather than a running total.
        epoch_updates = 0
        epoch_examples = 0
        for label, words in read_from(trainfile):
            epoch_examples += 1
            sent = make_vector(words)
            if label * model.dot(sent) <= 0:
                model += label * sent
                epoch_updates += 1
            cumulative_model += model
            steps += 1

        # Divide by the number of snapshots (not the number of updates) so the
        # result is the actual mean; this only rescales the weights, so the
        # sign of every prediction is unaffected.
        averaged_model = _average_snapshots(cumulative_model, model, steps)

        dev_err = test(devfile, averaged_model)
        best_err = min(best_err, dev_err)
        # Guard against an empty training file, which would otherwise divide by zero.
        update_rate = epoch_updates / epoch_examples * 100 if epoch_examples else 0.0
        print(f"epoch {it}, updates {update_rate:.1f}%, dev {dev_err * 100:.1f}%")

    # Only reached with averaged_model unset when no epoch ran at all.
    if averaged_model is None:
        averaged_model = _average_snapshots(cumulative_model, model, steps)

    print(f"best dev err {best_err * 100:.1f}%, |w|={len(model)}, time: {time.time() - t:.1f} secs")
    return averaged_model


In [3]:
# Paths of the training and development sets handed to train().
trainfile = 'train.txt'
devfile = 'dev.txt'
# Bind the result to a name: a bare trailing call would make the notebook
# display the entire returned weight vector and leave the trained model
# unavailable to later cells.
averaged_model = train(trainfile, devfile, epochs=10)

TypeError: make_vector() got an unexpected keyword argument 'bias'