# Matija Kolarić

# Descriptors and Weak References
## How to create Django-like models & fields with low-level Python?




# About me

* Primarily Python/Django developer since 2008

* Currently leaving CARNET (a long story)
 
* JAKO SEXY obrt za usluge (freelance)


* https://matijakolaric.com
* https://www.linkedin.com/in/matijakolaric/

# Generate, parse and validate CWR format
<code style="white-space: pre;">
HDRPB000000199THE SENDER                                   01.102018032210203720180322
GRHNWR0000102.100000000000
NWR0000000000000000THE WORK                                                      CLAIMDIGITAL01           00000000            POP000000Y      ORI         MRS. SMITH                                N00000000000
SPU000000000000000101HCJ3EBJT6THE PUBLISHER                                 E 00000000000000000297              0100250003405000   05000 N
SPU000000000000000201ILE7GIQMJTHE SENDER                                    AM00000000000000000199              0100000003400000   00000 N
SPT0000000000000003ILE7GIQMJ      025000500005000I2136N001
SWR00000000000000042RDRB3FHXDOE                                          JOHN                           CA0000000000000000039501002500   00000   00000 N
SWT00000000000000052RDRB3FHX025000000000000I2136N001
PWR0000000000000006HCJ3EBJT6THE PUBLISHER                                                            2RDRB3FHX
OWR0000000000000007JZL2GMGV3DOE                                          JANE                           CA00000000000000000000   05000   05000   05000 N
GRT000010000000100000010   0000000000
TRL000010000000100000012
</code>

```python
# ~10 years ago...

def clean(self):
    """Validate the uploaded file content and record its basic statistics.

    Loads ``self.content`` from ``self.uploaded_file`` when it is empty and
    tries to reduce it to ASCII, then cross-checks the header (HDR), the
    trailer (TRL) and the group records against the actual content.
    On success ``created``, ``group_count``, ``work_count`` and
    ``line_count`` are set on ``self`` and ``uploaded_file`` is cleared.

    NOTE(review): the use of ``unicode`` and of ``UnicodeDecodeError``
    around ``.encode`` suggests this was written for Python 2 -- confirm
    before running it on Python 3.

    Raises:
        ValidationError: if neither a file nor content is present, if
            non-ASCII data cannot be converted, or if any of the HDR, TRL,
            group, work or line checks fails.
    """
    if not self.uploaded_file and not self.content:
        raise ValidationError('Please attach the file.')

    # Everything down to the ValidationError report only runs when the
    # content has to be read from the uploaded file.
    if not self.content:
        self.content = self.uploaded_file.read()

        try:
            # Try ASCII and change to default
            # The result `c` is not used; only whether encoding raises
            # matters here.
            c = self.content.encode('ascii')
            # presumably 1 identifies the default (ASCII) encoding --
            # TODO confirm against the encoding model
            self.encoding = 1
        except:
            # Not ASCII - use whatever is specified, good luck!
            try:
                content = []
                # splitlines(1) keeps the line endings, so the final join
                # restores the original line structure.
                for i, c in enumerate(self.content.splitlines(1)):
                    c = c.decode(self.encoding.code)
                    try:
                        # Transliterate to ASCII and upper-case the line.
                        c  = unidecode(c).upper()
                    except:
                        # Best effort: keep the decoded line unchanged.
                        pass
                    content.append(c)
                self.content = ''.join(content)
            except Exception:
                # let's create a proper report
                counter = 0  # number of offending lines
                firstline = 0  # becomes (line number, text) of the first one
                otherlines = []  # line numbers of all later offending lines
                for i, line in enumerate(self.content.splitlines()):
                    try:
                        line.encode('ascii')
                    except UnicodeDecodeError:
                        if not firstline:
                            firstline = (i + 1,
                                         unicode(line, errors='ignore'))
                        else:
                            otherlines.append(str(i + 1))
                        counter += 1
                # NOTE(review): if no line raised above, `firstline` is
                # still 0 and the `{1[0]}` lookup below cannot succeed.
                msg = '''{0} Non-ASCII character(s) detected, starting with
                    line {1[0]}: {1[1]} including lines:{2}'''
                msg = msg.format(counter, firstline, ', '.join(otherlines))
                raise ValidationError(msg)

    # The content is now held on the instance; drop the file reference.
    self.uploaded_file = None

    # HDR check & basic data
    if self.content[0:3] != 'HDR':
        raise ValidationError('HDR Error')

    # HDR is first line, no regex in sender creation
    # NOTE(review): `sender_id` is not used again in this function.
    sender_id = self.content[5:14]
    try:
        # Characters 64-77 of the header are parsed as YYYYMMDDHHMMSS.
        self.created = datetime.strptime(self.content[64:78],
                                         '%Y%m%d%H%M%S')
    except:
        msg = 'HDR datetime error: {}'
        msg = msg.format(self.content[64:78])
        raise ValidationError(msg)
    # Each match is indexed below as (group count, work count, line count).
    trl = re.findall(self.RE_TRL, self.content)

    # Exactly one TRL allowed
    if len(trl) != 1:
        raise ValidationError('TRL Error')

    # Group counter, also counting works and lines
    gc_1 = 0  # groups counted while iterating
    wc_3 = 0  # works, summed from the per-group matches
    lc_3 = 2  # HDR & TRL records
    # Each match is used as (group id, work count, line count).
    for gr in re.findall(self.RE_GR, self.content):
        gc_1 += 1
        # Group ids must be sequential, starting at 1.
        if gc_1 != int(gr[0]):
            msg = 'Group ID mismatch: {0} vs {1}'.format(gc_1, gr[0])
            raise ValidationError(msg)
        wc_3 += int(gr[1])
        lc_3 += int(gr[2])
    gc_2 = int(trl[0][0])  # group count as declared in the TRL
    if gc_1 != gc_2:
        msg = 'Group count mismatch: {0} vs {1}'.format(gc_1, gc_2)
        raise ValidationError(msg)

    # Work counter
    # Three sources must agree: TRL, regex matches in the content, and the
    # sum over the groups.
    wc_1 = int(trl[0][1])
    wc_2 = len(re.findall(self.RE_WORK_COUNTER, self.content))
    if wc_1 != wc_2 or wc_1 != wc_3:
        msg = 'Work count mismatch: {0} vs {1} vs {2}'
        msg = msg.format(wc_1, wc_2, wc_3)
        raise ValidationError(msg)

    # Line counter
    # Three sources must agree: actual lines, TRL, and the sum over the
    # groups plus the HDR and TRL lines.
    lc_1 = len(self.content.splitlines())
    lc_2 = int(trl[0][2])
    if lc_1 != lc_2 or lc_1 != lc_3:
        msg = 'Line count mismatch: {0} vs {1} vs {2}'
        msg = msg.format(lc_1, lc_2, lc_3)
        raise ValidationError(msg)
    self.group_count = gc_1
    self.work_count = wc_1
    self.line_count = lc_1
```

```python
# Now - fields

class EdiConstantField(EdiField):
    """EDI field whose only valid value is a fixed constant.

    The expected constant is kept in ``self._constant``. When no constant
    is given, the field expects ``size`` spaces.
    """

    def __init__(self, size, constant=None, *args, **kwargs):
        """Set up the field.

        ``size`` is the field width in characters and ``constant`` the
        expected value, which must be exactly ``size`` characters long.
        Remaining arguments are passed on to ``EdiField``.

        Raises ``AttributeError`` if ``constant`` has the wrong length.
        """
        if constant:
            if len(constant) != size:
                # Report the offending constant itself; there is no
                # ``value`` in this scope.
                raise AttributeError(
                    f'Value "{ constant }" is not { size } characters long.')
            self._constant = constant
        else:
            self._constant = ' ' * size
        super().__init__(size, *args, **kwargs)

    def __set__(self, instance, value):
        """Hand ``value`` to the parent field, then flag any mismatch.

        The parent ``__set__`` is called first on purpose, so a wrong
        value is still passed on before ``FieldWarning`` is raised.
        """
        super().__set__(instance, value)
        if value != self._constant:
            raise FieldWarning(
                f'Value must be "{ self._constant }", not "{ value }"')
```

```python
# Now - records

class EdiTRL(EdiRecord):
    """File trailer, minimal requirements.

    Declares four fixed-width fields, 24 characters in total
    (3 + 5 + 8 + 8).
    """
    
    # Record marker: the 3-character constant 'TRL'.
    record_type = EdiConstantField(size=3, constant='TRL', mandatory=True)
    # 5-digit numeric field; presumably the number of groups in the file.
    group_count = EdiNumericField(size=5, mandatory=True)
    # 8-digit numeric field; presumably the number of transactions in the file.
    transaction_count = EdiNumericField(size=8, mandatory=True)
    # 8-digit numeric field; presumably the number of records in the file.
    record_count = EdiNumericField(size=8, mandatory=True)


class EdiGRH(EdiRecord):
    """Group header, minimal requirements.

    Declares three fixed-width fields, 11 characters in total (3 + 3 + 5).
    """
    
    # Record marker: the 3-character constant 'GRH'.
    record_type = EdiConstantField(size=3, constant='GRH', mandatory=True)
    # 3-character free-form field; presumably the transaction type code
    # shared by the whole group -- TODO confirm.
    transaction_type = EdiField(size=3, mandatory=True)
    # 5-digit numeric field; presumably the sequence number of the group.
    group_id = EdiNumericField(size=5, mandatory=True)


class EdiGRT(EdiRecord):
    """Group trailer, minimal requirements.

    Declares four fixed-width fields, 24 characters in total
    (3 + 5 + 8 + 8).
    """
    
    # Record marker: the 3-character constant 'GRT'.
    record_type = EdiConstantField(size=3, constant='GRT', mandatory=True)
    # 5-digit numeric field; presumably matches the group header's id.
    group_id = EdiNumericField(size=5, mandatory=True)
    # 8-digit numeric field; presumably the number of transactions in the group.
    transaction_count = EdiNumericField(size=8, mandatory=True)
    # 8-digit numeric field; presumably the number of records in the group.
    record_count = EdiNumericField(size=8, mandatory=True)
```

# https://github.com/musicmetadata/edi


# Descriptors

* objects that have special behavior when they’re accessed as attributes of other objects

* implement some of:

```python
__get__(self, obj, type=None) -> object

__set__(self, obj, value) -> None

__delete__(self, obj) -> None

__set_name__(self, owner, name)
```

# https://realpython.com/python-descriptors/

In [1]:
class NameField:
    """Descriptor skeleton: accepts assignments but remembers nothing."""

    def __get__(self, obj, type=None):
        """Always yield ``None``, because no value is ever kept."""
        return None

    def __set__(self, obj, value):
        """Silently discard ``value``."""
        return None

    
class Person:
    """Person whose two name parts are handled by ``NameField``."""

    first_name = NameField()
    last_name = NameField()

    def __str__(self):
        """Return both name parts separated by a single space."""
        first, last = self.first_name, self.last_name
        return f'{first} {last}'


In [None]:
# Both assignments are routed through NameField.__set__ and the print
# reads the values back through NameField.__get__.
person = Person()
person.first_name = 'Matija'
person.last_name = 'Kolarić'
print(person)


In [2]:
# Both assignments are routed through NameField.__set__ and the print
# reads the values back through NameField.__get__.
person = Person()
person.first_name = 'Matija'
person.last_name = 'Kolarić'
print(person)


None None


# Storing data

We must store it somewhere, so let us try a dictionary...

In [3]:
class NameField:
    """Descriptor that keeps each owner's value in a plain dictionary.

    The owner instance itself is the dictionary key, so the dictionary
    holds a strong reference to every instance that was assigned to.
    """

    def __init__(self):
        """Create the empty value store of this descriptor."""
        self.valuedict = dict()

    def __get__(self, obj, type=None):
        """Look up the value stored for ``obj``; ``KeyError`` if none."""
        stored = self.valuedict[obj]
        return stored

    def __set__(self, obj, value):
        """Remember ``value`` for ``obj``."""
        self.valuedict.update({obj: value})


In [None]:
class Person:
    """Person whose two name parts are handled by ``NameField``."""

    first_name = NameField()
    last_name = NameField()

    def __str__(self):
        """Return both name parts separated by a single space."""
        first, last = self.first_name, self.last_name
        return f'{first} {last}'


# Assign both parts through the descriptors, then print via __str__.
person = Person()
person.first_name = 'Matija'
person.last_name = 'Kolarić'
print(person)


In [4]:
class Person:
    """Person whose two name parts are handled by ``NameField``."""

    first_name = NameField()
    last_name = NameField()

    def __str__(self):
        """Return both name parts separated by a single space."""
        first, last = self.first_name, self.last_name
        return f'{first} {last}'


# Assign both parts through the descriptors, then print via __str__.
person = Person()
person.first_name = 'Matija'
person.last_name = 'Kolarić'
print(person)


Matija Kolarić


# The End

# The End ... is not even close

We have a memory leak!!

In [None]:
import names  # name generator
import guppy  # heap analysis
import gc  # garbage collector

# Set up heapy and mark the current heap state as the reference point.
heapy = guppy.hpy()
heapy.setref()  # report differences from this point

# Create 1000 Person objects one after another; `person` is rebound on
# every pass, so this loop itself keeps only the last one referenced.
for i in range(1000):
    person = Person()
    person.first_name = names.get_first_name()
    person.last_name = names.get_last_name()
# Collect first, so that what the report still shows is really referenced
# from somewhere and not merely waiting to be collected.
gc.collect()
# Print the first three rows of the heap report.
print(heapy.heap()[0:3])

In [5]:
import names  # name generator
import guppy  # heap analysis
import gc  # garbage collector

# Set up heapy and mark the current heap state as the reference point.
heapy = guppy.hpy()
heapy.setref()  # report differences from this point

# Create 1000 Person objects one after another; `person` is rebound on
# every pass, so this loop itself keeps only the last one referenced.
for i in range(1000):
    person = Person()
    person.first_name = names.get_first_name()
    person.last_name = names.get_last_name()
# Collect first, so that what the report still shows is really referenced
# from somewhere and not merely waiting to be collected.
gc.collect()
# Print the first three rows of the heap report.
print(heapy.heap()[0:3])

Partition of a set of 3003 objects. Total size = 167545 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0   2000  67   110049  66    110049  66 str
     1   1000  33    56000  33    166049  99 __main__.Person
     2      3   0     1496   1    167545 100 types.FrameType


# Remember  `NameField.valuedict` ?

It keeps our Person objects as keys... and as long as they are ________ referenced, the garbage collector will not touch them!

We could delete the attributes explicitly (`del person.first_name`) and remove the dictionary entry in a ``__delete__`` method, but that is not very Pythonic.

# The might of weak references

`NameField.valuedict` keeps our Person objects as keys... and as long as they are **strongly** referenced, the garbage collector will not touch them!

We will use *weak references* instead. When an object is referenced only by *weak references*, the garbage collector is free to destroy it and reclaim the memory.


In [6]:
from weakref import WeakKeyDictionary


class NameField:
    """Descriptor that keeps each owner's value in a ``WeakKeyDictionary``.

    The owner instance is the key, but it is only weakly referenced, so
    storing a value never keeps the owner alive.
    """

    def __init__(self):
        """Create the empty value store of this descriptor."""
        self.valuedict = WeakKeyDictionary()  # The right way

    def __set__(self, obj, value):
        """Remember ``value`` for ``obj``."""
        self.valuedict[obj] = value

    def __get__(self, obj, type=None):
        """Return the value stored for ``obj``.

        Accessed on the class itself (``obj`` is ``None``), the descriptor
        is returned instead; ``None`` cannot be weakly referenced, so a
        lookup would fail with ``TypeError``.

        Raises ``KeyError`` if nothing was stored for ``obj``.
        """
        if obj is None:
            return self
        return self.valuedict[obj]


class Person:
    """Person whose two name parts are handled by ``NameField``."""

    first_name = NameField()
    last_name = NameField()

    def __str__(self):
        """Return both name parts separated by a single space."""
        first, last = self.first_name, self.last_name
        return f'{first} {last}'

In [None]:
import names  # name generator
import guppy  # heap analysis
import gc  # garbage collector

# Set up heapy and mark the current heap state as the reference point.
heapy = guppy.hpy()
heapy.setref()  # report differences from this point

# Create 1000 Person objects one after another; `person` is rebound on
# every pass, so this loop itself keeps only the last one referenced.
for i in range(1000):
    person = Person()
    person.first_name = names.get_first_name()
    person.last_name = names.get_last_name()
# Collect first, so that what the report still shows is really referenced
# from somewhere and not merely waiting to be collected.
gc.collect()
# Print the first three rows of the heap report.
print(heapy.heap()[0:3])

In [7]:
import names  # name generator
import guppy  # heap analysis
import gc  # garbage collector

# Set up heapy and mark the current heap state as the reference point.
heapy = guppy.hpy()
heapy.setref()  # report differences from this point

# Create 1000 Person objects one after another; `person` is rebound on
# every pass, so this loop itself keeps only the last one referenced.
for i in range(1000):
    person = Person()
    person.first_name = names.get_first_name()
    person.last_name = names.get_last_name()
# Collect first, so that what the report still shows is really referenced
# from somewhere and not merely waiting to be collected.
gc.collect()
# Print the first three rows of the heap report.
print(heapy.heap()[0:3])

Partition of a set of 19 objects. Total size = 2256 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0      3  16     1496  66      1496  66 types.FrameType
     1      6  32      480  21      1976  88 builtins.weakref
     2     10  53      280  12      2256 100 int


In [8]:
from weakref import WeakKeyDictionary


class NameField:
    """Descriptor storing one whitespace-stripped string per owner.

    Values live in a ``WeakKeyDictionary`` keyed by the owner instance, so
    the descriptor never keeps an owner alive. An owner without a stored
    value reads back as ``None``.
    """

    def __init__(self):
        """Create the empty value store of this descriptor."""
        self.valuedict = WeakKeyDictionary()  # The right way

    def __set__(self, obj, value):
        """Store ``value`` for ``obj`` with surrounding whitespace removed.

        Assigning ``None`` clears the stored value. ``__get__`` already
        reports "no value" as ``None``, so this makes the two symmetric
        and lets owners use ``None`` as their default.

        Raises ``AttributeError`` if ``value`` is neither ``None`` nor a
        string.
        """
        if value is None:
            self.valuedict.pop(obj, None)
            return
        if not isinstance(value, str):
            raise AttributeError('Value is not a string')
        self.valuedict[obj] = value.strip()

    def __get__(self, obj, type=None):
        """Return the value stored for ``obj``, or ``None`` if unset.

        Accessed on the class itself (``obj`` is ``None``), the descriptor
        is returned instead; ``None`` cannot be weakly referenced, so a
        lookup would fail with ``TypeError``.
        """
        if obj is None:
            return self
        return self.valuedict.get(obj)


In [None]:
class Person:
    """Person with an optional first and last name.

    Both name parts are ``NameField`` descriptors. A part that was not
    supplied is left unset rather than being assigned ``None``.
    """

    first_name = NameField()
    last_name = NameField()

    def __init__(self, first_name=None, last_name=None):
        """Store whichever name parts were actually supplied."""
        # The defaults are None; skip them so an omitted name is never
        # pushed through the field's string validation.
        if first_name is not None:
            self.first_name = first_name
        if last_name is not None:
            self.last_name = last_name

    def __str__(self):
        """Return both name parts separated by a single space."""
        return f'{self.first_name} {self.last_name}'


print(Person('Matija', 'Kolarić'))


In [9]:
class Person:
    """Person with an optional first and last name.

    Both name parts are ``NameField`` descriptors. A part that was not
    supplied is left unset rather than being assigned ``None``.
    """

    first_name = NameField()
    last_name = NameField()

    def __init__(self, first_name=None, last_name=None):
        """Store whichever name parts were actually supplied."""
        # The defaults are None; skip them so an omitted name is never
        # pushed through the field's string validation.
        if first_name is not None:
            self.first_name = first_name
        if last_name is not None:
            self.last_name = last_name

    def __str__(self):
        """Return both name parts separated by a single space."""
        return f'{self.first_name} {self.last_name}'


print(Person('Matija', 'Kolarić'))


Matija Kolarić


In [10]:
    
class Company:
    """Company with an optional name managed by a ``NameField``."""

    name = NameField()

    def __init__(self, name=None):
        """Store the name if one was supplied."""
        # The default is None; skip it so an omitted name is never pushed
        # through the field's string validation.
        if name is not None:
            self.name = name

    def __str__(self):
        """Return the company name as text."""
        # __str__ must return a str, while an unset name may read back as
        # a non-string; str() leaves a real name unchanged.
        return str(self.name)


print(Company('ACME'))

ACME
