#### Tuples & DataClasses

In [5]:
# Helper functions
import inspect

def _first_argument_source(func_name):
    """Return the source text of the first argument in the call to ``func_name``.

    The line of source that called one of the ``print_*`` helpers is read via
    ``inspect`` and scanned from the opening parenthesis of the call up to the
    first top-level comma or the matching closing parenthesis. Nested
    brackets and quoted strings are skipped, so ``f(max(1, 2))`` yields
    ``'max(1, 2)'`` and a trailing ``# comment`` is not included.

    Args:
        func_name: Name of the helper whose call should be located in the line.
            If ``func_name(`` does not occur in the line, the first ``(`` is used.

    Returns:
        The argument text, or ``'<unknown>'`` when the source line is not
        available or no argument text could be extracted.
    """
    frame = inspect.currentframe()
    caller = None
    try:
        # frame is this helper; f_back is the print_* function; f_back.f_back
        # is the code that called the print_* function.
        if frame is not None and frame.f_back is not None:
            caller = frame.f_back.f_back
        context = inspect.getframeinfo(caller).code_context if caller is not None else None
    finally:
        # Frames referenced from locals create reference cycles; drop them early.
        del frame, caller
    if not context:
        # code_context is None when the source of the calling code is unavailable.
        return '<unknown>'

    line = context[0].strip()
    marker = func_name + '('
    marker_at = line.find(marker)
    if marker_at != -1:
        start = marker_at + len(marker)
    else:
        start = line.find('(') + 1

    nesting = 0   # depth of brackets opened inside the argument
    quote = None  # quote character of the string literal being skipped, if any
    pos = start
    while pos < len(line):
        ch = line[pos]
        if quote is not None:
            if ch == '\\':
                pos += 1  # skip the escaped character
            elif ch == quote:
                quote = None
        elif ch in '\'"':
            quote = ch
        elif ch in '([{':
            nesting += 1
        elif ch in ')]}':
            if nesting == 0:
                break  # closing parenthesis of the call itself
            nesting -= 1
        elif ch == ',' and nesting == 0:
            break  # end of the first argument
        pos += 1
    return line[start:pos].strip() or '<unknown>'


def print_name_value_addr(variable):
    """Print ``<argument source> = <value>, at memory addr(<id>)``.

    Args:
        variable: Any object; its ``id()`` is printed as the memory address.
    """
    single_arg = _first_argument_source('print_name_value_addr')
    mem_variable = id(variable)
    print(f'{single_arg} = {variable}, at memory addr({mem_variable})')


def print_name_value(variable):
    """Print ``<argument source> = <value>`` for the expression passed in.

    Args:
        variable: Any object to display.
    """
    single_arg = _first_argument_source('print_name_value')
    print(f'{single_arg} = {variable}')


def print_default_dict(variable):
    """Print ``<argument source> = <value>`` with the value converted to a plain dict.

    Args:
        variable: Anything accepted by ``dict()`` (for example a ``defaultdict``).
    """
    variable = dict(variable)
    variable_name = _first_argument_source('print_default_dict')
    print(f'{variable_name} = {variable}')

#### Python Tuples

In [6]:
# Lists are mutable: an element can be replaced in place.
a = [1,2]
print(a)
a[0] = 100
print(a)

[1, 2]
[100, 2]


In [3]:
# Tuples are immutable: item assignment raises TypeError.
a = (1, 2)
print(a)
try:
    a[0] = 100
    print(a)  # never reached: the assignment above always raises
except TypeError as err:
    # The exception is the point of this demo; report it instead of letting it
    # abort a "Restart & Run All".
    print(f'TypeError: {err}')

(1, 2)


TypeError: 'tuple' object does not support item assignment

In [4]:
def op1():
    """Show that tuple item assignment also fails inside a function.

    Raises:
        TypeError: always, when assigning to ``a[0]``.
    """
    a = (1, 2)
    print(a)
    a[0] = 100
    print(a)  # never reached: the assignment above always raises


# The exception is the point of this demo; report it instead of letting it
# abort a "Restart & Run All".
try:
    op1()
except TypeError as err:
    print(f'TypeError: {err}')

(1, 2)


TypeError: 'tuple' object does not support item assignment

In [17]:
d = {} # dict() -- both spellings create an empty dictionary
d['San Francisco'] = 'CA'
d['New York'] = 'NY'
# Add ten synthetic 'city-N' -> 'St-N' entries.
for i in range(10):
    d[f'city-{i}'] = f'St-{i}'
print(d)

{'San Francisco': 'CA', 'New York': 'NY', 'city-0': 'St-0', 'city-1': 'St-1', 'city-2': 'St-2', 'city-3': 'St-3', 'city-4': 'St-4', 'city-5': 'St-5', 'city-6': 'St-6', 'city-7': 'St-7', 'city-8': 'St-8', 'city-9': 'St-9'}


In [18]:
# How dictionaries work (simplified hash-table model).
d['San Francisco']
# The key is hashed, and the hash is reduced to a slot index in a fixed-size table.
# offset = hash('San Francisco')
# NOTE(review): a tuple is hashed here instead of the string, presumably because
# str hashes are randomized per process, so the result would change between runs -- confirm.
offset = hash((1,2))
# 1001 is the illustrative table size used in the sketch below (slots 0..1000).
offset % 1001
# slot : 0 1 2 3 4 5 6 ...  828                        ... 1000
# value: x x x x x x x ... [('San Francisco','CA')] x x ... x
# (828 is an illustrative slot for 'San Francisco'; it is not the value computed above.)


857

In [7]:
# Tuples are like imutable records.
# They are a convenient way to pack data.
address = (1, 'Main St 123', 'Marin', 'CA', '94027', 1990, 2001)
print_name_value(address)
print_name_value(address[2])

address = (1, 'Main St 123', 'Marin', 'CA', '94027', 1990, 2001)
address[2] = Marin


In [10]:
# A little more readable, but not great: named constants stand in for tuple positions.
FLD_ID = 0
FLD_CITY = 2
address = (1, 'Main St 123', 'Marin', 'CA', '94027', 1990, 2001)
print_name_value(address)
print_name_value(address[FLD_CITY])

address = (1, 'Main St 123', 'Marin', 'CA', '94027', 1990, 2001)
address[FLD_CITY] = Marin


In [8]:
# Python 3.7 added data classes, which make record-like code clearer.
from dataclasses import dataclass

# frozen=False is the dataclass default, spelled out here: instances stay mutable.
@dataclass(frozen=False)
class AddressRecord:
    """A record representing an employee address"""
    # Positional construction follows this declaration order.
    idx: int
    street: str
    city: str
    state: str    
    zip_code: str
    move_in: int
    move_out: int = 0  # the only field with a default; may be omitted when constructing

# dataclasses are records.
# Positional construction: arguments follow the field declaration order.
address2 = AddressRecord(1, 'Main St 123', 'Marin', 'CA', '94027', 1990, 2001)
print_name_value(address2)
# Keyword construction: every value is labelled with its field name.
address3 = AddressRecord(
    idx=1, street='Main St 123', city='Marin', 
    state='CA', zip_code='94027', move_in=1990, 
    move_out=2001)
print_name_value(address3)
# A lot better than print(address[2])
print_name_value(address2.city)

address2 = AddressRecord(idx=1, street='Main St 123', city='Marin', state='CA', zip_code='94027', move_in=1990, move_out=2001)
address3 = AddressRecord(idx=1, street='Main St 123', city='Marin', state='CA', zip_code='94027', move_in=1990, move_out=2001)
address2.city = Marin


In [27]:
# AddressRecord is declared with frozen=False, so a field can be reassigned in place.
address2.city = 'NY'
address2

AddressRecord(idx=1, street='Main St 123', city='NY', state='CA', zip_code='94027', move_in=1990, move_out=2001)

In [25]:
# If I create a record and set frozen=True, it is immutable.
@dataclass(frozen=True)
class AddressRecord2:
    """A record representing an employee address (immutable variant)"""
    idx: int
    street: str
    city: str
    state: str
    zip_code: str
    move_in: int
    move_out: int = 0  # the only field with a default

immutable_addr = AddressRecord2(
    idx=1, street='Main St 123', city='Marin',
    state='CA', zip_code='94027', move_in=1990,
    move_out=2001)
print(immutable_addr)
try:
    immutable_addr.city = 'x'
except AttributeError as err:
    # dataclasses.FrozenInstanceError is a subclass of AttributeError. The
    # exception is the point of this demo, so report it instead of letting it
    # abort a "Restart & Run All".
    print(f'{type(err).__name__}: {err}')


AddressRecord2(idx=1, street='Main St 123', city='Marin', state='CA', zip_code='94027', move_in=1990, move_out=2001)


FrozenInstanceError: cannot assign to field 'city'

#### Python Collections - Sets

In [9]:
from pprint import pprint
# Repeating the list 0..5 three times gives every value exactly three times.
list_with_duplicates = list(range(6)) * 3
print_name_value(list_with_duplicates)

list_with_duplicates = [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5]


In [10]:
# A set keeps each distinct element once; adding a value already present is a no-op.
my_set = set()
for elm in list_with_duplicates:
    my_set.add(elm)
print_name_value(my_set)

my_set = {0, 1, 2, 3, 4, 5}


In [14]:
# Be careful with strings: iterating over a str yields its characters one by one.
values = ['ab', 'cd', 'ef']
def print_results(v):
    """Print every item of ``v`` followed by ', ', then a space and a newline."""
    rendered = ''.join(str(item) + ', ' for item in v)
    print(rendered + ' ')

print_results(values)
print('-' * 20)
val2 = 'san francisco'
print_results(val2)

ab, cd, ef,  
--------------------
s, a, n,  , f, r, a, n, c, i, s, c, o,  


In [15]:
# Strings are iterated character by character, so set(<str>) is a set of its distinct chars.
s_sf = set('san francisco')
s_ma = set('marin')
# Sets are unordered, so the printed order of the characters is arbitrary.
print(s_sf)
print(s_ma)

{'r', 'o', ' ', 'f', 'c', 'i', 'n', 's', 'a'}
{'r', 'm', 'n', 'i', 'a'}


In [41]:
s_sf - s_ma # difference: chars in s_sf but not in s_ma

{' ', 'c', 'f', 'o', 's'}

In [43]:
s_sf | s_ma # union: chars in s_sf or in s_ma (or both)

{' ', 'a', 'c', 'f', 'i', 'm', 'n', 'o', 'r', 's'}

In [44]:
s_sf & s_ma # intersection: chars in both s_sf and s_ma

{'a', 'i', 'n', 'r'}

In [45]:
s_sf ^ s_ma # symmetric difference: chars in exactly one of the two sets

{' ', 'c', 'f', 'm', 'o', 's'}

In [46]:
# Every third integer starting at 1 and stopping before 50.
list(range(1, 50, 3))

[1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49]

In [47]:
# There are also set comprehensions.
# Same expression twice: the list keeps every remainder, the set keeps each distinct one once.
list_s3 = [x % 9 for x in range(1, 50, 3)]
print_name_value(list_s3)
set_s3 = {x % 9 for x in range(1, 50, 3)}
print_name_value(set_s3)

list_s3 = [1, 4, 7, 1, 4, 7, 1, 4, 7, 1, 4, 7, 1, 4, 7, 1, 4]
set_s3 = {1, 4, 7}


In [48]:
# Three small sets used by the membership and set-method examples that follow.
s4 = {1,2,3,5}
s5 = {1,2,3,4,5}
s6 = {8, 9}

In [49]:
# Test if an element belongs to a set with the `in` operator.
3 in s4

True

In [50]:
2 not in s5  # `not in` is the negated membership test; 2 is in s5, so this is False

False

In [51]:
s4.isdisjoint(s6)  # True: s4 and s6 have no element in common

True

In [52]:
s4.isdisjoint(s5)  # False: s4 and s5 share elements

False

In [53]:
s4.issubset(s5)  # True: every element of s4 is also in s5

True

In [54]:
s4.union(s5, s6)  # union() accepts several sets; returns a new set, s4 is unchanged

{1, 2, 3, 4, 5, 8, 9}

In [55]:
s4.intersection(s5)  # elements present in both s4 and s5 (same as s4 & s5)

{1, 2, 3, 5}

In [56]:
s5.difference(s4)  # elements of s5 that are not in s4 (same as s5 - s4)

{4}

#### Sets are very useful for comparing which properties are present in different files


In [16]:
# Raw comma-separated text; every line after the first starts right after a newline.
input_txt = """
rainy,pressure,
overcast,shower,sunrise,
dry,tornado,humidity,cold,heat,
wind,cloudy,heat wave,fog,breeze,humid"""

# Step 1: split on commas. Pieces that began a line still carry a leading '\n'.
splited_text = input_txt.split(',')
print('Step 1')
print(splited_text)

# Step 2: remove the newline characters from both ends of every piece.
weather_words = [piece.strip('\n') for piece in splited_text]
print('Step 2')
print(weather_words)

Step 1
['\nrainy', 'pressure', '\novercast', 'shower', 'sunrise', '\ndry', 'tornado', 'humidity', 'cold', 'heat', '\nwind', 'cloudy', 'heat wave', 'fog', 'breeze', 'humid']
Step 2
['rainy', 'pressure', 'overcast', 'shower', 'sunrise', 'dry', 'tornado', 'humidity', 'cold', 'heat', 'wind', 'cloudy', 'heat wave', 'fog', 'breeze', 'humid']


In [17]:
# str.join glues the words together with ', ' between them; the brackets are added by hand.
print('[' + ', '.join(weather_words) +']')

[rainy, pressure, overcast, shower, sunrise, dry, tornado, humidity, cold, heat, wind, cloudy, heat wave, fog, breeze, humid]


In [61]:
# Any string can be the separator; here the words are joined with '|'.
print('|'.join(weather_words))

rainy|pressure|overcast|shower|sunrise|dry|tornado|humidity|cold|heat|wind|cloudy|heat wave|fog|breeze|humid


In [20]:
# Good - the "with" block closes the file on exit, even if an error occurs inside it.
# Writes the numbers 0..9 to new_file.txt, one per line ('w' overwrites an existing file).
with open('new_file.txt', 'w') as out_f:
    for i in range(10):
        out_f.write(str(i)+'\n')

In [22]:
# Good - If you use the "with" construct, it guarantees that files will be closed.

def process_line(in_file, out_file1, out_file2):
    """Write every line of ``in_file`` to ``out_file1``, stripped and suffixed with '-25'.

    Args:
        in_file: Iterable of text lines (for example an open text file).
        out_file1: Writable text file that receives one output line per input line.
        out_file2: Accepted but never written to by this function.
    """
    for raw_line in in_file:
        out_file1.write(raw_line.strip() + '-25\n')
        
# One "with" statement can manage several files: they are opened left to right
# and closed in reverse order when the block exits, exactly like nested "with" blocks.
with open('new_file.txt', 'r') as in_f, \
        open('another_file.txt', 'w') as out_f, \
        open('a_third_file.txt', 'w') as out_f2:
    process_line(in_f, out_f, out_f2)
        

In [39]:
# Bad - If there is an error, file handles could be left open.
# (Deliberate anti-pattern: close() is only reached when no write raises.
# Compare with the "with" version of the same loop.)
out_f = open('new_file.txt', 'w')
for i in range(10):
    out_f.write(str(i)+'\n')
out_f.close()

In [24]:
# Read the file back: readlines() returns a list of all lines, each still ending in '\n'.
with open('new_file.txt', 'r') as in_f:
    lines = in_f.readlines()
    for line in lines:
        # strip() removes the trailing newline so print() does not emit a blank line.
        clean_line = line.strip()
        print(clean_line)

0
1
2
3
4
5
6
7
8
9


In [25]:
# Stopped ----
# Let's generate some sets.
import random
random.seed(100)  # This guarantees that the random numbers are always the same.
# Each word is kept unless the draw from {1, 2, 3} is 2. The two comprehensions
# consume random numbers one after the other, so their order affects both results.
experiment = {x for x in weather_words if random.randint(1,3)!=2}
control = {x for x in weather_words if random.randint(1,3)!=2}
print_name_value(experiment)
print_name_value(control)

experiment = {'rainy', 'shower', 'wind', 'heat', 'cloudy', 'heat wave', 'sunrise', 'fog', 'breeze', 'tornado'}
control = {'shower', 'overcast', 'heat', 'pressure', 'wind', 'cloudy', 'cold', 'sunrise', 'breeze', 'tornado'}


In [26]:
# Set operators answer the usual comparison questions directly.
print(f'Common in both    : {experiment & control}')   # intersection
print(f'Only in Control   : {control - experiment}')   # difference
print(f'Only in Experiment: {experiment - control}')   # difference, other direction

Common in both    : {'shower', 'wind', 'cloudy', 'heat', 'sunrise', 'breeze', 'tornado'}
Only in Control   : {'cold', 'overcast', 'pressure'}
Only in Experiment: {'rainy', 'fog', 'heat wave'}


In [27]:
# A list and a dict built from the same range: the dict maps each number to True.
a = list(range(10))
b = {i:True for i in range(10)}
print_name_value(a)
print_name_value(b)
# Assigning to an existing key replaces its value in place.
b[5] = False
print_name_value(b)

a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
b = {0: True, 1: True, 2: True, 3: True, 4: True, 5: True, 6: True, 7: True, 8: True, 9: True}
b = {0: True, 1: True, 2: True, 3: True, 4: True, 5: False, 6: True, 7: True, 8: True, 9: True}


In [68]:
# Group the words by their first character: {first char: [words...]}.
my_catalog = {}
for word in weather_words:
    key = word[0]  # first character of the word
    # setdefault returns the list already stored under key, or stores and
    # returns a new empty list when the key is seen for the first time.
    my_catalog.setdefault(key, []).append(word)
print_name_value(my_catalog)

my_catalog = {'r': ['rainy'], 'p': ['pressure'], 'o': ['overcast'], 's': ['shower', 'sunrise'], 'd': ['dry'], 't': ['tornado'], 'h': ['humidity', 'heat', 'heat wave', 'humid'], 'c': ['cold', 'cloudy'], 'w': ['wind'], 'f': ['fog'], 'b': ['breeze']}


In [73]:
# Indexing with [] raises KeyError for a missing key ...
x1 = my_catalog['r']
print_name_value(x1)
# ... while get() returns its second argument instead. 'c' exists, so the default is unused.
x2 = my_catalog.get('c', '<not found>')
print_name_value(x2)
# '__' is not a key, so the default ['unk'] is returned.
x3 = my_catalog.get('__', ['unk'])
print_name_value(x3)

x1 = ['rainy']
x2 = ['cold', 'cloudy']
x3 = ['unk']


In [28]:
# Another way to group by first letter.
from collections import defaultdict

# defaultdict(list) creates an empty list the first time a missing key is accessed,
# so no "is the key already there?" check is needed.
my_catalog = defaultdict(list)
for word in weather_words:
    key = word[0]
    my_catalog[key].append(word)
print_default_dict(my_catalog)

my_catalog = {'r': ['rainy'], 'p': ['pressure'], 'o': ['overcast'], 's': ['shower', 'sunrise'], 'd': ['dry'], 't': ['tornado'], 'h': ['humidity', 'heat', 'heat wave', 'humid'], 'c': ['cold', 'cloudy'], 'w': ['wind'], 'f': ['fog'], 'b': ['breeze']}


In [62]:
# Just keep the counts.
from collections import defaultdict

my_catalog = defaultdict(int) # Type int to capture counts: missing keys start at int() == 0.
for word in weather_words:
    key = word[0]
    my_catalog[key] += 1
print_default_dict(my_catalog)


my_catalog = {'r': 1, 'p': 1, 'o': 1, 's': 2, 'd': 1, 't': 1, 'h': 4, 'c': 2, 'w': 1, 'f': 1, 'b': 1}
