# Errata and Addenda

# Static Weirdness Explained

In [225]:
class Foo:
    """Demonstrate how class-level ("static") attributes behave on instances.

    The three attributes below live on the class. An instance reads them
    through normal attribute lookup until it assigns a same-named attribute
    of its own.
    """

    my_list = []    # mutable: in-place changes are visible through the class
    my_int = 1      # immutable
    my_tuple = ()   # immutable

    def mutate_things(self):
        """Append to ``my_list`` in place; assign ``my_int`` and ``my_tuple`` on the instance."""
        # MUTATING THE OBJECT DEFINED AT THE CLASS LEVEL: nothing is assigned to
        # self.my_list here, so unless this instance has already rebound
        # my_list, the list being appended to is the class-level one.
        self.my_list.append(1)
        # Plain assignment creates instance attributes; the class values stay as they were.
        self.my_int, self.my_tuple = 2, (1, 3)

    def change_things(self):
        """Assign new values for all three attributes on the instance."""
        self.my_int, self.my_tuple = 2, (1, 3)
        # REMOVING THE CONNECTION TO THE CLASS LEVEL: the instance now has its
        # own list bound to the name my_list.
        self.my_list = [1]

        # Takeaway: if you mutate a mutable, the shared object changes;
        # if you assign, only this instance's attribute changes.

In [226]:
def foo_state(foo):
    """Print the class-level and the instance-level view of the demo attributes.

    Parameters
    ----------
    foo : object
        An object exposing ``my_list``, ``my_int`` and ``my_tuple``
        (in this notebook, a ``Foo`` instance).

    Returns
    -------
    None
        The two state lines are written to standard output.
    """
    # No `global` declaration is needed: `Foo` is only read here, never rebound.
    print("Class state:", Foo.my_list, Foo.my_int, Foo.my_tuple)
    print("Object state:", foo.my_list, foo.my_int, foo.my_tuple)

In [227]:
# Fresh instance: Foo defines no __init__, so foo1 has no attributes of its own yet
# and every lookup is answered by the class.
foo1 = Foo()

In [228]:
# Appends to the class-level list in place; my_int and my_tuple are assigned on foo1 only.
foo1.mutate_things()

In [229]:
# The appended 1 shows up in the class state too (same list object),
# whereas the new int and tuple values appear only on the object.
foo_state(foo1)

Class state: [1] 1 ()
Object state: [1] 2 (1, 3)


In [230]:
# Rebinds foo1.my_list to a new list owned by foo1; this call does not touch Foo.my_list.
foo1.change_things()

In [231]:
# The printed values match the previous call: Foo.my_list still holds the 1 appended
# earlier, and foo1.my_list is now a separate list that happens to equal [1].
foo_state(foo1)

Class state: [1] 1 ()
Object state: [1] 2 (1, 3)


# Narrow vs Wide Tables

In [232]:
import pandas as pd

In [233]:
# Fixed seed so the randomly sampled tables are reproducible on re-run;
# without it every execution yields different counts.
SEED = 42

# 1000 random pets and 1000 random owners, each drawn with replacement from a small vocabulary.
# The two draws use different seeds so they do not share one random stream.
# NOTE(review): 'parraot' looks like a typo for 'parrot'; kept because the recorded outputs use it.
pets = (
    pd.Series("cat dog ferret snake turtle parraot".split())
    .sample(1000, replace=True, random_state=SEED)
    .to_list()
)
people = (
    pd.Series("A B C D E F G".split())
    .sample(1000, replace=True, random_state=SEED + 1)
    .to_list()
)

# Narrow form: one row per (owner, pet) pair, with the pair's frequency in column `n`.
NARROW = (
    pd.DataFrame(dict(pet=pets, owner=people))
    .groupby(['owner', 'pet'])
    .pet.count()
    .to_frame('n')
)

This is a narrow table. 

It has few columns and many rows. 

Columns are types of things, and values in rows are either instances or subtypes.

In [234]:
# Display the narrow table: indexed by (owner, pet), with the pair count in column n.
NARROW

Unnamed: 0_level_0,Unnamed: 1_level_0,n
owner,pet,Unnamed: 2_level_1
A,cat,31
A,dog,26
A,ferret,19
A,parraot,18
A,snake,24
A,turtle,26
B,cat,29
B,dog,23
B,ferret,31
B,parraot,24


In [235]:
# Pivot the inner index level (pet) into columns: one row per owner, one column per pet.
# fill_value=0 records an (owner, pet) pair that never occurred as a count of 0
# instead of NaN, which also keeps the counts as integers.
WIDE = NARROW.n.unstack(fill_value=0)

This is a wide table. 

One column's values are projected onto the feature space (as columns).

The other column becomes a unique list of row labels (just as the feature space is a unique list of column labels).

In [236]:
# Display the wide table: owners as rows, pets as columns, counts as cell values.
WIDE

pet,cat,dog,ferret,parraot,snake,turtle
owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,31,26,19,18,24,26
B,29,23,31,24,29,22
C,21,22,19,23,25,25
D,17,15,15,22,20,28
E,20,30,21,25,16,26
F,22,28,32,24,25,26
G,23,19,20,28,35,26


Narrow tables are more manageable by databases.

Wide tables are more usable for analysis.

You can also do the computations below with narrow tables, using `.groupby()`.

In [237]:
# Normalize within each pet: divide every pet column by that column's total,
# so each column of A sums to 1.
A = WIDE.div(WIDE.sum(), axis="columns")

In [238]:
# Normalize within each owner: after transposing, owners are the columns,
# and each owner column is divided by that owner's total.
B = WIDE.T.div(WIDE.T.sum(), axis="columns")

In [239]:
# Each cell is the owner's share of that pet's total count (pet columns sum to 1).
A

pet,cat,dog,ferret,parraot,snake,turtle
owner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,0.190184,0.159509,0.121019,0.109756,0.137931,0.145251
B,0.177914,0.141104,0.197452,0.146341,0.166667,0.122905
C,0.128834,0.134969,0.121019,0.140244,0.143678,0.139665
D,0.104294,0.092025,0.095541,0.134146,0.114943,0.156425
E,0.122699,0.184049,0.133758,0.152439,0.091954,0.145251
F,0.134969,0.171779,0.203822,0.146341,0.143678,0.145251
G,0.141104,0.116564,0.127389,0.170732,0.201149,0.145251


In [182]:
# Each cell is the pet's share of that owner's total count (owner columns sum to 1).
# NOTE(review): the output recorded for this cell carries an earlier execution count and
# does not match the WIDE table shown above (cat/A would be 31/144, about 0.215, not 0.184);
# it appears to come from a previous random sample. Re-run the notebook top to bottom to refresh it.
B

owner,A,B,C,D,E,F,G
pet,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cat,0.184211,0.167883,0.169643,0.152778,0.192308,0.177215,0.198582
dog,0.157895,0.182482,0.1875,0.159722,0.224359,0.151899,0.212766
ferret,0.177632,0.124088,0.133929,0.152778,0.147436,0.126582,0.113475
parraot,0.217105,0.167883,0.169643,0.166667,0.153846,0.234177,0.163121
snake,0.131579,0.167883,0.196429,0.194444,0.102564,0.170886,0.12766
turtle,0.131579,0.189781,0.142857,0.173611,0.179487,0.139241,0.184397


In [179]:
# Sanity check: every pet column of A should total 1.
A.sum()

pet
cat        1.0
dog        1.0
ferret     1.0
parraot    1.0
snake      1.0
turtle     1.0
dtype: float64

In [180]:
# Sanity check: every owner column of B should total 1.
B.sum()

owner
A    1.0
B    1.0
C    1.0
D    1.0
E    1.0
F    1.0
G    1.0
dtype: float64