# Containers
- builtin container types:
    - list
    - tuple
    - dict
    - set

## Lists
- type list
- iterable
- length: builtin function len
- modifiable: append/extend/remove/sort/...
- getitem/setitem with operator [] (index or slice)

In [1]:
cities = [
    "Toulouse",
    "Montpellier",
    "Marseille",
    "Lyon",
    "Valence",
    "Saint Etienne",
    "Paris",
    "Pau",
]

In [2]:
del cities[-1]
cities

['Toulouse',
 'Montpellier',
 'Marseille',
 'Lyon',
 'Valence',
 'Saint Etienne',
 'Paris']

In [3]:
print("City count:", len(cities))
for city in cities:
    print("  -", city)

City count: 7
  - Toulouse
  - Montpellier
  - Marseille
  - Lyon
  - Valence
  - Saint Etienne
  - Paris


## Tuples
- type: tuple
- iterable
- length: builtin function len
- getitem: operator[] with index, slice
- not modifiable: NO methods or operators setitem, del, append ...

In [4]:
# tuple[str,int,str]
city = "Toulouse", 477_000, "31000"
city

('Toulouse', 477000, '31000')

In [5]:
type(city)

tuple

In [6]:
nom = city[0]
nom

'Toulouse'

In [7]:
# TypeError: 'tuple' object does not support item assignment
# city[0] = "Pau"

In [8]:
# avoid that
nom = city[0]
population = city[1]

In [9]:
# unpack tuple into n variables
nom, population, cp = city
print(nom, population, cp, sep=", ")

Toulouse, 477000, 31000


In [10]:
# ValueError: too many values to unpack (expected 2)
# nom, population = city

# ValueError: not enough values to unpack (expected 4, got 3)
# nom, population, cp, mayor = city

In [11]:
nom, _, cp = city
print(nom, cp, sep=", ")

Toulouse, 31000


In [12]:
len(city)

3

In [13]:
for info in city:
    print(info)

Toulouse
477000
31000


In [14]:
# len(12, 34, 56) # you must putparenthesis around tuple
len((12, 34, 56))

3

## Tuple vs List

### Reading and computing

In [15]:
data1 = (12, 34, 56)
data2 = [12, 34, 56]

In [16]:
s1 = sum(data1)
s2 = sum(data2)
print(s1, s2)

102 102


### Modification: KO OK

In [17]:
# AttributeError: 'tuple' object has no attribute 'append'
# data1.append(90)
data2.append(90)
data2

[12, 34, 56, 90]

### Exercise
- Define a list of cities represented by a tuple (name, population, code postal)
- Display cities line by line: name = Toulouse, population = 477000, code postal = 31000
- Compute total population of all cities
- Compute min population
- Compute max population
- list of population only (sorted with ascending order: numeric)
- list of name only (sorted with ascending order: alphabetic)

In [18]:
# type: list[tuple[str,int,str]]
liste_villes = [
    ('Toulouse', 500_000, '31000'), 
    ('Agen', 200_000, '47000'), 
    ('Montauban', 100_000, '82000'), 
    ('Lyon', 700_000, '69000'), 
    ('Saint Etienne', 300_000, '42000'),
]
liste_villes

[('Toulouse', 500000, '31000'),
 ('Agen', 200000, '47000'),
 ('Montauban', 100000, '82000'),
 ('Lyon', 700000, '69000'),
 ('Saint Etienne', 300000, '42000')]

In [19]:
liste_villes[0]

('Toulouse', 500000, '31000')

In [20]:
liste_villes[0][0]

'Toulouse'

In [21]:
for i in range(len(liste_villes)):
    print(
        'Nom : ',
        liste_villes[i][0], 
        ' ; Habitants : ', 
        liste_villes[i][1],
        ' ; Code Postal : ',
        liste_villes[i][2],
        sep=''
    )

Nom : Toulouse ; Habitants : 500000 ; Code Postal : 31000
Nom : Agen ; Habitants : 200000 ; Code Postal : 47000
Nom : Montauban ; Habitants : 100000 ; Code Postal : 82000
Nom : Lyon ; Habitants : 700000 ; Code Postal : 69000
Nom : Saint Etienne ; Habitants : 300000 ; Code Postal : 42000


In [22]:
for ville in liste_villes:
     print(
        'Nom : ',
        ville[0], 
        ' ; Habitants : ', 
        ville[1],
        ' ; Code Postal : ',
        ville[2],
        sep=''
    )

Nom : Toulouse ; Habitants : 500000 ; Code Postal : 31000
Nom : Agen ; Habitants : 200000 ; Code Postal : 47000
Nom : Montauban ; Habitants : 100000 ; Code Postal : 82000
Nom : Lyon ; Habitants : 700000 ; Code Postal : 69000
Nom : Saint Etienne ; Habitants : 300000 ; Code Postal : 42000


In [23]:
# foreach with unpack current element into n varaiables
for nom, population, cp in liste_villes:
     print(
        'Nom : ',
        nom, 
        ' ; Habitants : ', 
        population,
        ' ; Code Postal : ',
        cp,
        sep=''
    )

Nom : Toulouse ; Habitants : 500000 ; Code Postal : 31000
Nom : Agen ; Habitants : 200000 ; Code Postal : 47000
Nom : Montauban ; Habitants : 100000 ; Code Postal : 82000
Nom : Lyon ; Habitants : 700000 ; Code Postal : 69000
Nom : Saint Etienne ; Habitants : 300000 ; Code Postal : 42000


In [24]:
# with f-string (formatted string)
for nom, population, cp in liste_villes:
     print(f'Nom : {nom} ; Habitants : {population} ; Code Postal : {cp}')

Nom : Toulouse ; Habitants : 500000 ; Code Postal : 31000
Nom : Agen ; Habitants : 200000 ; Code Postal : 47000
Nom : Montauban ; Habitants : 100000 ; Code Postal : 82000
Nom : Lyon ; Habitants : 700000 ; Code Postal : 69000
Nom : Saint Etienne ; Habitants : 300000 ; Code Postal : 42000


In [25]:
sommePopulation = 0
for ville in liste_villes: 
    sommePopulation = sommePopulation + ville[1]
print(sommePopulation)

1800000


In [26]:
sommePopulation = 0
for _, population, _ in liste_villes: 
    sommePopulation = sommePopulation +population
print(sommePopulation)

1800000


In [27]:
# expression for = generator
total_population = sum(population for _, population, _ in liste_villes)
total_population

1800000

In [28]:
population_min = min(population for _,population,_ in liste_villes)
population_min

100000

In [29]:
population_max = max(population for _,population,_ in liste_villes)
population_max

700000

In [30]:
sorted(nom for nom,_,_ in liste_villes)

['Agen', 'Lyon', 'Montauban', 'Saint Etienne', 'Toulouse']

In [31]:
# NB: sorted uses operator < on str objects
'Agen' < 'Lyon'

True

In [32]:
population_sort = sorted(population for _,population,_ in liste_villes)
population_sort

[100000, 200000, 300000, 500000, 700000]

In [33]:
sorted(liste_villes)

[('Agen', 200000, '47000'),
 ('Lyon', 700000, '69000'),
 ('Montauban', 100000, '82000'),
 ('Saint Etienne', 300000, '42000'),
 ('Toulouse', 500000, '31000')]

In [34]:
# NB: sorted uses operator < on tuple object (first component, then second, then third
# Here: decision on first component name (str)
('Agen', 200000, '47000') < ('Lyon', 700000, '69000')

True

In [35]:
# Here: decision on second component population (int)
('Saint Sauveur', 505, '05200') < ('Saint Sauveur', 1321, '33250')

True

In [36]:
liste_villes.append(('Saint Sauveur', 505, '05200'))
liste_villes.append(('Saint Sauveur',1321, '33250'))
liste_villes.sort() # NB: in place sort
liste_villes

[('Agen', 200000, '47000'),
 ('Lyon', 700000, '69000'),
 ('Montauban', 100000, '82000'),
 ('Saint Etienne', 300000, '42000'),
 ('Saint Sauveur', 505, '05200'),
 ('Saint Sauveur', 1321, '33250'),
 ('Toulouse', 500000, '31000')]

In [37]:
liste_villes.sort(reverse=True)
liste_villes

[('Toulouse', 500000, '31000'),
 ('Saint Sauveur', 1321, '33250'),
 ('Saint Sauveur', 505, '05200'),
 ('Saint Etienne', 300000, '42000'),
 ('Montauban', 100000, '82000'),
 ('Lyon', 700000, '69000'),
 ('Agen', 200000, '47000')]

In [38]:
def population_from_city(city):
    """get population from a city represented as a tuple
    
    population must be at index 1

    Example:
        city = ('Saint Sauveur', 505, '05200')
        population_from_city(city) -> 505
    """
    return city[1]

In [39]:
city = ('Saint Sauveur', 505, '05200')
pop = population_from_city(city)
pop

505

In [40]:
# access doctring of my function population_from_city
population_from_city?

[1;31mSignature:[0m [0mpopulation_from_city[0m[1;33m([0m[0mcity[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
get population from a city represented as a tuple

population must be at index 1

Example:
    city = ('Saint Sauveur', 505, '05200')
    population_from_city(city) -> 505
[1;31mFile:[0m      c:\users\aelion\appdata\local\temp\ipykernel_2484\1608077148.py
[1;31mType:[0m      function

In [41]:
liste_villes.sort(key=population_from_city)
liste_villes

[('Saint Sauveur', 505, '05200'),
 ('Saint Sauveur', 1321, '33250'),
 ('Montauban', 100000, '82000'),
 ('Agen', 200000, '47000'),
 ('Saint Etienne', 300000, '42000'),
 ('Toulouse', 500000, '31000'),
 ('Lyon', 700000, '69000')]

In [42]:
# TypeError: 'int' object is not subscriptable
# 12 is not a duck "city"
#population_from_city(12)

## Generator, for expression

In [43]:
[x**2 + 1 for x in range(10)]

[1, 2, 5, 10, 17, 26, 37, 50, 65, 82]

In [44]:
g = (x**2 + 1 for x in range(10))
g

<generator object <genexpr> at 0x000002DD3E3458A0>

In [45]:
# evaluate this cell until exception StopIteration
next(g)

1

In [46]:
sum(x**2 + 1 for x in range(10))

295

## Dictionaries

### A city as dictionary

In [47]:
# dict[str, str | int] 
city = {
    "name": "Toulouse",
    "population": 477_000,
    "code_postal": "31000",
}
city

{'name': 'Toulouse', 'population': 477000, 'code_postal': '31000'}

In [48]:
type(city)

dict

In [49]:
# get value associated to a key
city["name"]

'Toulouse'

In [50]:
# KeyError: 'mayor'
# city["mayor"]

In [51]:
city["population"] += 10_000
city

{'name': 'Toulouse', 'population': 487000, 'code_postal': '31000'}

In [52]:
city["area"] = 118.3
city

{'name': 'Toulouse',
 'population': 487000,
 'code_postal': '31000',
 'area': 118.3}

In [53]:
del city["area"]
city

{'name': 'Toulouse', 'population': 487000, 'code_postal': '31000'}

In [54]:
# KeyError: 'area'
# del city["area"]

In [55]:
len(city)

3

In [56]:
# TypeError: object of type 'int' has no len()
# len(123)

In [57]:
# default iteration: keys
for info_name in city:
    print(info_name)

name
population
code_postal


In [58]:
for info_name in city.keys():
    print(info_name)

name
population
code_postal


In [59]:
for info_value in city.values():
    print(info_value)

Toulouse
487000
31000


In [60]:
for info_name, info_value in city.items():
    print(info_name, info_value, sep=" = ")

name = Toulouse
population = 487000
code_postal = 31000


### List of dictionaries

In [61]:
list_cities = [
    {'name': 'Toulouse', 'population': 500_000, 'code_postal': '31000'}, 
    {'name': 'Agen', 'population': 200_000, 'code_postal': '47000'}, 
    {'name': 'Montauban', 'population': 100_000, 'code_postal': '82000'}, 
    {'name': 'Lyon', 'population': 700_000, 'code_postal': '69000'}, 
    {'name': 'Saint Etienne', 'population': 300_000, 'code_postal': '42000'},
]
list_cities

[{'name': 'Toulouse', 'population': 500000, 'code_postal': '31000'},
 {'name': 'Agen', 'population': 200000, 'code_postal': '47000'},
 {'name': 'Montauban', 'population': 100000, 'code_postal': '82000'},
 {'name': 'Lyon', 'population': 700000, 'code_postal': '69000'},
 {'name': 'Saint Etienne', 'population': 300000, 'code_postal': '42000'}]

#### Pretty print

In [62]:
for city in list_cities:
    #print('Nom :', city["name"])
    print(f"Nom : {city['name']} ; Habitants : {city['population']} ; Code Postal : {city['code_postal']}")

Nom : Toulouse ; Habitants : 500000 ; Code Postal : 31000
Nom : Agen ; Habitants : 200000 ; Code Postal : 47000
Nom : Montauban ; Habitants : 100000 ; Code Postal : 82000
Nom : Lyon ; Habitants : 700000 ; Code Postal : 69000
Nom : Saint Etienne ; Habitants : 300000 ; Code Postal : 42000


In [63]:
# Handle any type of dictionary
for data in list_cities:
    for key, value in data.items():
        print(f"{key} : {value} ; ", end='')
    print()

name : Toulouse ; population : 500000 ; code_postal : 31000 ; 
name : Agen ; population : 200000 ; code_postal : 47000 ; 
name : Montauban ; population : 100000 ; code_postal : 82000 ; 
name : Lyon ; population : 700000 ; code_postal : 69000 ; 
name : Saint Etienne ; population : 300000 ; code_postal : 42000 ; 


#### Statistics
Compute: total, min, max population

In [64]:
for city in list_cities:
    print(city['population'])

500000
200000
100000
700000
300000


In [65]:
# list of populations
[ city['population'] for city in list_cities ]

[500000, 200000, 100000, 700000, 300000]

In [66]:
sum(city['population'] for city in list_cities)

1800000

In [67]:
min(city['population'] for city in list_cities)

100000

In [68]:
max(city['population'] for city in list_cities)

700000

#### Sort 
sort list_cities by population DESC

In [69]:
list_cities.sort(key=lambda city: city['population'], reverse=True)
list_cities

[{'name': 'Lyon', 'population': 700000, 'code_postal': '69000'},
 {'name': 'Toulouse', 'population': 500000, 'code_postal': '31000'},
 {'name': 'Saint Etienne', 'population': 300000, 'code_postal': '42000'},
 {'name': 'Agen', 'population': 200000, 'code_postal': '47000'},
 {'name': 'Montauban', 'population': 100000, 'code_postal': '82000'}]

## Set
unique value

In [70]:
numbers = { 12, 4, 65, 4, 12, 33, 1 }
numbers

{1, 4, 12, 33, 65}

In [71]:
numbers.add(5)
numbers.add(65)
numbers

{1, 4, 5, 12, 33, 65}

In [72]:
for n in numbers:
    print(n)

65
1
33
4
5
12


In [73]:
len(numbers)

6

In [74]:
numbers.remove(65)
numbers

{1, 4, 5, 12, 33}

## Contains

In [76]:
list_numbers = [1, 4, 5, 12, 33]
tuple_numbers = 1, 4, 5, 12, 33
dict_numbers = {1: "A", 4: "A", 5: "B", 12: "C", 33: "D"}
set_numbers = {1, 4, 5, 12, 33}

In [78]:
print(5 in list_numbers)
print(7 in list_numbers)

True
False


In [79]:
print(5 in tuple_numbers)
print(7 in tuple_numbers)

True
False


In [80]:
print(5 in set_numbers)
print(7 in set_numbers)

True
False


In [81]:
# operator 'in' works on dict keys (by default)
print(5 in dict_numbers)
print(7 in dict_numbers)

True
False


In [82]:
# operator 'in' on keys
print(5 in dict_numbers.keys())
print(7 in dict_numbers.keys())

True
False


In [85]:
# operator 'in' on values
print(5 in dict_numbers.values()) # always False
print(7 in dict_numbers.values()) # always False
print("A" in dict_numbers.values())
print("Z" in dict_numbers.values())

False
False
True
False


In [87]:
"coffee" in "time to coffee break"

True