# Comparison and sorting

In [25]:
import math
import locale
from operator import attrgetter
from functools import cmp_to_key
from city import City
import pandas as pd

In [2]:
# Integers compare by numeric value.
assert 12 < 15

# Floats: ordinary values first, then the two infinities as outer bounds.
assert 12.5 < 15.5
assert -math.inf < 12.5 < math.inf

# Special case NaN: every ordering or equality test against it is False,
# so only != succeeds, and NaN is not even equal to itself.
assert not (12.5 < math.nan)
assert not (12.5 > math.nan)
assert not (12.5 <= math.nan)
assert not (12.5 >= math.nan)
assert not (12.5 == math.nan)
assert 12.5 != math.nan
assert not (math.nan == math.nan)
assert math.isnan(math.nan)

# str: lexicographic order driven by Unicode code points.
assert "arbre" < "fleur"
assert "Z" < "a"  # case matters: upper-case letters sort first
assert "étuve" < "été"  # not the order expected in French
assert "Stockholms" < "Åre"  # happens to be right for Swedish (pure luck)
assert "mano" < "mañana"  # happens to be right for Spanish (pure luck)
assert "mañana" > "matador"  # not the order expected in Spanish

In [3]:
# Report the (language, encoding) pair currently active for each locale category.
for cat in (
    locale.LC_COLLATE,
    locale.LC_NUMERIC,
    locale.LC_MONETARY,
    locale.LC_TIME,
):
    print(locale.getlocale(cat))

(None, None)
(None, None)
(None, None)
(None, None)


In [4]:
# Switch every locale category (LC_ALL) to French; the call returns the locale string now in effect.
# NOTE(review): the named locale has to be installed on the host — verify before running elsewhere.
locale.setlocale(locale.LC_ALL, 'fr_FR.UTF8')

'fr_FR.UTF8'

In [5]:
# Print the locale now in force for collation, numbers, money and time, one category per line.
for cat in (
    locale.LC_COLLATE,
    locale.LC_NUMERIC,
    locale.LC_MONETARY,
    locale.LC_TIME,
):
    print(locale.getlocale(cat))

('fr_FR', 'UTF-8')
('fr_FR', 'UTF-8')
('fr_FR', 'UTF-8')
('fr_FR', 'UTF-8')


In [6]:
# Locale-aware comparison of two strings with locale.strcoll(a, b):
#   result < 0  => a collates before b
#   result == 0 => a and b collate as equal
#   result > 0  => a collates after b
for first, second in (
    ("étuve", "été"),
    ("arbre", "fleur"),
    ("cœur", "corruption"),
):
    print(locale.strcoll(first, second))

1
-1
-1


In [7]:
# French sample words: accented letters, a ligature and a cedilla,
# chosen to expose the difference between code-point and dictionary order.
words_fr = [
    "été",
    "étage",
    "étuve",
    "arbre",
    "fleur",
    "cœur",
    "coaguler",
    "corruption",
    "hameau",
    "hameçonner",
    "hamster",
]

In [8]:
# sorted() returns a new list and leaves words_fr as it was.
# No key is given, so plain str comparison applies and the accented words end up last.
sorted_words_fr = sorted(words_fr)
sorted_words_fr

['arbre',
 'coaguler',
 'corruption',
 'cœur',
 'fleur',
 'hameau',
 'hameçonner',
 'hamster',
 'étage',
 'étuve',
 'été']

In [9]:
# list.sort() reorders words_fr itself, so the original ordering is gone from this cell onwards.
words_fr.sort()
words_fr

['arbre',
 'coaguler',
 'corruption',
 'cœur',
 'fleur',
 'hameau',
 'hameçonner',
 'hamster',
 'étage',
 'étuve',
 'été']

In [10]:
# IPython help syntax: displays the signature and docstring of the built-in sorted.
sorted?

[1;31mSignature:[0m [0msorted[0m[1;33m([0m[0miterable[0m[1;33m,[0m [1;33m/[0m[1;33m,[0m [1;33m*[0m[1;33m,[0m [0mkey[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [0mreverse[0m[1;33m=[0m[1;32mFalse[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m
Return a new list containing all items from the iterable in ascending order.

A custom key function can be supplied to customize the sort order, and the
reverse flag can be set to request the result in descending order.
[1;31mType:[0m      builtin_function_or_method

In [11]:
# Comparing the upper-cased form of each word makes the ordering ignore case.
mixed_case_words = ("arbre", "Fleur", "Zébulon")
sorted(mixed_case_words, key=str.upper)

['arbre', 'Fleur', 'Zébulon']

In [12]:
# Sample cities, each built from (name, population, department number).
# Two entries share the name "Valence" so that tie-breaking can be observed.
cities = [
    City(name, population, department)
    for name, population, department in (
        ("Pau", 77_000, 64),
        ("Toulouse", 470_000, 31),
        ("Paris", 2_161_000, 75),
        ("Valence", 62_000, 26),
        ("Valence", 220, 16),
    )
]

In [13]:
# No key: the ordering comes from the comparison operators City itself provides.
# NOTE(review): the output suggests name first, then a tie-break between the two Valence rows — confirm in city.py.
sorted(cities)

[City[Paris, pop=2161000, dept=75],
 City[Pau, pop=77000, dept=64],
 City[Toulouse, pop=470000, dept=31],
 City[Valence, pop=220, dept=16],
 City[Valence, pop=62000, dept=26]]

In [14]:
# Order by name only. Python's sort is stable, so cities sharing a name
# keep their original relative order (see the two Valence entries).
sorted(cities, key=lambda city: city.name)

[City[Paris, pop=2161000, dept=75],
 City[Pau, pop=77000, dept=64],
 City[Toulouse, pop=470000, dept=31],
 City[Valence, pop=62000, dept=26],
 City[Valence, pop=220, dept=16]]

In [15]:
# attrgetter('name') is a callable returning the name attribute of its argument,
# used here as the sort key.
sorted(cities, key=attrgetter('name'))

[City[Paris, pop=2161000, dept=75],
 City[Pau, pop=77000, dept=64],
 City[Toulouse, pop=470000, dept=31],
 City[Valence, pop=62000, dept=26],
 City[Valence, pop=220, dept=16]]

In [16]:
# Order the cities by their population attribute, smallest first.
sorted(cities, key=attrgetter('population'))

[City[Valence, pop=220, dept=16],
 City[Valence, pop=62000, dept=26],
 City[Pau, pop=77000, dept=64],
 City[Toulouse, pop=470000, dept=31],
 City[Paris, pop=2161000, dept=75]]

In [17]:
# With two attribute names, attrgetter returns a (name, population) tuple:
# cities are ordered by name, and equal names are ordered by population.
sorted(cities, key=attrgetter('name', 'population'))

[City[Paris, pop=2161000, dept=75],
 City[Pau, pop=77000, dept=64],
 City[Toulouse, pop=470000, dept=31],
 City[Valence, pop=220, dept=16],
 City[Valence, pop=62000, dept=26]]

In [18]:
# locale.strcoll is a two-argument comparison function; cmp_to_key wraps it
# into the one-argument key function that sorted() expects.
sorted(words_fr, key=cmp_to_key(locale.strcoll))

['arbre',
 'coaguler',
 'cœur',
 'corruption',
 'étage',
 'été',
 'étuve',
 'fleur',
 'hameau',
 'hameçonner',
 'hamster']

In [19]:
# locale.strxfrm converts each string into a key whose ordinary comparison
# follows the collation rules of the current locale.
sorted(words_fr, key=locale.strxfrm)

['arbre',
 'coaguler',
 'cœur',
 'corruption',
 'étage',
 'été',
 'étuve',
 'fleur',
 'hameau',
 'hameçonner',
 'hamster']

In [20]:
# Exercise: sort the Spanish words mano, mañana, matador
# Exercise: sort the Swedish cities Stockholm, Åre, Ängelholm, Askersund, Köping, Kungälv, Örnsköldsvik, Oxelösund, Ystad

In [21]:
# Spanish sample: "ñ" has to be placed relative to "n" and "t".
words_es = ["mano", "mañana", "matador"]
# Make Spanish the active locale for every category before building the sort keys.
locale.setlocale(locale.LC_ALL, "es_ES.UTF8")
# strxfrm yields keys that compare according to the active locale.
sorted(words_es, key=locale.strxfrm)

['mano', 'mañana', 'matador']

In [22]:
# Swedish town names; several begin with Å, Ä or Ö.
cities_swe = [
    "Stockholms",
    "Åre",
    "Ängelholm",
    "Askersund",
    "Köping",
    "Kungälv",
    "Örnsköldsvik",
    "Oxelösund",
    "Ystad",
]
# Activate the Swedish locale for every category before comparing.
locale.setlocale(locale.LC_ALL, "sv_SE.UTF8")
# strcoll compares two strings under the active locale; cmp_to_key adapts it for sorted().
sorted(cities_swe, key=cmp_to_key(locale.strcoll))

['Askersund',
 'Kungälv',
 'Köping',
 'Oxelösund',
 'Stockholms',
 'Ystad',
 'Åre',
 'Ängelholm',
 'Örnsköldsvik']

In [23]:
# Sort the Swedish names with locale.strxfrm keys, which compare according to the current locale.
sorted(cities_swe, key=locale.strxfrm)

['Askersund',
 'Kungälv',
 'Köping',
 'Oxelösund',
 'Stockholms',
 'Ystad',
 'Åre',
 'Ängelholm',
 'Örnsköldsvik']

In [28]:
# Load the communes table from cities.csv (path relative to the working directory) and preview three rows.
# NOTE(review): zip_code appears to be parsed as a number — later outputs show 3200 and 7160 for
# departments 03 and 07 — so leading zeros look lost; consider dtype={'zip_code': str} if they matter.
dfCities = pd.read_csv('cities.csv')
dfCities.head(3)

Unnamed: 0,insee_code,city_code,zip_code,label,latitude,longitude,department_name,department_number,region_name,region_geojson_name
0,25620,ville du pont,25650,ville du pont,46.999873,6.498147,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté
1,25624,villers grelot,25640,villers grelot,47.361512,6.235167,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté
2,25615,villars les blamont,25310,villars les blamont,47.368384,6.871415,doubs,25,bourgogne-franche-comté,Bourgogne-Franche-Comté


In [31]:
# Order the communes by city_code.
# The sorted frame is bound back to dfCities instead of using inplace=True:
# later cells still see the sorted table, and the call stays chainable.
dfCities = dfCities.sort_values('city_code')
dfCities

Unnamed: 0,insee_code,city_code,zip_code,label,latitude,longitude,department_name,department_number,region_name,region_geojson_name
16344,64001,aast,64460,aast,43.291176,-0.081741,pyrénées-atlantiques,64,nouvelle-aquitaine,Nouvelle-Aquitaine
28725,55001,abainville,55130,abainville,48.532751,5.514721,meuse,55,grand est,Grand Est
32634,59001,abancourt,59268,abancourt,50.237016,3.208146,nord,59,hauts-de-france,Hauts-de-France
32869,60001,abancourt,60220,abancourt,49.692898,1.769243,oise,60,hauts-de-france,Hauts-de-France
29234,54001,abaucourt,54610,abaucourt sur seille,48.892375,6.265494,meurthe-et-moselle,54,grand est,Grand Est
...,...,...,...,...,...,...,...,...,...,...
26357,2B364,zuani,20272,zuani,42.265395,9.340840,haute-corse,2B,corse,Corse
33710,62905,zudausques,62500,zudausques,50.752659,2.159982,pas-de-calais,62,hauts-de-france,Hauts-de-France
34755,62906,zutkerque,62370,zutkerque,50.853256,2.059691,pas-de-calais,62,hauts-de-france,Hauts-de-France
34143,59668,zuydcoote,59123,zuydcoote,51.063734,2.486571,nord,59,hauts-de-france,Hauts-de-France


In [35]:
# Order the communes by region, then by label within each region.
# The sorted frame is bound back to dfCities instead of using inplace=True:
# later cells still see the sorted table, and the call stays chainable.
# NOTE(review): strings are compared as plain Python str here (no locale), which would explain
# why 'île-de-france' comes last in the output — confirm if a locale-aware order is wanted.
dfCities = dfCities.sort_values(['region_name', 'label'])
dfCities

Unnamed: 0,insee_code,city_code,zip_code,label,latitude,longitude,department_name,department_number,region_name,region_geojson_name
5167,42001,aboen,42380,aboen,45.412675,4.126791,loire,42,auvergne-rhône-alpes,Auvergne-Rhône-Alpes
7530,74001,abondance,74360,abondance,46.266280,6.731861,haute-savoie,74,auvergne-rhône-alpes,Auvergne-Rhône-Alpes
21370,03001,abrest,3200,abrest,46.096051,3.449986,allier,03,auvergne-rhône-alpes,Auvergne-Rhône-Alpes
1180,07001,accons,7160,accons,44.888444,4.392011,ardèche,07,auvergne-rhône-alpes,Auvergne-Rhône-Alpes
6869,69001,affoux,69170,affoux,45.843773,4.413312,rhône,69,auvergne-rhône-alpes,Auvergne-Rhône-Alpes
...,...,...,...,...,...,...,...,...,...,...
37982,77533,vulaines sur seine,77870,vulaines sur seine,48.431621,2.769685,seine-et-marne,77,île-de-france,Île-de-France
37700,91689,wissous,91320,wissous,48.729691,2.329529,essonne,91,île-de-france,Île-de-France
38602,95690,wy dit joli village,95420,wy dit joli village,49.095847,1.824388,val-d'oise,95,île-de-france,Île-de-France
38156,77534,yebles,77390,yebles,48.637232,2.769102,seine-et-marne,77,île-de-france,Île-de-France
