# Slices
- Python
    - list, tuple
    - str
- Numpy
    - ndarray
- Pandas
    - DataFrame

In [75]:
import numpy as np
# https://numpy.org/doc/stable/reference/index.html#reference
import pandas as pd

## slices on str

In [1]:
# Sample string for the slicing examples (20 characters, indices 0..19)
text = 'Toulouse, ville rose'

In [2]:
# Basic slice text[start:stop]: index 10 is included, index 15 is excluded
text[10:15]

'ville'

In [3]:
# Stop omitted: from index 15 to the end of the string
text[15:]

' rose'

In [4]:
# Start omitted: from the beginning up to index 10 (excluded)
text[:10]

'Toulouse, '

In [5]:
# Both bounds omitted: the slice spans the whole string
text[:]

'Toulouse, ville rose'

In [6]:
# Negative indices count from the end: the last 4 characters
text[-4:]

'rose'

In [7]:
# From the 4th character before the end up to (not including) the 2nd before the end
text[-4:-2]

'ro'

In [8]:
# Start (-2) lies after stop (-4) while the step is +1: nothing is selected,
# the result is an empty string and no error is raised
text[-2:-4]

''

In [9]:
# Negative step walks backwards: index -3 ('o') then -4 ('r'); stop -5 is excluded
text[-3:-5:-1]

'or'

In [10]:
# Step -1 with no bounds: the whole string in reverse order
text[::-1]

'esor elliv ,esuoluoT'

In [11]:
# Step 2 from the start: characters at even indices (0, 2, 4, ...)
text[::2]

'Tuos,vlers'

In [12]:
# Step 2 starting at index 1: characters at odd indices (1, 3, 5, ...)
text[1::2]

'olue il oe'

In [13]:
# A stop far beyond the end is clamped to the string length: no IndexError,
# unlike plain indexing with an out-of-range integer
text[5:10_000]

'use, ville rose'

In [14]:
# str is immutable: slice assignment is rejected with a TypeError.
# The error is caught and printed so that the cell actually demonstrates the
# failure without interrupting a "Run All".
try:
    text[:2] = '##'
except TypeError as err:
    print(f'TypeError: {err}')

## slices on list

In [15]:
# Working list of six city names for the slice-editing examples
cities = [
    'Toulouse',
    'Pau',
    'Bayonne',
    'Strasbourg',
    'Montpellier',
    'Marseille',
]

In [16]:
# Slicing a list returns a new list holding the first 3 items
cities[:3]

['Toulouse', 'Pau', 'Bayonne']

In [17]:
# Slice assignment replaces the selected items in place; the replacement may
# have a different length (here 3 items are replaced by 2)
cities[:3] = ['Paris', 'Bordeaux']
cities

['Paris', 'Bordeaux', 'Strasbourg', 'Montpellier', 'Marseille']

In [18]:
# Assigning an empty list to a slice deletes the selected items
cities[:2] = []
cities

['Strasbourg', 'Montpellier', 'Marseille']

In [19]:
# Reminder: lists can also be edited with their methods:
# cities.remove / pop / insert / append / extend / ...

In [20]:
# del on a slice removes those items in place (here the first two)
del cities[:2]
cities

['Marseille']

In [21]:
# cities[n:] with n == len(cities) is the empty slice located at the end of
# the list: assigning to it appends the new items
n = len(cities)
cities[n:] = ['Paris', 'Bordeaux', 'Strasbourg', 'Montpellier']
cities

['Marseille', 'Paris', 'Bordeaux', 'Strasbourg', 'Montpellier']

In [23]:
# cities[index:index] is an empty slice: nothing is replaced, the new items
# are inserted just before position `index`
index = 1
cities[index:index] = ['Pau', 'Bayonne', 'Toulouse']
cities

['Marseille',
 'Pau',
 'Bayonne',
 'Toulouse',
 'Paris',
 'Bordeaux',
 'Strasbourg',
 'Montpellier']

In [28]:
# Item-access special methods available on a list: reading, writing and
# deleting by index/slice are all supported
sorted({'__delitem__', '__getitem__', '__setitem__'} & set(dir(cities)))

['__delitem__', '__getitem__', '__setitem__']

In [29]:
# Item-access special methods available on str: only reading is supported
sorted({'__delitem__', '__getitem__', '__setitem__'} & set(dir(str)))

['__getitem__']

In [30]:
# Item-access special methods available on tuple: only reading is supported
sorted({'__delitem__', '__getitem__', '__setitem__'} & set(dir(tuple)))

['__getitem__']

In [31]:
# 12 x 12 magic square stored as a plain list of rows (list of lists):
# it holds every integer from 1 to 144 once, each row and column sums to 870.
square_ok_12 = [
    [138,   8,  17, 127, 114,  32,  41, 103,  90,  56,  65,  79],
    [ 19, 125, 140,   6,  43, 101, 116,  30,  67,  77,  92,  54],
    [128,  18,   7, 137, 104,  42,  31, 113,  80,  66,  55,  89],
    [  5, 139, 126,  20,  29, 115, 102,  44,  53,  91,  78,  68],
    [136,  10,  15, 129, 112,  34,  39, 105,  88,  58,  63,  81],
    [ 21, 123, 142,   4,  45,  99, 118,  28,  69,  75,  94,  52],
    [130,  16,   9, 135, 106,  40,  33, 111,  82,  64,  57,  87],
    [  3, 141, 124,  22,  27, 117, 100,  46,  51,  93,  76,  70],
    [134,  12,  13, 131, 110,  36,  37, 107,  86,  60,  61,  83],
    [ 23, 121, 144,   2,  47,  97, 120,  26,  71,  73,  96,  50],
    [132,  14,  11, 133, 108,  38,  35, 109,  84,  62,  59,  85],
    [  1, 143, 122,  24,  25, 119,  98,  48,  49,  95,  74,  72],
]

In [37]:
# A nested list cannot be indexed with a tuple of slices (one per dimension):
# square_ok_12[:3, :3]  # TypeError: list indices must be integers or slices, not tuple
# so the top-left 3 x 3 block is built by slicing the rows, then each kept row.
[line[:3] for line in square_ok_12[:3]]

[[138, 8, 17], [19, 125, 140], [128, 18, 7]]

## slices on ndarray (numpy)

In [40]:
# Convert the nested list into a 2-D NumPy array
square12 = np.array(square_ok_12)
square12

array([[138,   8,  17, 127, 114,  32,  41, 103,  90,  56,  65,  79],
       [ 19, 125, 140,   6,  43, 101, 116,  30,  67,  77,  92,  54],
       [128,  18,   7, 137, 104,  42,  31, 113,  80,  66,  55,  89],
       [  5, 139, 126,  20,  29, 115, 102,  44,  53,  91,  78,  68],
       [136,  10,  15, 129, 112,  34,  39, 105,  88,  58,  63,  81],
       [ 21, 123, 142,   4,  45,  99, 118,  28,  69,  75,  94,  52],
       [130,  16,   9, 135, 106,  40,  33, 111,  82,  64,  57,  87],
       [  3, 141, 124,  22,  27, 117, 100,  46,  51,  93,  76,  70],
       [134,  12,  13, 131, 110,  36,  37, 107,  86,  60,  61,  83],
       [ 23, 121, 144,   2,  47,  97, 120,  26,  71,  73,  96,  50],
       [132,  14,  11, 133, 108,  38,  35, 109,  84,  62,  59,  85],
       [  1, 143, 122,  24,  25, 119,  98,  48,  49,  95,  74,  72]])

In [41]:
# Size along each axis: (number of rows, number of columns)
square12.shape

(12, 12)

In [42]:
# Element type inferred from the Python ints.
# NOTE(review): the default integer type depends on the platform and NumPy
# version (int32 was recorded here; int64 is common elsewhere) — pass dtype=
# to np.array if the exact width matters.
square12.dtype

dtype('int32')

In [43]:
# Memory layout information: contiguity, whether the array owns its data
# buffer (OWNDATA) and whether it can be written to (WRITEABLE)
square12.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [44]:
# An ndarray accepts one slice per axis: rows 0..2 and columns 0..2,
# i.e. the top-left 3 x 3 block
extract = square12[:3, :3]
extract

array([[138,   8,  17],
       [ 19, 125, 140],
       [128,  18,   7]])

In [45]:
# Compare with square12.flags: OWNDATA tells whether the slice has its own
# buffer or shares the memory of the array it was taken from
extract.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [46]:
# Writing through the slice also changes square12: basic slicing returned a
# view on the same data, not a copy (the second assert checks exactly that)
extract[0, 0] = 137
assert extract[0, 0] == 137
assert square12[0, 0] == 137

In [47]:
# Side length of the square = size of the first axis (number of rows).
# shape[0] is read directly instead of unpacking into `_`: in IPython/Jupyter
# `_` holds the previous cell output, and assigning to it shadows that feature.
n = square12.shape[0]
n

12

In [50]:
# Select a whole column: ':' keeps every row, numColumn picks a single
# column, so the result is a 1-D array
numColumn = 6
square12[:, numColumn]

array([ 41, 116,  31, 102,  39, 118,  33, 100,  37, 120,  35,  98])

In [51]:
# First of the two diagonals: the main diagonal, elements [i, i] from the
# top-left corner to the bottom-right corner
diag1 = square12.diagonal()
diag1

array([137, 125,   7,  20, 112,  99,  33,  46,  86,  73,  59,  72])

In [52]:
# Check OWNDATA and WRITEABLE: they tell whether diagonal() handed back its
# own copy and whether the result may be modified
diag1.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : False
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [55]:
# Second diagonal, via a mirror across the horizontal axis: [::-1] reverses
# the row order, so the diagonal of the flipped array runs from the
# bottom-left corner to the top-right corner of square12
diag2 = square12[::-1].diagonal()
diag2

array([  1,  14, 144, 131,  27,  40, 118, 105,  53,  66,  92,  79])

In [56]:
# Second diagonal again, via a mirror across the vertical axis: [:, ::-1]
# reverses the column order, so the same elements are read from the
# top-right corner to the bottom-left corner (diag2 in reverse order)
diag2bis = square12[:, ::-1].diagonal()
diag2bis

array([ 79,  92,  66,  53, 105, 118,  40,  27, 131, 144,  14,   1])

In [58]:
# Mirror across the vertical axis with np.fliplr: the column order is
# reversed (left <-> right), as with square12[:, ::-1]
np.fliplr(square12)

array([[ 79,  65,  56,  90, 103,  41,  32, 114, 127,  17,   8, 137],
       [ 54,  92,  77,  67,  30, 116, 101,  43,   6, 140, 125,  19],
       [ 89,  55,  66,  80, 113,  31,  42, 104, 137,   7,  18, 128],
       [ 68,  78,  91,  53,  44, 102, 115,  29,  20, 126, 139,   5],
       [ 81,  63,  58,  88, 105,  39,  34, 112, 129,  15,  10, 136],
       [ 52,  94,  75,  69,  28, 118,  99,  45,   4, 142, 123,  21],
       [ 87,  57,  64,  82, 111,  33,  40, 106, 135,   9,  16, 130],
       [ 70,  76,  93,  51,  46, 100, 117,  27,  22, 124, 141,   3],
       [ 83,  61,  60,  86, 107,  37,  36, 110, 131,  13,  12, 134],
       [ 50,  96,  73,  71,  26, 120,  97,  47,   2, 144, 121,  23],
       [ 85,  59,  62,  84, 109,  35,  38, 108, 133,  11,  14, 132],
       [ 72,  74,  95,  49,  48,  98, 119,  25,  24, 122, 143,   1]])

In [63]:
# Overwrite the bottom-left cell (previously 1) with 2. Together with the
# earlier 138 -> 137 change in the top-left cell, column 0 keeps its total
# but rows 0 and 11 no longer sum to the same value as the other rows.
square12[11, 0] = 2
square12

array([[137,   8,  17, 127, 114,  32,  41, 103,  90,  56,  65,  79],
       [ 19, 125, 140,   6,  43, 101, 116,  30,  67,  77,  92,  54],
       [128,  18,   7, 137, 104,  42,  31, 113,  80,  66,  55,  89],
       [  5, 139, 126,  20,  29, 115, 102,  44,  53,  91,  78,  68],
       [136,  10,  15, 129, 112,  34,  39, 105,  88,  58,  63,  81],
       [ 21, 123, 142,   4,  45,  99, 118,  28,  69,  75,  94,  52],
       [130,  16,   9, 135, 106,  40,  33, 111,  82,  64,  57,  87],
       [  3, 141, 124,  22,  27, 117, 100,  46,  51,  93,  76,  70],
       [134,  12,  13, 131, 110,  36,  37, 107,  86,  60,  61,  83],
       [ 23, 121, 144,   2,  47,  97, 120,  26,  71,  73,  96,  50],
       [132,  14,  11, 133, 108,  38,  35, 109,  84,  62,  59,  85],
       [  2, 143, 122,  24,  25, 119,  98,  48,  49,  95,  74,  72]])

In [59]:
# Sum of all 144 elements.
# NOTE(review): this cell was executed (In [59]) before the
# square12[11, 0] = 2 edit shown above it (In [63]), so a recorded 10439 is
# stale; on a fresh top-to-bottom run the total is 10440.
square12.sum()

10439

In [64]:
# Sum for each column: axis=0 adds up along the rows, leaving one total per column
square12.sum(axis=0)

array([870, 870, 870, 870, 870, 870, 870, 870, 870, 870, 870, 870])

In [66]:
# Sum for each row: axis=1 adds up along the columns, leaving one total per row
square12.sum(axis=1)

array([869, 870, 870, 870, 870, 870, 870, 870, 870, 870, 870, 871])

In [67]:
# Magic sum expected for every row and column of a 12 x 12 magic square
# filled with 1..144: 12 * (12**2 + 1) / 2 = 870
ms = 870

In [68]:
# Element-wise comparison: one boolean per column total
square12.sum(axis=0) == ms

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [69]:
# Element-wise comparison: one boolean per row total
square12.sum(axis=1) == ms

array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False])

In [71]:
# Summary as a pair: (every column total equals ms, every row total equals ms)
(
    np.all(square12.sum(axis=0) == ms),
    np.all(square12.sum(axis=1) == ms),
)

(True, False)

In [73]:
# Transpose: rows become columns and columns become rows
square12.T

array([[137,  19, 128,   5, 136,  21, 130,   3, 134,  23, 132,   2],
       [  8, 125,  18, 139,  10, 123,  16, 141,  12, 121,  14, 143],
       [ 17, 140,   7, 126,  15, 142,   9, 124,  13, 144,  11, 122],
       [127,   6, 137,  20, 129,   4, 135,  22, 131,   2, 133,  24],
       [114,  43, 104,  29, 112,  45, 106,  27, 110,  47, 108,  25],
       [ 32, 101,  42, 115,  34,  99,  40, 117,  36,  97,  38, 119],
       [ 41, 116,  31, 102,  39, 118,  33, 100,  37, 120,  35,  98],
       [103,  30, 113,  44, 105,  28, 111,  46, 107,  26, 109,  48],
       [ 90,  67,  80,  53,  88,  69,  82,  51,  86,  71,  84,  49],
       [ 56,  77,  66,  91,  58,  75,  64,  93,  60,  73,  62,  95],
       [ 65,  92,  55,  78,  63,  94,  57,  76,  61,  96,  59,  74],
       [ 79,  54,  89,  68,  81,  52,  87,  70,  83,  50,  85,  72]])

## slices on DataFrame (pandas)

In [74]:
# Three parallel sequences describing the same six cities ...
cities = [
    'Toulouse',
    'Pau',
    'Bayonne',
    'Strasbourg',
    'Montpellier',
    'Marseille',
]
pop = (
    470_000,
    77_000,
    49_000,
    500_000,
    311_000,
    900_000,
)
cp = [
    31000,
    64000,
    64100,
    67000,
    34000,
    13000,
]
# ... combined position by position into (name, population, zip code) tuples
data = [
    (city_name, inhabitants, zip_code)
    for city_name, inhabitants, zip_code in zip(cities, pop, cp)
]
data

[('Toulouse', 470000, 31000),
 ('Pau', 77000, 64000),
 ('Bayonne', 49000, 64100),
 ('Strasbourg', 500000, 67000),
 ('Montpellier', 311000, 34000),
 ('Marseille', 900000, 13000)]

In [76]:
# One DataFrame row per tuple of `data`, with explicit column names;
# no index is given, so rows are labelled 0..5
dfCities = pd.DataFrame(data, columns=['name', 'population', 'zipcode'])
dfCities

Unnamed: 0,name,population,zipcode
0,Toulouse,470000,31000
1,Pau,77000,64000
2,Bayonne,49000,64100
3,Strasbourg,500000,67000
4,Montpellier,311000,34000
5,Marseille,900000,13000


In [77]:
# Structure summary: index type, columns with non-null counts and dtypes,
# memory usage
dfCities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   name        6 non-null      object
 1   population  6 non-null      int64 
 2   zipcode     6 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 276.0+ bytes


In [79]:
# .loc[4] selects the row whose index label is 4; a single row (like a single
# column) comes back as a pandas Series
rowMontpellier = dfCities.loc[4]
rowMontpellier

name          Montpellier
population         311000
zipcode             34000
Name: 4, dtype: object

In [80]:
# Confirm the type of a single selected row
type(rowMontpellier)

pandas.core.series.Series

In [81]:
# Column selection, form 1: attribute access. Only usable when the column
# name is a valid Python identifier that does not clash with an existing
# DataFrame attribute or method.
dfCities.name

0       Toulouse
1            Pau
2        Bayonne
3     Strasbourg
4    Montpellier
5      Marseille
Name: name, dtype: object

In [82]:
# Column selection, form 2: bracket access by column name (works for any name)
dfCities['name']

0       Toulouse
1            Pau
2        Bayonne
3     Strasbourg
4    Montpellier
5      Marseille
Name: name, dtype: object

In [83]:
# Column selection, form 3: .loc[rows, columns] with ':' meaning every row
dfCities.loc[:, 'name']

0       Toulouse
1            Pau
2        Bayonne
3     Strasbourg
4    Montpellier
5      Marseille
Name: name, dtype: object

In [85]:
# .loc slices by index label and, unlike Python/NumPy positional slicing,
# includes both bounds: rows labelled 2, 3 and 4
dfCities.loc[2:4]

Unnamed: 0,name,population,zipcode
2,Bayonne,49000,64100
3,Strasbourg,500000,67000
4,Montpellier,311000,34000


In [87]:
# New DataFrame indexed by city name, with the labels sorted alphabetically;
# the result is bound to a new name and dfCities is left as it was
dfCitiesByName = dfCities.set_index('name').sort_index()
dfCitiesByName

Unnamed: 0_level_0,population,zipcode
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bayonne,49000,64100
Marseille,900000,13000
Montpellier,311000,34000
Pau,77000,64000
Strasbourg,500000,67000
Toulouse,470000,31000


In [88]:
# Row lookup by label: the remaining columns come back as a Series
dfCitiesByName.loc['Montpellier']

population    311000
zipcode        34000
Name: Montpellier, dtype: int64

In [92]:
# Label slice on the sorted index: keeps every name that sorts between 'M'
# and 'Mzzzzzzzzz', i.e. the names starting with 'M'. The bounds do not have
# to exist in the index, but this relies on the index being sorted.
dfCitiesByName.loc['M':'Mzzzzzzzzz']

Unnamed: 0_level_0,population,zipcode
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Marseille,900000,13000
Montpellier,311000,34000


## slice objects (built-in `slice` class)
```
slice(stop)
slice(start, stop[, step])
```

In [93]:
# An explicit slice object between the brackets: same as text[3:8]
text[slice(3,8)]

'louse'

In [94]:
# One slice object per axis; slice(None) plays the role of a bare ':'
square12[slice(None), slice(None,4)] # i.e. square12[:, :4]

array([[137,   8,  17, 127],
       [ 19, 125, 140,   6],
       [128,  18,   7, 137],
       [  5, 139, 126,  20],
       [136,  10,  15, 129],
       [ 21, 123, 142,   4],
       [130,  16,   9, 135],
       [  3, 141, 124,  22],
       [134,  12,  13, 131],
       [ 23, 121, 144,   2],
       [132,  14,  11, 133],
       [  2, 143, 122,  24]])