In [95]:
import numpy as np
import pandas as pd
pd.__version__

'0.25.2'

In [96]:
# constants
# Five sample rows; each row is (bool, float-or-None, int, int, str).
tuples = [
    (False, 99.0, 0, 23, "abc"),
    (True, -42.0, -1, 99, "xyz"),
    (False, 0.0, 42, 42, "def"),
    (True, None, 99, -1, "jkl"),
    (False, 16.66, 23, 0, "ghi"),
]

# Candidate axis labels: one 5-element tuple per label type.
obj_index = tuple("mnopq")
int64_index = tuple(range(6, 11))
bool_index = (True, False, True, False, True)
double64_index = (6.1, 6.2, 6.3, 6.4, 6.5)


In [97]:
##
## dataframe tests
##

In [98]:
# data
# Frame under test: rows come from `tuples`, columns carry the string labels
# and rows carry the integer labels.
columns, index = obj_index, int64_index
df = pd.DataFrame(data=tuples, index=index, columns=columns)
print('DataFrame tests:\n')
print(df.to_string())

DataFrame tests:

        m      n   o   p    q
6   False  99.00   0  23  abc
7    True -42.00  -1  99  xyz
8   False   0.00  42  42  def
9    True    NaN  99  -1  jkl
10  False  16.66  23   0  ghi


In [79]:
# expected shape, columns and index
# One row per index label, one column per column label, and both label sets
# must come back from the frame unchanged.
n_rows, n_cols = df.shape
assert (n_rows, n_cols) == (len(index), len(columns))
assert np.array_equal(df.columns.to_numpy(), columns)
assert np.array_equal(df.index.to_numpy(), index)

In [80]:
##
## loc tests
##

In [81]:
# take single index
# Selecting one row label with .loc yields that row as a Series indexed by the
# column labels. The label is taken from index[0] instead of a literal so the
# check keeps following the frame's index, as the other loc tests do.
expected = pd.Series(tuples[0], index=columns)
print(expected.to_string())
assert expected.equals(df.loc[index[0]])

m    False
n       99
o        0
p       23
q      abc


In [82]:
# take single index, two columns
# First row restricted to the first two columns, returned as a Series.
first_two_cols = columns[:2]
expected = pd.Series(list(tuples[0][:2]), index=first_two_cols)
print(expected.to_string())
actual = df.loc[index[0], list(first_two_cols)]
assert expected.equals(actual)

m    False
n       99


In [83]:
# take two indices
# A list of row labels passed to .loc yields a DataFrame with those rows.
first_two_labels = index[:2]
expected = pd.DataFrame(tuples[:2], index=first_two_labels, columns=columns)
print(expected.to_string())
actual = df.loc[list(first_two_labels)]
assert expected.equals(actual)

       m     n  o   p    q
6  False  99.0  0  23  abc
7   True -42.0 -1  99  xyz


In [84]:
# take two indices, two columns
# First two rows restricted to the first two columns.
row_labels, col_labels = index[:2], columns[:2]
data = [list(row[:2]) for row in tuples[:2]]
expected = pd.DataFrame(data, index=row_labels, columns=col_labels)
print(expected.to_string())
actual = df.loc[list(row_labels), list(col_labels)]
assert expected.equals(actual)

       m     n
6  False  99.0
7   True -42.0


In [85]:
##
## sort_values tests
##

In [86]:
## data
# Same construction as the frame used by the DataFrame tests: rows from
# `tuples`, string column labels, integer row labels.
index = int64_index
columns = obj_index
df = pd.DataFrame(tuples, index=index, columns=columns)
print('sort_values tests:\n')
print(df.to_string())

sort_values tests:

        m      n   o   p    q
6   False  99.00   0  23  abc
7    True -42.00  -1  99  xyz
8   False   0.00  42  42  def
9    True    NaN  99  -1  jkl
10  False  16.66  23   0  ghi


In [87]:
# sort by one column
# expected[i] is the row-label order after sorting the frame by columns[i].
expected = [[6, 8, 10, 7, 9],
            [7, 8, 10, 6, 9],
            [7, 6, 10, 8, 9],
            [9, 10, 6, 8, 7],
            [6, 8, 10, 9, 7]]
# Guard against the table and the column list drifting apart.
assert len(expected) == len(columns)
for idx, column in enumerate(columns):
  print('Sort by column: {0}\n'.format(column))
  # Column 'm' contains repeated values, so its expected order depends on how
  # ties are broken. The default kind ('quicksort') is not a stable sort;
  # 'mergesort' is stable and keeps tied rows in their original order.
  dfs = df.sort_values(by=column, kind='mergesort')
  print(dfs.to_string())
  print("\n")
  assert expected[idx] == dfs.index.to_list(), column

Sort by column: m

        m      n   o   p    q
6   False  99.00   0  23  abc
8   False   0.00  42  42  def
10  False  16.66  23   0  ghi
7    True -42.00  -1  99  xyz
9    True    NaN  99  -1  jkl


Sort by column: n

        m      n   o   p    q
7    True -42.00  -1  99  xyz
8   False   0.00  42  42  def
10  False  16.66  23   0  ghi
6   False  99.00   0  23  abc
9    True    NaN  99  -1  jkl


Sort by column: o

        m      n   o   p    q
7    True -42.00  -1  99  xyz
6   False  99.00   0  23  abc
10  False  16.66  23   0  ghi
8   False   0.00  42  42  def
9    True    NaN  99  -1  jkl


Sort by column: p

        m      n   o   p    q
9    True    NaN  99  -1  jkl
10  False  16.66  23   0  ghi
6   False  99.00   0  23  abc
8   False   0.00  42  42  def
7    True -42.00  -1  99  xyz


Sort by column: q

        m      n   o   p    q
6   False  99.00   0  23  abc
8   False   0.00  42  42  def
10  False  16.66  23   0  ghi
9    True    NaN  99  -1  jkl
7    True -42.00  -1  99  xyz

In [88]:
# sort by two columns
# expected[i] is the row-label order after sorting by (columns[i], columns[i+1]).
expected = [
    [8, 10, 6, 7, 9],
    [7, 8, 10, 6, 9],
    [7, 6, 10, 8, 9],
    [9, 10, 6, 8, 7],
]
# Walk over each pair of adjacent columns: (m, n), (n, o), (o, p), (p, q).
for idx, (primary, secondary) in enumerate(zip(columns, columns[1:])):
  dfs = df.sort_values(by=[primary, secondary])
  print('Sort by columns: [{0}, {1}]\n'.format(primary, secondary))
  print(dfs.to_string())
  print("\n")
  assert expected[idx] == dfs.index.to_list()

Sort by columns: [m, n]

        m      n   o   p    q
8   False   0.00  42  42  def
10  False  16.66  23   0  ghi
6   False  99.00   0  23  abc
7    True -42.00  -1  99  xyz
9    True    NaN  99  -1  jkl


Sort by columns: [n, o]

        m      n   o   p    q
7    True -42.00  -1  99  xyz
8   False   0.00  42  42  def
10  False  16.66  23   0  ghi
6   False  99.00   0  23  abc
9    True    NaN  99  -1  jkl


Sort by columns: [o, p]

        m      n   o   p    q
7    True -42.00  -1  99  xyz
6   False  99.00   0  23  abc
10  False  16.66  23   0  ghi
8   False   0.00  42  42  def
9    True    NaN  99  -1  jkl


Sort by columns: [p, q]

        m      n   o   p    q
9    True    NaN  99  -1  jkl
10  False  16.66  23   0  ghi
6   False  99.00   0  23  abc
8   False   0.00  42  42  def
7    True -42.00  -1  99  xyz




In [93]:
# sort by all columns
# Every column is used as a sort key, in left-to-right column order.
sort_keys = list(columns)
expected = [8, 10, 6, 7, 9]
dfs = df.sort_values(by=sort_keys)
print('Sort by columns: {0}\n'.format(columns))
print(dfs.to_string())
print("\n")
actual = dfs.index.to_list()
assert expected == actual

Sort by columns: ('m', 'n', 'o', 'p', 'q')

        m      n   o   p    q
8   False   0.00  42  42  def
10  False  16.66  23   0  ghi
6   False  99.00   0  23  abc
7    True -42.00  -1  99  xyz
9    True    NaN  99  -1  jkl




In [94]:
# sort by one index
# With axis=1, sort_values orders the columns by the values found in one row.
# Each row of this frame mixes bool, float, int and str values, so a TypeError
# is expected for every row label. The else branch makes the test fail when
# no exception is raised, instead of passing silently.
for label in index:
  try:
    df.sort_values(by=label, axis=1)
  except TypeError as e:
    print(str(e))
    assert str(e).startswith("'>' not supported between instances")
  else:
    raise AssertionError(
        'sort_values(by={0!r}, axis=1) did not raise TypeError'.format(label))

'>' not supported between instances of 'numpy.ndarray' and 'str'
'>' not supported between instances of 'numpy.ndarray' and 'str'
'>' not supported between instances of 'numpy.ndarray' and 'str'
'>' not supported between instances of 'numpy.ndarray' and 'str'
'>' not supported between instances of 'numpy.ndarray' and 'str'


In [152]:
# Evenly spaced floats starting at 1.0004, stepping by 0.6 toward the stop value 99.5.
# NOTE(review): the result is only displayed, not assigned or asserted on — looks
# like a scratch cell; confirm it is still needed.
np.arange(1.0004, 99.5, 0.6)

array([ 1.0004,  1.6004,  2.2004,  2.8004,  3.4004,  4.0004,  4.6004,
        5.2004,  5.8004,  6.4004,  7.0004,  7.6004,  8.2004,  8.8004,
        9.4004, 10.0004, 10.6004, 11.2004, 11.8004, 12.4004, 13.0004,
       13.6004, 14.2004, 14.8004, 15.4004, 16.0004, 16.6004, 17.2004,
       17.8004, 18.4004, 19.0004, 19.6004, 20.2004, 20.8004, 21.4004,
       22.0004, 22.6004, 23.2004, 23.8004, 24.4004, 25.0004, 25.6004,
       26.2004, 26.8004, 27.4004, 28.0004, 28.6004, 29.2004, 29.8004,
       30.4004, 31.0004, 31.6004, 32.2004, 32.8004, 33.4004, 34.0004,
       34.6004, 35.2004, 35.8004, 36.4004, 37.0004, 37.6004, 38.2004,
       38.8004, 39.4004, 40.0004, 40.6004, 41.2004, 41.8004, 42.4004,
       43.0004, 43.6004, 44.2004, 44.8004, 45.4004, 46.0004, 46.6004,
       47.2004, 47.8004, 48.4004, 49.0004, 49.6004, 50.2004, 50.8004,
       51.4004, 52.0004, 52.6004, 53.2004, 53.8004, 54.4004, 55.0004,
       55.6004, 56.2004, 56.8004, 57.4004, 58.0004, 58.6004, 59.2004,
       59.8004, 60.4