In [2]:
import os
import tempfile
import ibis

ibis.options.interactive = True

connection = ibis.sqlite.connect('data/geography.db')

In [4]:
countries = connection.table('countries')
countries.schema()

ibis.Schema {
  iso_alpha2   string
  iso_alpha3   string
  iso_numeric  int32
  fips         string
  name         string
  capital      string
  area_km2     float64
  population   int32
  continent    string
}

Type names can be used to cast from one type to another.

In [6]:
countries = connection.table('countries')
fl = countries.population.cast('float').sum()
print(type(fl))
print(fl)

<class 'ibis.expr.types.numeric.FloatingScalar'>
6878963738.0


In [7]:
countries.area_km2.cast('int32').sum()

150012536

In [15]:
area_int = countries.area_km2.cast('int32').sum()
print(f'{area_int} -> {type(area_int)}')
print(f"{area_int.cast('float').cast('string')} -> {type(area_int.cast('float').cast('string'))}")

150012536 -> <class 'ibis.expr.types.numeric.IntegerScalar'>
'150012536.0' -> <class 'ibis.expr.types.strings.StringScalar'>


## Case / if-then-else expressions

In [16]:
expr = (countries.continent
        .case()
        .when('AF', 'Africa')
        .when('AN', 'Antarctica')
        .when('AS', 'Asia')
        .when('EU', 'Europe')
        .when('NA', 'North America')
        .when('OC', 'Oceania')
        .when('SA', 'South America')
        .else_(countries.continent)
        .end()
        .name('continent_name'))

expr.value_counts()

Unnamed: 0,continent_name,count
0,Africa,58
1,Antarctica,5
2,Asia,51
3,Europe,54
4,North America,42
5,Oceania,28
6,South America,14


To test for an arbitrary series of boolean conditions, use the case API method and pass any boolean expressions potentially involving columns of the table:

In [17]:
expr = (ibis.case()
        .when(countries.population > 25_000_000, 'big')
        .when(countries.population < 5_000_000, 'small')
        .else_('medium')
        .end()
        .name('size'))

countries['name', 'population', expr].limit(10)

Unnamed: 0,name,population,size
0,Andorra,84000,small
1,United Arab Emirates,4975593,small
2,Afghanistan,29121286,big
3,Antigua and Barbuda,86754,small
4,Anguilla,13254,small
5,Albania,2986952,small
6,Armenia,2968000,small
7,Netherlands Antilles,300000,small
8,Angola,13068161,medium
9,Antarctica,0,small


Simple ternary-cases (like the Python X `if` COND `else` Y) can be written using the `ifelse` function:

In [18]:
expr = ((countries.continent == 'AS')
        .ifelse('Asia', 'Not Asia')
        .name('is_asia'))

countries['name', 'continent', expr].limit(10)

Unnamed: 0,name,continent,is_asia
0,Andorra,EU,Not Asia
1,United Arab Emirates,AS,Asia
2,Afghanistan,AS,Asia
3,Antigua and Barbuda,,Not Asia
4,Anguilla,,Not Asia
5,Albania,EU,Not Asia
6,Armenia,AS,Asia
7,Netherlands Antilles,,Not Asia
8,Angola,AF,Not Asia
9,Antarctica,AN,Not Asia


## Set membership
The `isin` and `notin` functions are like their pandas counterparts. These can take:

- A list of value expressions, either literal values or other column expressions
- An array/column expression of some kind

In [19]:
is_america = countries.continent.isin(['NA', 'SA'])
countries[is_america].continent.value_counts()

Unnamed: 0,continent,count
0,,42
1,SA,14


In [29]:
is_sa = countries.name.isin(['SA'])
countries[is_sa]

Unnamed: 0,name
