In [1]:
import ibis

In [2]:
ibis.options.interactive = True

For a comparison to pandas, this is not the same as where the data is imported from (e.g. `pandas.read_sql`). pandas loads data into memory and performs the computations itself. **Ibis won't load the data and perform any computation**, but instead **will leave the data in the backend defined in the connection**, and will **ask** the backend to perform the computations.

In [47]:
!mkdir data
!curl -LsS -o data/geography.db 'https://storage.googleapis.com/ibis-tutorial-data/geography.db'

mkdir: cannot create directory ‘data’: File exists


In [4]:
con = ibis.sqlite.connect('data/geography.db')

In [5]:
con.list_tables()

['countries', 'gdp', 'independence']

In [6]:
' | '.join(dir(con)[51:])

'add_operation | ast_schema | attach | begin | compile | compiler | con | connect | create_database | create_table | create_view | current_database | database | database_class | database_name | db_identity | do_connect | drop_table | drop_view | execute | exists_database | exists_table | explain | fetch_from_cursor | get_schema | has_operation | insert | inspector | list_databases | list_schemas | list_tables | load_data | meta | name | raw_sql | reconnect | register_options | schema | sql | table | table_class | table_expr_class | truncate_table | verify | version'

In [7]:
print(con.list_tables())
con.table('countries')

['countries', 'gdp', 'independence']


Unnamed: 0,iso_alpha2,iso_alpha3,iso_numeric,fips,name,capital,area_km2,population,continent
0,AD,AND,20,AN,Andorra,Andorra la Vella,468.0,84000,EU
1,AE,ARE,784,AE,United Arab Emirates,Abu Dhabi,82880.0,4975593,AS
2,AF,AFG,4,AF,Afghanistan,Kabul,647500.0,29121286,AS
3,AG,ATG,28,AC,Antigua and Barbuda,St. Johns,443.0,86754,
4,AI,AIA,660,AV,Anguilla,The Valley,102.0,13254,
...,...,...,...,...,...,...,...,...,...
247,YE,YEM,887,YM,Yemen,Sanaa,527970.0,23495361,AS
248,YT,MYT,175,MF,Mayotte,Mamoudzou,374.0,159042,AF
249,ZA,ZAF,710,SF,South Africa,Pretoria,1219912.0,49000000,AF
250,ZM,ZMB,894,ZA,Zambia,Lusaka,752614.0,13460305,AF


In [8]:
countries = con.table('countries')
' | '.join(dir(countries)[46:])

'aggregate | alias | anti_join | any_inner_join | any_left_join | area_km2 | asof_join | capital | columns | compile | continent | count | cross_join | difference | distinct | drop | dropna | equals | execute | fillna | filter | fips | get_column | get_columns | get_name | group_by | groupby | has_name | head | info | inner_join | intersect | iso_alpha2 | iso_alpha3 | iso_numeric | join | left_join | limit | materialize | mutate | name | op | outer_join | pipe | population | prevent_rewrite | projection | relabel | right_join | rowid | schema | select | semi_join | set_column | sort_by | sql | to_array | union | verify | view | visualize'

In [9]:
print(countries.columns)
print(countries.count())

['iso_alpha2', 'iso_alpha3', 'iso_numeric', 'fips', 'name', 'capital', 'area_km2', 'population', 'continent']
252


In [10]:
pop = countries['name', 'population']
pop

Unnamed: 0,name,population
0,Andorra,84000
1,United Arab Emirates,4975593
2,Afghanistan,29121286
3,Antigua and Barbuda,86754
4,Anguilla,13254
...,...,...
247,Yemen,23495361
248,Mayotte,159042
249,South Africa,49000000
250,Zambia,13460305


In [11]:
pop.sort_by('population')

Unnamed: 0,name,population
0,Antarctica,0
1,Bouvet Island,0
2,Heard Island and McDonald Islands,0
3,U.S. Minor Outlying Islands,0
4,South Georgia and South Sandwich Islands,30
...,...,...
247,Brazil,201103330
248,Indonesia,242968342
249,United States,310232863
250,India,1173108018


In [12]:
pop.sort_by(ibis.desc('population'))

Unnamed: 0,name,population
0,China,1330044000
1,India,1173108018
2,United States,310232863
3,Indonesia,242968342
4,Brazil,201103330
...,...,...
247,South Georgia and South Sandwich Islands,30
248,Antarctica,0
249,Bouvet Island,0
250,Heard Island and McDonald Islands,0


In [13]:
countries['continent'].__dir__()

['_arg',
 '__module__',
 '__doc__',
 'bottomk',
 'approx_nunique',
 'approx_median',
 'max',
 'min',
 'nunique',
 'topk',
 'summary',
 'arbitrary',
 'count',
 'value_counts',
 'first',
 'last',
 'rank',
 'dense_rank',
 'percent_rank',
 'cummin',
 'cummax',
 'lag',
 'lead',
 'ntile',
 'nth',
 'parent',
 'to_projection',
 '_repr_html_',
 'name',
 'type',
 '__init__',
 '__repr__',
 '_repr',
 'equals',
 '__hash__',
 '__bool__',
 '__nonzero__',
 'has_name',
 'get_name',
 '_safe_name',
 '_key',
 '_repr_png_',
 'visualize',
 'pipe',
 'op',
 '_find_backends',
 '_find_backend',
 'execute',
 'compile',
 'verify',
 '__dict__',
 '__weakref__',
 '__new__',
 '__str__',
 '__getattribute__',
 '__setattr__',
 '__delattr__',
 '__lt__',
 '__le__',
 '__eq__',
 '__ne__',
 '__gt__',
 '__ge__',
 '__reduce_ex__',
 '__reduce__',
 '__subclasshook__',
 '__init_subclass__',
 '__format__',
 '__sizeof__',
 '__dir__',
 '__class__',
 'hash',
 'cast',
 'coalesce',
 'greatest',
 'least',
 'typeof',
 'fillna',
 'nullif'

In [14]:
countries['continent'].group_concat('-')

'EU-AS-AS-NA-NA-EU-AS-NA-AF-AN-SA-OC-EU-OC-NA-EU-AS-EU-NA-AS-EU-AF-EU-AS-AF-AF-NA-NA-AS-SA-NA-SA-NA-AS-AN-AF-EU-NA-NA-AS-AF-AF-AF-EU-AF-OC-SA-AF-AS-SA-NA-EU-NA-AF-NA-OC-EU-EU-EU-AF-EU-NA-NA-AF-SA-EU-AF-AF-AF-EU-AF-EU-OC-SA-OC-EU-EU-AF-EU-NA-AS-SA-EU-AF-EU-NA-AF-AF-NA-AF-EU-AN-NA-OC-AF-SA-AS-AN-NA-EU-NA-EU-AS-EU-AS-EU-AS-AS-AS-AS-EU-EU-EU-NA-AS-AS-AF-AS-AS-OC-AF-NA-AS-AS-AS-NA-AS-AS-AS-NA-EU-AS-AF-AF-EU-EU-EU-AF-AF-EU-EU-EU-NA-AF-OC-EU-AF-AS-AS-AS-OC-NA-AF-NA-EU-AF-AS-AF-NA-AS-AF-AF-OC-AF-OC-AF-NA-EU-EU-AS-OC-OC-OC-AS-NA-SA-OC-OC-AS-AS-EU-NA-OC-NA-AS-EU-OC-SA-AS-AF-EU-EU-EU-AF-AS-OC-AF-AF-EU-AS-AF-EU-EU-EU-AF-EU-AF-AF-SA-AF-AF-NA-NA-AS-AF-NA-AF-AN-AF-AS-AS-OC-OC-AS-AF-OC-AS-NA-OC-AS-AF-EU-AF-OC-NA-SA-AS-EU-NA-SA-NA-NA-AS-OC-OC-OC-EU-AS-AF-AF-AF-AF'

In [15]:
countries['continent'].like('AS')

Unnamed: 0,tmp
0,False
1,True
2,True
3,False
4,False
...,...
247,True
248,False
249,False
250,False


In [16]:
countries['continent'] == 'AS'

Unnamed: 0,tmp
0,False
1,True
2,True
3,False
4,False
...,...
247,True
248,False
249,False
250,False


In [17]:
eval("countries['continent'].like('AS') == (countries['continent'] == 'AS')")

Unnamed: 0,tmp
0,True
1,True
2,True
3,True
4,True
...,...
247,True
248,True
249,True
250,True


In [45]:
asian_countries = countries['name', 'continent', 'population'].filter(countries['continent'] == 'AS').limit(10)
asian_countries

Unnamed: 0,name,continent,population
0,United Arab Emirates,AS,4975593
1,Afghanistan,AS,29121286
2,Armenia,AS,2968000
3,Azerbaijan,AS,8303512
4,Bangladesh,AS,156118464
5,Bahrain,AS,738004
6,Brunei,AS,395027
7,Bhutan,AS,699847
8,Cocos [Keeling] Islands,AS,628
9,China,AS,1330044000


In [36]:
asian_countries.count()

51

In [42]:
asian_countries.sort_by('population').limit(10)

Unnamed: 0,name,continent,population
0,China,AS,1330044000
1,India,AS,1173108018
2,United States,,310232863
3,Indonesia,AS,242968342
4,Brazil,SA,201103330
5,Pakistan,AS,184404791
6,Bangladesh,AS,156118464
7,Nigeria,AF,154000000
8,Russia,EU,140702000
9,Japan,AS,127288000


In [46]:
asian_countries.sort_by(ibis.desc('population')).limit(10)

Unnamed: 0,name,continent,population
0,China,AS,1330044000
1,Bangladesh,AS,156118464
2,Afghanistan,AS,29121286
3,Azerbaijan,AS,8303512
4,United Arab Emirates,AS,4975593
5,Armenia,AS,2968000
6,Bahrain,AS,738004
7,Bhutan,AS,699847
8,Brunei,AS,395027
9,Cocos [Keeling] Islands,AS,628
