# ibis examples

In [1]:
import ibis
import pandas as pd
pd.options.display.max_rows = 20
ibis.options.interactive = True

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv")

In [3]:
df.shape

(244, 7)

In [4]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [19]:
df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


## Convert Pandas DataFrame to ibis table

In [5]:
tips = ibis.pandas.connect(
    {
       'tips': df,
    }
).table('tips')

In [6]:
tips.columns

['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size']

## Look Up Schema / Data Type Info

In [18]:
tips.schema()

ibis.Schema {  
  total_bill  float64
  tip         float64
  sex         string
  smoker      string
  day         string
  time        string
  size        int64
}

## Selecting certain columns

```
SELECT
    total_bill,
    tip,
    day,
    time
FROM
    tips as tips
```

In [7]:
tips['total_bill','tip','day','time']

     total_bill   tip   day    time
0         16.99  1.01   Sun  Dinner
1         10.34  1.66   Sun  Dinner
2         21.01  3.50   Sun  Dinner
3         23.68  3.31   Sun  Dinner
4         24.59  3.61   Sun  Dinner
..          ...   ...   ...     ...
239       29.03  5.92   Sat  Dinner
240       27.18  2.00   Sat  Dinner
241       22.67  2.00   Sat  Dinner
242       17.82  1.75   Sat  Dinner
243       18.78  3.00  Thur  Dinner

[244 rows x 4 columns]

## Getting Row Count

In [8]:
tips.count()

244

## Limiting the Number of Rows

In [9]:
tips['total_bill','tip','day','time'].limit(5)

   total_bill   tip  day    time
0       16.99  1.01  Sun  Dinner
1       10.34  1.66  Sun  Dinner
2       21.01  3.50  Sun  Dinner
3       23.68  3.31  Sun  Dinner
4       24.59  3.61  Sun  Dinner

## Filtering

```
SELECT
    total_bill,
    tip,
    day,
    time
FROM
    tips as tips
WHERE
    tips.day = 'Sun'
```

In [10]:
tips['total_bill','tip','day','time'].filter(
    tips['day'] == 'Sun'
)

    total_bill   tip  day    time
0        16.99  1.01  Sun  Dinner
1        10.34  1.66  Sun  Dinner
2        21.01  3.50  Sun  Dinner
3        23.68  3.31  Sun  Dinner
4        24.59  3.61  Sun  Dinner
..         ...   ...  ...     ...
71       20.90  3.50  Sun  Dinner
72       30.46  2.00  Sun  Dinner
73       18.15  3.50  Sun  Dinner
74       23.10  4.00  Sun  Dinner
75       15.69  1.50  Sun  Dinner

[76 rows x 4 columns]

## Aggregation

In [21]:
tips.group_by('day').aggregate(tips['total_bill'].mean().name('day_bill_avg'))

    day  day_bill_avg
0   Fri     17.151579
1   Sat     20.441379
2   Sun     21.410000
3  Thur     17.682742

In [24]:
import ibis.postgres

ModuleNotFoundError: No module named 'ibis.postgres'