# Ibis Basics
Author: Mark Bauer

In [1]:
import os
import ibis

In [2]:
# load (or reload) the watermark extension, then print the Python/IPython
# versions (-v) and the installed ibis version (-p ibis)
%reload_ext watermark
%watermark -v -p ibis

Python implementation: CPython
Python version       : 3.8.13
IPython version      : 8.4.0

ibis: 3.2.0



In [3]:
ls data/

[34mraw[m[m/                  storm-events.db       storm-events.parquet


In [4]:
# we will use Ibis interactive mode, so that expressions left as the last line
# of a cell are displayed with their data (see the previews below)
ibis.options.interactive = True

# Connect to a database. I'm using DuckDB.

In [5]:
# reconnect to the persisted DuckDB database file data/storm-events.db
con = ibis.duckdb.connect("data/storm-events.db")

# confirm that the connection object is the Ibis DuckDB backend
type(con)

ibis.backends.duckdb.Backend

In [6]:
# list the names of the tables stored in the .db file
con.list_tables()

['storm_events']

In [7]:
# create a table expression for the storm_events table and bind it to the
# name storm_events, which the rest of the notebook uses
storm_events = con.table("storm_events")

# the result is an Ibis Table expression
type(storm_events)



ibis.expr.types.relations.Table

# Familiarize with the Ibis table

In [8]:
# get the schema: every column name together with its data type
storm_events.schema()

ibis.Schema {
  EVENT_ID                   int32
  STATE                      string
  STATE_FIPS                 int32
  YEAR                       int32
  MONTH_NAME                 string
  EVENT_TYPE                 string
  CZ_TYPE                    string
  CZ_FIPS                    int32
  CZ_NAME                    string
  WFO                        string
  BEGIN_DATE_TIME            string
  END_DATE_TIME              string
  INJURIES_DIRECT            int32
  INJURIES_INDIRECT          int32
  DEATHS_DIRECT              int32
  DEATHS_INDIRECT            int32
  DAMAGE_PROPERTY            string
  DAMAGE_CROPS               string
  SOURCE                     string
  DAMAGE_PROPERTY_CONVERTED  float64
}

In [9]:
# summarize the table: total row count plus the type and null counts of each column
storm_events.info()

[3m                         Summary of storm_events                          [0m
[3m                               1295193 rows                               [0m
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
┃ Name                      ┃ Type                   ┃ # Nulls ┃ % Nulls ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━┩
│ EVENT_ID                  │ Int32(nullable=True)   │       0 │    0.00 │
│ STATE                     │ String(nullable=True)  │       0 │    0.00 │
│ STATE_FIPS                │ Int32(nullable=True)   │       0 │    0.00 │
│ YEAR                      │ Int32(nullable=True)   │       0 │    0.00 │
│ MONTH_NAME             

In [10]:
# columns attribute: the column names as a plain Python list
storm_events.columns

['EVENT_ID',
 'STATE',
 'STATE_FIPS',
 'YEAR',
 'MONTH_NAME',
 'EVENT_TYPE',
 'CZ_TYPE',
 'CZ_FIPS',
 'CZ_NAME',
 'WFO',
 'BEGIN_DATE_TIME',
 'END_DATE_TIME',
 'INJURIES_DIRECT',
 'INJURIES_INDIRECT',
 'DEATHS_DIRECT',
 'DEATHS_INDIRECT',
 'DAMAGE_PROPERTY',
 'DAMAGE_CROPS',
 'SOURCE',
 'DAMAGE_PROPERTY_CONVERTED']

In [11]:
# examine types: indexing with one string column name gives a StringColumn expression
type(storm_events['EVENT_TYPE'])

ibis.expr.types.strings.StringColumn

In [12]:
# indexing with one integer column name gives an IntegerColumn expression
type(storm_events['YEAR'])

ibis.expr.types.numeric.IntegerColumn

In [13]:
# indexing with several column names gives a Table expression, not a column
type(storm_events["YEAR", "EVENT_TYPE"])

ibis.expr.types.relations.Table

In [14]:
# select() with a single column name still gives a Table expression
type(storm_events.select('EVENT_TYPE'))

ibis.expr.types.relations.Table

# Preview Data

In [15]:
# preview the first five rows (head() called without an argument shows five)
storm_events.head()

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,28-APR-50 14:45:00,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,29-APR-50 15:30:00,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,05-JUL-50 18:00:00,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,05-JUL-50 18:30:00,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,24-JUL-50 14:40:00,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0


In [16]:
# head() itself returns an Ibis Table expression, not a pandas DataFrame
type(storm_events.head())

ibis.expr.types.relations.Table

In [17]:
# transform to a pandas DataFrame: execute() runs the expression and returns the result
storm_events.head().execute()

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,28-APR-50 14:45:00,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,29-APR-50 15:30:00,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,05-JUL-50 18:00:00,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,05-JUL-50 18:30:00,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,24-JUL-50 14:40:00,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0


In [18]:
# confirm that execute() produced a pandas DataFrame
type(storm_events.head().execute())

pandas.core.frame.DataFrame

In [19]:
# number of rows in the table (plays the role len() has for a DataFrame)
# NOTE(review): len(storm_events) itself may not be supported on a Table
# expression -- confirm before relying on it
storm_events.count()

1295193

In [20]:
# limit the number of rows to 5; here this shows the same rows as head()
storm_events.limit(5)

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,28-APR-50 14:45:00,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,29-APR-50 15:30:00,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,05-JUL-50 18:00:00,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,05-JUL-50 18:30:00,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,24-JUL-50 14:40:00,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0


# Methods
## Select

In [21]:
# project the table down to the four columns used for the damage overview
storm_events.select(["YEAR", "STATE", "EVENT_TYPE", "DAMAGE_PROPERTY_CONVERTED"])

Unnamed: 0,YEAR,STATE,EVENT_TYPE,DAMAGE_PROPERTY_CONVERTED
0,1950,OKLAHOMA,Tornado,250000.0
1,1950,TEXAS,Tornado,25000.0
2,1950,PENNSYLVANIA,Tornado,25000.0
3,1950,PENNSYLVANIA,Tornado,2500.0
4,1950,PENNSYLVANIA,Tornado,2500.0
...,...,...,...,...
9995,1962,NEBRASKA,Tornado,0.0
9996,1962,NEBRASKA,Tornado,25000.0
9997,1962,NEBRASKA,Hail,0.0
9998,1962,NEBRASKA,Thunderstorm Wind,0.0


## Drop


In [22]:
# columns to remove; what remains is EVENT_ID, STATE, YEAR and
# DAMAGE_PROPERTY_CONVERTED
cols = [
    'STATE_FIPS',
    'MONTH_NAME',
    'EVENT_TYPE',
    'CZ_TYPE',
    'CZ_FIPS',
    'CZ_NAME',
    'WFO',
    'BEGIN_DATE_TIME',
    'END_DATE_TIME',
    'INJURIES_DIRECT',
    'INJURIES_INDIRECT',
    'DEATHS_DIRECT',
    'DEATHS_INDIRECT',
    'DAMAGE_PROPERTY',
    'DAMAGE_CROPS',
    'SOURCE',
]

# drop() returns a new table expression without the listed columns
storm_events.drop(cols)

Unnamed: 0,EVENT_ID,STATE,YEAR,DAMAGE_PROPERTY_CONVERTED
0,10096222,OKLAHOMA,1950,250000.0
1,10120412,TEXAS,1950,25000.0
2,10104927,PENNSYLVANIA,1950,25000.0
3,10104928,PENNSYLVANIA,1950,2500.0
4,10104929,PENNSYLVANIA,1950,2500.0
...,...,...,...,...
9995,10072832,NEBRASKA,1962,0.0
9996,10072833,NEBRASKA,1962,25000.0
9997,10072834,NEBRASKA,1962,0.0
9998,10072835,NEBRASKA,1962,0.0


## Filter

In [23]:
# first ten rows whose STATE is exactly "NEW YORK"
storm_events.filter(storm_events["STATE"] == "NEW YORK").limit(10)

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10075659,NEW YORK,36,1952,May,Tornado,C,31,ESSEX,,06-MAY-52 13:00:00,06-MAY-52 13:00:00,0,0,0,0,25K,0,,25000.0
1,10075660,NEW YORK,36,1954,July,Tornado,C,123,YATES,,14-JUL-54 16:30:00,14-JUL-54 16:30:00,0,0,0,0,2.5M,0,,2500000.0
2,10075661,NEW YORK,36,1954,September,Tornado,C,89,ST. LAWRENCE,,19-SEP-54 17:30:00,19-SEP-54 17:30:00,0,0,0,0,25K,0,,25000.0
3,10075701,NEW YORK,36,1958,June,Thunderstorm Wind,C,29,ERIE,,25-JUN-58 18:30:00,25-JUN-58 18:30:00,0,0,0,0,0,0,,0.0
4,10075702,NEW YORK,36,1958,June,Thunderstorm Wind,C,29,ERIE,,25-JUN-58 18:30:00,25-JUN-58 18:30:00,0,0,0,0,0,0,,0.0
5,10075703,NEW YORK,36,1958,June,Thunderstorm Wind,C,55,MONROE,,25-JUN-58 19:00:00,25-JUN-58 19:00:00,0,0,0,0,0,0,,0.0
6,10075704,NEW YORK,36,1958,June,Thunderstorm Wind,C,51,LIVINGSTON,,25-JUN-58 19:00:00,25-JUN-58 19:00:00,0,0,0,0,0,0,,0.0
7,10075697,NEW YORK,36,1958,May,Thunderstorm Wind,C,17,CHENANGO,,18-MAY-58 14:00:00,18-MAY-58 14:00:00,0,0,0,0,0,0,,0.0
8,10075698,NEW YORK,36,1958,May,Thunderstorm Wind,C,11,CAYUGA,,18-MAY-58 14:15:00,18-MAY-58 14:15:00,0,0,0,0,0,0,,0.0
9,10075699,NEW YORK,36,1958,May,Hail,C,117,WAYNE,,18-MAY-58 14:30:00,18-MAY-58 14:30:00,0,0,0,0,0,0,,0.0


In [24]:
# New York events whose converted property damage exceeds 90,000,000;
# predicates passed as a list are combined with AND
storm_events.filter([
    storm_events["STATE"] == "NEW YORK",
    storm_events["DAMAGE_PROPERTY_CONVERTED"] > 90_000_000,
])

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,5516309,NEW YORK,36,2006,June,Flash Flood,C,107,TIOGA,BGM,27-JUN-06 14:10:00,28-JUN-06 14:00:00,0,0,0,0,100M,,EMERGENCY MANAGER,100000000.0
1,5519036,NEW YORK,36,2006,June,Flash Flood,C,105,SULLIVAN,BGM,27-JUN-06 04:00:00,28-JUN-06 14:00:00,0,0,1,0,100M,,EMERGENCY MANAGER,100000000.0
2,5519040,NEW YORK,36,2006,June,Flash Flood,C,7,BROOME,BGM,27-JUN-06 16:00:00,28-JUN-06 14:00:00,0,0,0,0,200M,,EMERGENCY MANAGER,200000000.0
3,5519034,NEW YORK,36,2006,June,Flash Flood,C,25,DELAWARE,BGM,27-JUN-06 08:10:00,28-JUN-06 13:00:00,0,0,2,0,250M,,LAW ENFORCEMENT,250000000.0
4,1266,NEW YORK,36,2006,October,Lake-Effect Snow,Z,10,NORTHERN ERIE,BUF,12-OCT-06 14:30:00,13-OCT-06 07:00:00,0,0,1,0,130.00M,0.00K,Law Enforcement,130000000.0
5,206970,NEW YORK,36,2009,December,High Wind,Z,7,JEFFERSON,BUF,09-DEC-09 13:00:00,10-DEC-09 09:00:00,0,0,0,0,100.00M,0.00K,Utility Company,100000000.0
6,348760,NEW YORK,36,2011,September,Flood,C,107,TIOGA,BGM,07-SEP-11 16:54:00,11-SEP-11 03:00:00,0,0,0,0,309.00M,0.00K,River/Stream Gage,309000000.0
7,348756,NEW YORK,36,2011,September,Flood,C,7,BROOME,BGM,07-SEP-11 14:23:00,11-SEP-11 03:45:00,0,0,0,0,170.00M,0.00K,River/Stream Gage,170000000.0
8,349312,NEW YORK,36,2011,September,Flood,C,7,BROOME,BGM,07-SEP-11 12:22:00,11-SEP-11 23:40:00,0,0,0,0,160.00M,0.00K,River/Stream Gage,160000000.0
9,995048,NEW YORK,36,2021,September,Flash Flood,C,119,WESTCHESTER,OKX,01-SEP-21 21:30:00,01-SEP-21 22:00:00,0,0,0,0,92.60M,0.00K,Emergency Manager,92600000.0


In [25]:
# New York events whose converted property damage exceeds 90,000,000,
# reduced to the columns of interest.
# The filter runs before the projection so that both predicates reference
# columns of the table they are applied to (storm_events), rather than
# parent-table columns resolved through the projected table.
new_york = (
    storm_events
    .filter(
        (storm_events["STATE"] == "NEW YORK")
        & (storm_events["DAMAGE_PROPERTY_CONVERTED"] > 90_000_000)
    )
    .select(["STATE", "EVENT_TYPE", "YEAR", "DAMAGE_PROPERTY", "DAMAGE_PROPERTY_CONVERTED"])
)

new_york

Unnamed: 0,STATE,EVENT_TYPE,YEAR,DAMAGE_PROPERTY,DAMAGE_PROPERTY_CONVERTED
0,NEW YORK,Flash Flood,2006,100M,100000000.0
1,NEW YORK,Flash Flood,2006,100M,100000000.0
2,NEW YORK,Flash Flood,2006,200M,200000000.0
3,NEW YORK,Flash Flood,2006,250M,250000000.0
4,NEW YORK,Lake-Effect Snow,2006,130.00M,130000000.0
5,NEW YORK,High Wind,2009,100.00M,100000000.0
6,NEW YORK,Flood,2011,309.00M,309000000.0
7,NEW YORK,Flood,2011,170.00M,170000000.0
8,NEW YORK,Flood,2011,160.00M,160000000.0
9,NEW YORK,Flash Flood,2021,92.60M,92600000.0


## Mutate

In [26]:
# converted property damage divided by 1000, appended as a new column
conversion = storm_events["DAMAGE_PROPERTY_CONVERTED"] / 1000

storm_events.mutate(DAMAGE_PROPERTY_CONVERTED_K=conversion)

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,...,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED,DAMAGE_PROPERTY_CONVERTED_K
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,...,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0,250.0
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,...,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0,25.0
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,...,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0,25.0
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,...,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0,2.5
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,...,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0,2.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10072832,NEBRASKA,31,1962,May,Tornado,C,113,LOGAN,,...,14-MAY-62 18:30:00,0,0,0,0,0K,0,,0.0,0.0
9996,10072833,NEBRASKA,31,1962,May,Tornado,C,31,CHERRY,,...,15-MAY-62 17:00:00,0,0,0,0,25K,0,,25000.0,25.0
9997,10072834,NEBRASKA,31,1962,May,Hail,C,103,KEYA PAHA,,...,15-MAY-62 17:20:00,0,0,0,0,0,0,,0.0,0.0
9998,10072835,NEBRASKA,31,1962,May,Thunderstorm Wind,C,113,LOGAN,,...,15-MAY-62 19:00:00,0,0,0,0,0,0,,0.0,0.0


## Order By

In [27]:
# ten rows with the smallest converted property damage (ascending sort)
(
    storm_events
    .sort_by("DAMAGE_PROPERTY_CONVERTED")
    .select(["YEAR", "STATE", "EVENT_TYPE", "DAMAGE_PROPERTY_CONVERTED"])
    .limit(10)
)

Unnamed: 0,YEAR,STATE,EVENT_TYPE,DAMAGE_PROPERTY_CONVERTED
0,2011,HAWAII,High Surf,0.0
1,2011,HAWAII,High Surf,0.0
2,2011,HAWAII,High Surf,0.0
3,2011,HAWAII,High Surf,0.0
4,2011,HAWAII,High Surf,0.0
5,2011,HAWAII,High Surf,0.0
6,2011,ILLINOIS,Hail,0.0
7,2011,ILLINOIS,Tornado,0.0
8,2011,ILLINOIS,Flash Flood,0.0
9,2011,NEW MEXICO,Drought,0.0


In [28]:
# ten rows with the largest converted property damage (descending sort)
(
    storm_events
    .sort_by(ibis.desc("DAMAGE_PROPERTY_CONVERTED"))
    .select(["YEAR", "STATE", "EVENT_TYPE", "DAMAGE_PROPERTY_CONVERTED"])
    .limit(10)
)

Unnamed: 0,YEAR,STATE,EVENT_TYPE,DAMAGE_PROPERTY_CONVERTED
0,2020,WASHINGTON,High Wind,950000000.0
1,2013,ILLINOIS,Tornado,910000000.0
2,2012,TEXAS,Hail,900000000.0
3,2020,LOUISIANA,Hurricane,900000000.0
4,2010,ARIZONA,Hail,900000000.0
5,2004,FLORIDA,High Wind,881000000.0
6,2005,LOUISIANA,Hurricane (Typhoon),850000000.0
7,2005,LOUISIANA,Hurricane (Typhoon),850000000.0
8,2020,LOUISIANA,Hurricane,800000000.0
9,2005,MISSISSIPPI,Hurricane (Typhoon),750000000.0


## Aggregate

In [29]:
# total number of rows in the table
storm_events.count()

1295193

In [30]:
# smallest converted property damage across all events
storm_events["DAMAGE_PROPERTY_CONVERTED"].min()

0.0

In [31]:
# largest converted property damage across all events
storm_events["DAMAGE_PROPERTY_CONVERTED"].max()

950000000.0

In [32]:
# average converted property damage per event
storm_events["DAMAGE_PROPERTY_CONVERTED"].mean()

222760.43361035068

In [33]:
# total converted property damage over all events
storm_events["DAMAGE_PROPERTY_CONVERTED"].sum()

288517754289.09094

## Group By

In [34]:
# group_by() on its own gives a GroupedTable; the cells below follow it with aggregate()
type(storm_events.group_by("EVENT_TYPE"))

ibis.expr.types.groupby.GroupedTable

In [35]:
# ten (YEAR, EVENT_TYPE) groups with the highest total converted property damage
(
    storm_events
    .group_by(["YEAR", "EVENT_TYPE"])
    .aggregate(
        storm_events["DAMAGE_PROPERTY_CONVERTED"].sum().name("DAMAGE_PROPERTY")
    )
    .sort_by(ibis.desc("DAMAGE_PROPERTY"))
    .limit(10)
)

Unnamed: 0,YEAR,EVENT_TYPE,DAMAGE_PROPERTY
0,2017,Flash Flood,19118690000.0
1,2005,Hurricane (Typhoon),11112730000.0
2,2021,Hurricane,7588400000.0
3,2018,Hurricane,7143780000.0
4,2020,Hurricane,6702710000.0
5,2011,Tornado,3884651000.0
6,2008,Hurricane (Typhoon),3663098000.0
7,2016,Flash Flood,3645402000.0
8,2017,Tropical Storm,3645017000.0
9,2011,Flood,3621686000.0


In [36]:
# ten (YEAR, EVENT_TYPE) groups with the highest average converted property damage
(
    storm_events
    .group_by(["YEAR", "EVENT_TYPE"])
    .aggregate(
        storm_events["DAMAGE_PROPERTY_CONVERTED"].mean().name("DAMAGE_PROPERTY_AVG")
    )
    .sort_by(ibis.desc("DAMAGE_PROPERTY_AVG"))
    .limit(10)
)

Unnamed: 0,YEAR,EVENT_TYPE,DAMAGE_PROPERTY_AVG
0,2016,Hurricane,201733300.0
1,2021,Hurricane,199694700.0
2,2020,Hurricane,113605300.0
3,2018,Hurricane,106623600.0
4,2017,Hurricane,82106490.0
5,2009,Tsunami,81000000.0
6,2021,Storm Surge/Tide,72908670.0
7,2008,Hurricane (Typhoon),50876350.0
8,2018,Storm Surge/Tide,47735090.0
9,1997,Hurricane (Typhoon),47223570.0


## Cast

In [37]:
# cast the float64 damage column to int32 and append it as a new column
expr = storm_events["DAMAGE_PROPERTY_CONVERTED"].cast("int32")

storm_events.mutate(DAMAGE_PROPERTY_CONVERTED_INT=expr)

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,...,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED,DAMAGE_PROPERTY_CONVERTED_INT
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,...,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0,250000
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,...,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0,25000
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,...,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0,25000
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,...,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0,2500
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,...,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0,2500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10072832,NEBRASKA,31,1962,May,Tornado,C,113,LOGAN,,...,14-MAY-62 18:30:00,0,0,0,0,0K,0,,0.0,0
9996,10072833,NEBRASKA,31,1962,May,Tornado,C,31,CHERRY,,...,15-MAY-62 17:00:00,0,0,0,0,25K,0,,25000.0,25000
9997,10072834,NEBRASKA,31,1962,May,Hail,C,103,KEYA PAHA,,...,15-MAY-62 17:20:00,0,0,0,0,0,0,,0.0,0
9998,10072835,NEBRASKA,31,1962,May,Thunderstorm Wind,C,113,LOGAN,,...,15-MAY-62 19:00:00,0,0,0,0,0,0,,0.0,0


## Other

In [38]:
# distinct values: the unique entries of the EVENT_TYPE column
storm_events.select(["EVENT_TYPE"]).distinct()

Unnamed: 0,EVENT_TYPE
0,Marine Thunderstorm Wind
1,Ice Storm
2,Cold/Wind Chill
3,Marine Hurricane/Typhoon
4,HAIL FLOODING
...,...
64,Marine Strong Wind
65,"TORNADOES, TSTM WIND, HAIL"
66,THUNDERSTORM WIND/ TREES
67,THUNDERSTORM WINDS HEAVY RAIN


In [39]:
# convert to sql statement
# The aggregate is named "sum" explicitly so that the sort key below does not
# depend on whatever name Ibis auto-generates for an unnamed aggregate.
expr = (
    storm_events
    .group_by(["YEAR", "EVENT_TYPE"])
    .aggregate(storm_events.DAMAGE_PROPERTY_CONVERTED.sum().name("sum"))
    .sort_by(ibis.desc("sum"))
    .limit(10)
)

# render the expression as a SQL string in the postgres dialect
ibis.to_sql(expr, dialect="postgres")

'SELECT\n  t0."YEAR",\n  t0."EVENT_TYPE",\n  t0.sum\nFROM (\n  SELECT\n    t1."YEAR" AS "YEAR",\n    t1."EVENT_TYPE" AS "EVENT_TYPE",\n    SUM(t1."DAMAGE_PROPERTY_CONVERTED") AS sum\n  FROM storm_events AS t1\n  GROUP BY\n    t1."YEAR",\n    t1."EVENT_TYPE"\n) AS t0\nORDER BY\n  t0.sum DESC\nLIMIT 10'

In [40]:
# is in: keep rows whose EVENT_TYPE is one of the listed values
storm_events.filter(storm_events["EVENT_TYPE"].isin(["Hail"]))

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10071654,NEBRASKA,31,1958,June,Hail,C,165,SIOUX,,18-JUN-58 14:00:00,18-JUN-58 14:00:00,0,0,0,0,0,0,,0.0
1,10071660,NEBRASKA,31,1958,June,Hail,C,145,RED WILLOW,,30-JUN-58 19:10:00,30-JUN-58 19:10:00,0,0,0,0,0,0,,0.0
2,10121841,TEXAS,48,1958,July,Hail,C,153,FLOYD,,31-JUL-58 19:45:00,31-JUL-58 19:45:00,0,0,0,0,0,0,,0.0
3,10124075,TENNESSEE,47,1958,April,Hail,C,145,ROANE,,24-APR-58 17:50:00,24-APR-58 17:50:00,0,0,0,0,0,0,,0.0
4,10028944,KANSAS,20,1958,May,Hail,C,167,RUSSELL,,24-MAY-58 15:57:00,24-MAY-58 15:57:00,0,0,0,0,0,0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10136016,TEXAS,48,1969,June,Hail,C,205,HARTLEY,,17-JUN-69 18:00:00,17-JUN-69 18:00:00,0,0,0,0,0,0,,0.0
9996,10136017,TEXAS,48,1969,June,Hail,C,375,POTTER,,17-JUN-69 20:55:00,17-JUN-69 20:55:00,0,0,0,0,0,0,,0.0
9997,10136018,TEXAS,48,1969,June,Hail,C,191,HALL,,17-JUN-69 22:45:00,17-JUN-69 22:45:00,0,0,0,0,0,0,,0.0
9998,10136019,TEXAS,48,1969,June,Hail,C,75,CHILDRESS,,17-JUN-69 23:45:00,17-JUN-69 23:45:00,0,0,0,0,0,0,,0.0


In [41]:
# not in: keep rows whose EVENT_TYPE is none of the listed values
storm_events.filter(storm_events["EVENT_TYPE"].notin(["Hail"]))

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,28-APR-50 14:45:00,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,29-APR-50 15:30:00,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,05-JUL-50 18:00:00,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,05-JUL-50 18:30:00,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,24-JUL-50 14:40:00,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10117786,PENNSYLVANIA,42,1956,August,Thunderstorm Wind,C,85,MERCER,,18-AUG-56 18:00:00,18-AUG-56 18:00:00,0,0,0,0,0,0,,0.0
9996,10117787,PENNSYLVANIA,42,1956,August,Thunderstorm Wind,C,25,CARBON,,28-AUG-56 19:00:00,28-AUG-56 19:00:00,0,0,0,0,0,0,,0.0
9997,10007476,ILLINOIS,17,1956,April,Tornado,C,101,LAWRENCE,,03-APR-56 14:30:00,03-APR-56 14:30:00,0,0,0,0,25K,0,,25000.0
9998,10007477,ILLINOIS,17,1956,April,Thunderstorm Wind,C,197,WILL,,03-APR-56 15:15:00,03-APR-56 15:15:00,0,0,0,0,0,0,,0.0


In [42]:
# is null: keep rows where SOURCE is missing
storm_events.filter(storm_events["SOURCE"].isnull())

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10096222,OKLAHOMA,40,1950,April,Tornado,C,149,WASHITA,,28-APR-50 14:45:00,28-APR-50 14:45:00,0,0,0,0,250K,0,,250000.0
1,10120412,TEXAS,48,1950,April,Tornado,C,93,COMANCHE,,29-APR-50 15:30:00,29-APR-50 15:30:00,0,0,0,0,25K,0,,25000.0
2,10104927,PENNSYLVANIA,42,1950,July,Tornado,C,77,LEHIGH,,05-JUL-50 18:00:00,05-JUL-50 18:00:00,2,0,0,0,25K,0,,25000.0
3,10104928,PENNSYLVANIA,42,1950,July,Tornado,C,43,DAUPHIN,,05-JUL-50 18:30:00,05-JUL-50 18:30:00,0,0,0,0,2.5K,0,,2500.0
4,10104929,PENNSYLVANIA,42,1950,July,Tornado,C,39,CRAWFORD,,24-JUL-50 14:40:00,24-JUL-50 14:40:00,0,0,0,0,2.5K,0,,2500.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,10072832,NEBRASKA,31,1962,May,Tornado,C,113,LOGAN,,14-MAY-62 18:30:00,14-MAY-62 18:30:00,0,0,0,0,0K,0,,0.0
9996,10072833,NEBRASKA,31,1962,May,Tornado,C,31,CHERRY,,15-MAY-62 17:00:00,15-MAY-62 17:00:00,0,0,0,0,25K,0,,25000.0
9997,10072834,NEBRASKA,31,1962,May,Hail,C,103,KEYA PAHA,,15-MAY-62 17:20:00,15-MAY-62 17:20:00,0,0,0,0,0,0,,0.0
9998,10072835,NEBRASKA,31,1962,May,Thunderstorm Wind,C,113,LOGAN,,15-MAY-62 19:00:00,15-MAY-62 19:00:00,0,0,0,0,0,0,,0.0


In [43]:
# not null: keep rows where SOURCE has a value
storm_events.filter(storm_events["SOURCE"].notnull())

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,990000001,FLORIDA,12,1972,June,Tornado,C,43,GLADES,,18-JUN-72 15:17:00,18-JUN-72 15:17:00,6,0,0,0,190K,5K,Manual Input,190000.0
1,990000002,FLORIDA,12,1972,June,Tornado,C,51,HENDRY,,18-JUN-72 15:13:00,18-JUN-72 15:13:00,1,0,1,0,10K,5K,Manual Input,10000.0
2,990000003,FLORIDA,12,1972,June,Tornado,C,93,OKEECHOBEE,,18-JUN-72 22:55:00,18-JUN-72 22:55:00,44,0,6,0,500K,0,Manual Input,500000.0
3,10032307,Kentucky,21,1974,April,Tornado,C,215,SPENCER,,03-APR-74 17:25:00,03-APR-74 17:31:00,0,0,0,0,25K,0,WFO,25000.0
4,10032301,Kentucky,21,1974,April,Tornado,C,93,HARDIN,,03-APR-74 16:45:00,03-APR-74 16:55:00,57,0,2,0,250K,0,WFO,250000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,5678048,OKLAHOMA,40,1998,October,Tornado,C,3,ALFALFA,OUN,04-OCT-98 14:38:00,04-OCT-98 14:43:00,0,0,0,0,55K,,UNKNOWN,55000.0
9996,5678049,OKLAHOMA,40,1998,October,Tornado,C,11,BLAINE,OUN,04-OCT-98 16:50:00,04-OCT-98 17:12:00,0,0,0,0,50K,,UNKNOWN,50000.0
9997,5673341,NEBRASKA,31,1998,November,High Wind,Z,77,FILLMORE,GID,10-NOV-98 04:00:00,10-NOV-98 16:00:00,0,0,0,0,0,0,"AWOS,ASOS,MESONET,ETC",0.0
9998,5676260,PENNSYLVANIA,42,1998,November,Heat,Z,67,CHESTER,PHI,28-NOV-98 10:00:00,30-NOV-98 23:59:00,0,0,0,0,0,0,"AWOS,ASOS,MESONET,ETC",0.0


In [44]:
# dropna: drop rows where SOURCE is null; the preview matches the notnull()
# filter in the previous cell
storm_events.dropna(subset=["SOURCE"])

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,990000001,FLORIDA,12,1972,June,Tornado,C,43,GLADES,,18-JUN-72 15:17:00,18-JUN-72 15:17:00,6,0,0,0,190K,5K,Manual Input,190000.0
1,990000002,FLORIDA,12,1972,June,Tornado,C,51,HENDRY,,18-JUN-72 15:13:00,18-JUN-72 15:13:00,1,0,1,0,10K,5K,Manual Input,10000.0
2,990000003,FLORIDA,12,1972,June,Tornado,C,93,OKEECHOBEE,,18-JUN-72 22:55:00,18-JUN-72 22:55:00,44,0,6,0,500K,0,Manual Input,500000.0
3,10032307,Kentucky,21,1974,April,Tornado,C,215,SPENCER,,03-APR-74 17:25:00,03-APR-74 17:31:00,0,0,0,0,25K,0,WFO,25000.0
4,10032301,Kentucky,21,1974,April,Tornado,C,93,HARDIN,,03-APR-74 16:45:00,03-APR-74 16:55:00,57,0,2,0,250K,0,WFO,250000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,5678048,OKLAHOMA,40,1998,October,Tornado,C,3,ALFALFA,OUN,04-OCT-98 14:38:00,04-OCT-98 14:43:00,0,0,0,0,55K,,UNKNOWN,55000.0
9996,5678049,OKLAHOMA,40,1998,October,Tornado,C,11,BLAINE,OUN,04-OCT-98 16:50:00,04-OCT-98 17:12:00,0,0,0,0,50K,,UNKNOWN,50000.0
9997,5673341,NEBRASKA,31,1998,November,High Wind,Z,77,FILLMORE,GID,10-NOV-98 04:00:00,10-NOV-98 16:00:00,0,0,0,0,0,0,"AWOS,ASOS,MESONET,ETC",0.0
9998,5676260,PENNSYLVANIA,42,1998,November,Heat,Z,67,CHESTER,PHI,28-NOV-98 10:00:00,30-NOV-98 23:59:00,0,0,0,0,0,0,"AWOS,ASOS,MESONET,ETC",0.0


In [45]:
# contains: lower-case EVENT_TYPE first so the substring match on "flood"
# ignores case (matches both "Flash Flood" and "HAIL FLOODING")
storm_events.filter(storm_events["EVENT_TYPE"].lower().contains("flood"))

Unnamed: 0,EVENT_ID,STATE,STATE_FIPS,YEAR,MONTH_NAME,EVENT_TYPE,CZ_TYPE,CZ_FIPS,CZ_NAME,WFO,BEGIN_DATE_TIME,END_DATE_TIME,INJURIES_DIRECT,INJURIES_INDIRECT,DEATHS_DIRECT,DEATHS_INDIRECT,DAMAGE_PROPERTY,DAMAGE_CROPS,SOURCE,DAMAGE_PROPERTY_CONVERTED
0,10347267,SOUTH CAROLINA,45,1994,April,THUNDERSTORM WINDS/FLOODING,C,33,DILLON,,15-APR-94 15:40:00,15-APR-94 15:40:00,0,0,0,0,5K,0,,5000.0
1,10335845,NEVADA,32,1995,June,HAIL FLOODING,C,0,NVZ003 - 004,,01-JUN-95 16:30:00,01-JUN-95 19:10:00,0,0,0,0,0,0,,0.0
2,10335846,NEVADA,32,1995,August,THUNDERSTORM WINDS/FLASH FLOOD,C,0,NVZ003 - 004,,05-AUG-95 19:52:00,05-AUG-95 21:00:00,0,0,0,0,0,0,,0.0
3,10355544,TEXAS,48,1995,April,THUNDERSTORM WINDS/ FLOOD,C,489,WILLACY,,04-APR-95 19:30:00,04-APR-95 19:30:00,0,0,0,0,10K,0,,10000.0
4,10355545,TEXAS,48,1995,April,THUNDERSTORM WINDS/ FLOOD,C,489,WILLACY,,04-APR-95 20:30:00,04-APR-95 20:30:00,0,0,0,0,10K,30K,,10000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,5682038,MISSISSIPPI,28,1999,January,Flash Flood,C,1,ADAMS,JAN,29-JAN-99 17:00:00,30-JAN-99 01:30:00,0,0,0,0,50K,,LAW ENFORCEMENT,50000.0
9996,5695759,TEXAS,48,1999,May,Flash Flood,C,265,KERR,EWX,11-MAY-99 21:30:00,11-MAY-99 22:30:00,0,0,0,0,10K,0,LAW ENFORCEMENT,10000.0
9997,5696264,MISSISSIPPI,28,1999,April,Flood,C,33,DE SOTO,MEG,03-APR-99 19:05:00,03-APR-99 19:10:00,0,0,0,0,1K,,AMATEUR RADIO,1000.0
9998,5680504,TEXAS,48,1999,January,Flash Flood,C,37,BOWIE,SHV,30-JAN-99 01:00:00,30-JAN-99 03:30:00,0,0,0,0,166K,,EMERGENCY MANAGER,166000.0
