In [1]:
import polars as pl
from pathlib import Path

In [2]:
# Read every nycflights13 table from CSV files stored in a single folder.
base_folder = Path('data/nycflights13')
df_airlines = pl.read_csv(base_folder.joinpath('airlines.csv'))
df_airports = pl.read_csv(base_folder.joinpath('airports.csv'))
# In planes and weather the literal string 'NA' is parsed as a null value.
df_planes = pl.read_csv(
    base_folder.joinpath('planes.csv'),
    null_values=['NA'],
)
# Weather additionally raises the number of rows scanned for schema inference.
df_weather = pl.read_csv(
    base_folder.joinpath('weather.csv'),
    null_values=['NA'],
    infer_schema_length=100000,
)
df_flights = pl.read_csv(base_folder.joinpath('flights.csv'))

In [3]:
# Preview the first five rows of the flights table.
df_flights.head(n=5)

year,month,day,dep_time,sched_dep_time,dep_delay,arr_time,sched_arr_time,arr_delay,carrier,flight,tailnum,origin,dest,air_time,distance,hour,minute,time_hour
i64,i64,i64,i64,i64,f64,i64,i64,f64,str,i64,str,str,str,f64,f64,f64,f64,f64
2013,1,1,517,515,2.0,830,819,11.0,"""UA""",1545,"""N14228""","""EWR""","""IAH""",227.0,1400.0,5.0,15.0,1357000000.0
2013,1,1,533,529,4.0,850,830,20.0,"""UA""",1714,"""N24211""","""LGA""","""IAH""",227.0,1416.0,5.0,29.0,1357000000.0
2013,1,1,542,540,2.0,923,850,33.0,"""AA""",1141,"""N619AA""","""JFK""","""MIA""",160.0,1089.0,5.0,40.0,1357000000.0
2013,1,1,544,545,-1.0,1004,1022,-18.0,"""B6""",725,"""N804JB""","""JFK""","""BQN""",183.0,1576.0,5.0,45.0,1357000000.0
2013,1,1,554,600,-6.0,812,837,-25.0,"""DL""",461,"""N668DN""","""LGA""","""ATL""",116.0,762.0,6.0,0.0,1357000000.0


In [4]:
# Preview the first five rows of the airlines table.
df_airlines.head(n=5)

carrier,name
str,str
"""9E""","""Endeavor Air Inc."""
"""AA""","""American Airlines Inc."""
"""AS""","""Alaska Airlines Inc."""
"""B6""","""JetBlue Airways"""
"""DL""","""Delta Air Lines Inc."""


In [5]:
# Preview the first five rows of the airports table.
df_airports.head(n=5)

faa,name,lat,lon,alt,tz,dst,tzone
str,str,f64,f64,i64,i64,str,str
"""04G""","""Lansdowne Airport""",41.130472,-80.619583,1044,-5,"""A""","""America/New_York"""
"""06A""","""Moton Field Municipal Airport""",32.460572,-85.680028,264,-6,"""A""","""America/Chicago"""
"""06C""","""Schaumburg Regional""",41.989341,-88.101243,801,-6,"""A""","""America/Chicago"""
"""06N""","""Randall Airport""",41.431912,-74.391561,523,-5,"""A""","""America/New_York"""
"""09J""","""Jekyll Island Airport""",31.074472,-81.427778,11,-5,"""A""","""America/New_York"""


In [6]:
# Preview the first five rows of the planes table.
df_planes.head(n=5)

tailnum,year,type,manufacturer,model,engines,seats,speed,engine
str,i64,str,str,str,i64,i64,str,str
"""N10156""",2004,"""Fixed wing multi engine""","""EMBRAER""","""EMB-145XR""",2,55,,"""Turbo-fan"""
"""N102UW""",1998,"""Fixed wing multi engine""","""AIRBUS INDUSTRIE""","""A320-214""",2,182,,"""Turbo-fan"""
"""N103US""",1999,"""Fixed wing multi engine""","""AIRBUS INDUSTRIE""","""A320-214""",2,182,,"""Turbo-fan"""
"""N104UW""",1999,"""Fixed wing multi engine""","""AIRBUS INDUSTRIE""","""A320-214""",2,182,,"""Turbo-fan"""
"""N10575""",2002,"""Fixed wing multi engine""","""EMBRAER""","""EMB-145LR""",2,55,,"""Turbo-fan"""


In [7]:
# Preview the first five rows of the weather table.
df_weather.head(n=5)

origin,year,month,day,hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib,time_hour
str,i64,i64,i64,i64,f64,f64,f64,i64,f64,f64,f64,f64,f64,str
"""EWR""",2013,1,1,1,39.02,26.06,59.37,270,10.35702,,0.0,1012.0,10.0,"""2013-01-01T06:00:00Z"""
"""EWR""",2013,1,1,2,39.02,26.96,61.63,250,8.05546,,0.0,1012.3,10.0,"""2013-01-01T07:00:00Z"""
"""EWR""",2013,1,1,3,39.02,28.04,64.43,240,11.5078,,0.0,1012.5,10.0,"""2013-01-01T08:00:00Z"""
"""EWR""",2013,1,1,4,39.92,28.04,62.21,250,12.65858,,0.0,1012.2,10.0,"""2013-01-01T09:00:00Z"""
"""EWR""",2013,1,1,5,39.02,28.04,64.43,260,12.65858,,0.0,1011.9,10.0,"""2013-01-01T10:00:00Z"""


In [8]:
# List the distinct values of `origin` found in the weather table.
df_weather.select(
    pl.col('origin').unique()
)

origin
str
"""EWR"""
"""LGA"""
"""JFK"""


In [9]:
# List the distinct values of `origin` found in the flights table.
df_flights.select(
    pl.col('origin').unique()
)

origin
str
"""EWR"""
"""LGA"""
"""JFK"""


In [10]:
# Merge the year/month/day/hour columns into a single datetime column and
# rescale `temp` with (temp - 32) / 1.8, i.e. the Fahrenheit-to-Celsius
# formula (assumes `temp` is stored in degrees Fahrenheit -- TODO confirm).
df_weather.select(
    pl.datetime(
        year=pl.col('year'),
        month=pl.col('month'),
        day=pl.col('day'),
        hour=pl.col('hour'),
    ),
    pl.col('temp').sub(32).truediv(1.8),
)

datetime,temp
datetime[μs],f64
2013-01-01 01:00:00,3.9
2013-01-01 02:00:00,3.9
2013-01-01 03:00:00,3.9
2013-01-01 04:00:00,4.4
2013-01-01 05:00:00,3.9
…,…
2013-12-30 14:00:00,2.2
2013-12-30 15:00:00,1.1
2013-12-30 16:00:00,0.0
2013-12-30 17:00:00,-0.6


In [11]:
# Keep only the airport code and its latitude/longitude.
df_airports_loc = df_airports.select(['faa', 'lat', 'lon'])

In [12]:
# Count the flights for each (origin, dest) pair, then attach the coordinates
# of both endpoints by joining the airport locations twice. Every airport
# column is suffixed first so the origin and destination columns do not clash.
# Both joins are inner joins: a route whose airport code has no match in
# `df_airports_loc` is dropped from the result.
df_route = (
    df_flights
    .group_by(['origin', 'dest'])
    .agg(pl.len())
    .join(
        df_airports_loc.select(pl.all().name.suffix('_origin')),
        left_on='origin',
        right_on='faa_origin',
        how='inner',
    )
    .join(
        df_airports_loc.select(pl.all().name.suffix('_dest')),
        left_on='dest',
        right_on='faa_dest',
        how='inner',
    )
)
df_route.head(n=5)

origin,dest,len,lat_origin,lon_origin,lat_dest,lon_dest
str,str,u32,f64,f64,f64,f64
"""JFK""","""ABQ""",254,40.639751,-73.778925,35.040222,-106.609194
"""JFK""","""ACK""",265,40.639751,-73.778925,41.253053,-70.060181
"""EWR""","""ALB""",439,40.6925,-74.168667,42.748267,-73.801692
"""EWR""","""ANC""",8,40.6925,-74.168667,61.174361,-149.996361
"""EWR""","""ATL""",5022,40.6925,-74.168667,33.636719,-84.428067


In [13]:
import holoviews as hv
from holoviews import opts
# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh', inline=False)
# Register default options for all Curve elements.
opts.defaults(
    opts.Curve(
        width=800,
        show_grid=True,
    )
)

In [17]:
import geopandas as gpd
import shapely
import hvplot.pandas

In [15]:
# Draw the origin coordinates of every route as points over map tiles;
# hovering over a point also shows the `origin` column.
plot = df_route.plot.points(
    x='lon_origin',
    y='lat_origin',
    geo=True,
    tiles=True,
    hover_cols=['origin'],
    frame_width=600,
)

plot

In [103]:
import geoviews as gv
from cartopy import crs

def f(args):
    """Build one straight line per route in Google (Web) Mercator coordinates.

    Parameters
    ----------
    args : sequence of four polars Series
        Origin x, origin y, destination x and destination y, in that order.
        The values are interpreted as PlateCarree coordinates (presumably
        longitude/latitude in degrees -- confirm against the caller).

    Returns
    -------
    pl.Series
        One ``shapely.geometry.LineString`` per input row, running from the
        projected origin point to the projected destination point.
    """
    x1s, y1s, x2s, y2s = args
    # Use `crs`, the name bound by `from cartopy import crs` in this cell.
    # The former `ccrs` alias was never defined in the notebook and raised a
    # NameError when the notebook was run on a fresh kernel.
    source = crs.PlateCarree()
    target = crs.GOOGLE_MERCATOR
    # The transposed result unpacks into x, y and a third row that is unused.
    x1s, y1s, _ = target.transform_points(source, x1s.to_numpy(), y1s.to_numpy()).T
    x2s, y2s, _ = target.transform_points(source, x2s.to_numpy(), y2s.to_numpy()).T

    return pl.Series([
        shapely.geometry.LineString([(x1, y1), (x2, y2)])
        for x1, y1, x2, y2 in zip(x1s, y1s, x2s, y2s)
    ])

# Turn every route into a line geometry (via `f`) next to its flight count,
# then hand the result to GeoPandas through a pandas DataFrame.
gdf = gpd.GeoDataFrame(
    df_route
    .select(
        'len',
        pl.map_batches(
            ['lon_origin', 'lat_origin', 'lon_dest', 'lat_dest'],
            f,
        ).alias('geometry'),
    )
    .to_pandas()
)

# Colour each path by its flight count. geo=False is presumably used because
# `f` has already projected the coordinates -- confirm.
plot = gdf.hvplot.paths(
    c='len',
    tiles=True,
    geo=False,
    frame_width=600,
)
plot.opts(opts.Path(cmap='PiYG'))