In [None]:
!pip install polars

In [None]:
import polars as pl
import time
# to enrich the examples in this quickstart with dates
from datetime import datetime, timedelta 
# to generate data for the examples
import numpy as np 
import pandas as pd

In [None]:
# Small demo frame: an integer column, the first three days of January 2022
# as datetimes, and a float column.
dataframe = pl.DataFrame(
    {
        "integer": [1, 2, 3],
        "date": [datetime(2022, 1, day) for day in (1, 2, 3)],
        "float": [4.0, 5.0, 6.0],
    }
)

print(dataframe)

In [None]:
# Polars: read the pipe-delimited customer file, which has no header row,
# and assign the column names explicitly.
# NOTE(review): customer_zipcode gets no explicit dtype here, so it is left to
# type inference — confirm that leading zeros are not lost.
df_pl_customer = pl.read_csv(
    "../data/customer",
    separator="|",
    has_header=False,
    new_columns=[
        "customer_id",
        "customer_fname",
        "customer_lname",
        "customer_email",
        "customer_password",
        "customer_street",
        "customer_city",
        "customer_state",
        "customer_zipcode",
    ],
)

In [None]:
# Polars: read the pipe-delimited orders file, which has no header row,
# and assign the column names explicitly.
df_pl_orders = pl.read_csv(
    "../data/orders",
    separator="|",
    has_header=False,
    new_columns=[
        "order_id",
        "order_date",
        "order_customer_id",
        "order_status",
    ],
)


In [None]:
# Polars: read the pipe-delimited order-items file, which has no header row,
# and assign the column names explicitly.
df_pl_order_items = pl.read_csv(
    "../data/order_items",
    separator="|",
    has_header=False,
    new_columns=[
        "order_item_id",
        "order_item_order_id",
        "order_item_product_id",
        "order_item_quantity",
        "order_item_subtotal",
        "order_item_product_price",
    ],
)


In [None]:
# pandas: read the same pipe-delimited, header-less customer file so the two
# libraries can be compared on identical data.
# NOTE(review): customer_zipcode gets no explicit dtype here, so it is left to
# type inference — confirm that leading zeros are not lost.
df_pd_customer = pd.read_csv(
    "../data/customer",
    sep="|",
    header=None,
    names=[
        "customer_id",
        "customer_fname",
        "customer_lname",
        "customer_email",
        "customer_password",
        "customer_street",
        "customer_city",
        "customer_state",
        "customer_zipcode",
    ],
)

In [None]:
# pandas: read the pipe-delimited, header-less orders file and name its columns.
df_pd_orders = pd.read_csv(
    "../data/orders",
    sep="|",
    header=None,
    names=[
        "order_id",
        "order_date",
        "order_customer_id",
        "order_status",
    ],
)


In [None]:
# pandas: read the pipe-delimited, header-less order-items file and name its
# columns.
df_pd_order_items = pd.read_csv(
    "../data/order_items",
    sep="|",
    header=None,
    names=[
        "order_item_id",
        "order_item_order_id",
        "order_item_product_id",
        "order_item_quantity",
        "order_item_subtotal",
        "order_item_product_price",
    ],
)


In [None]:
# Preview only the first rows instead of rendering the whole customer table;
# the frame also carries the customer_email and customer_password columns, so
# keep the amount of data stored in the notebook output small.
df_pd_customer.head()

In [None]:
%%time
df_pd_customer.merge(df_pd_orders, left_on='customer_id', right_on='order_customer_id', how='left').query("customer_street =='9526 Noble Embers Ridge'")

In [None]:
%%time
df_pl_customer.join(df_pl_orders, left_on="customer_id", right_on="order_customer_id", how="left").filter(pl.col("customer_street") == "9526 Noble Embers Ridge")

In [None]:
# Sample film data supplied as one tuple per row:
# (title, release_year, budget, gross, imdb_score).
df = pl.DataFrame(
    data=[
        ("The Godfather", 1972, 6_000_000, 134_821_952, 9.2),
        ("The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0),
        ("Schindler's List", 1993, 22_000_000, 96_067_179, 8.9),
        ("Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9),
        ("The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3),
    ],
    schema=["title", "release_year", "budget", "gross", "imdb_score"],
    # The data is row-oriented; stating it explicitly avoids relying on
    # Polars' orientation inference (which warns in newer releases).
    orient="row",
)
# Expose the frame to SQL queries under the table name "films".
ctx = pl.SQLContext(films=df)

In [None]:
# Films released after 1990, highest IMDb score first.
# eager=True is passed so the call's result is shown directly, without the
# separate .collect() step used by the lazy queries elsewhere in this notebook.
ctx.execute(
    """
    SELECT title, release_year, imdb_score
    FROM films
    WHERE release_year > 1990
    ORDER BY imdb_score DESC
    """,
    eager=True,
)

In [None]:
# Total gross and film count per decade, largest total first.
# NOTE(review): the decade bucketing relies on `release_year / 10` being an
# integer division in the SQL engine — confirm for the installed Polars version.
ctx.execute(
    """
    SELECT
        MAX(release_year / 10) * 10 AS decade,
        SUM(gross) AS total_gross,
        COUNT(title) AS n_films,
    FROM films
    GROUP BY (release_year / 10) -- decade
    ORDER BY total_gross DESC
    """,
    eager=True,
)

In [None]:
# Start from an empty SQL context, register a one-row frame under the table
# name "frame_data", then query it and collect the lazy result.
df = pl.DataFrame(data={"hello": ["world"]})
ctx = pl.SQLContext()
(
    ctx.register("frame_data", df)
    .execute("SELECT * FROM frame_data")
    .collect()
)

In [None]:
# Four small lazy frames that share the key column "a":
# lf1 and lf3 carry a "b" column, lf2 and lf4 carry a "c" column.
lf1 = pl.LazyFrame(data={"a": [1, 2, 3], "b": ["m", "n", "o"]})
lf2 = pl.LazyFrame(data={"a": [2, 3, 4], "c": ["p", "q", "r"]})
lf3 = pl.LazyFrame(data={"a": [3, 4, 5], "b": ["s", "t", "u"]})
lf4 = pl.LazyFrame(data={"a": [4, 5, 6], "c": ["v", "w", "x"]})

In [None]:
# Fresh SQL context with two lazy frames registered in one call, as the
# tables "tbl1" and "tbl2".
ctx = pl.SQLContext().register_many(
    {
        "tbl1": lf1,
        "tbl2": lf2,
    }
)

In [None]:
# Run the query against the registered table and materialize the result
# with .collect().
pl_df = (
    ctx.execute("SELECT * FROM tbl1")
    .collect()
)

In [None]:
# Fresh SQL context exposing the Polars customer and order frames as the
# tables "customers" and "orders".
ctx = pl.SQLContext().register_many(
    {
        "customers": df_pl_customer,
        "orders": df_pl_orders,
    }
)

In [None]:
# SQL version of the customer/order left join, collected into a result frame.
(
    ctx.execute(
        "SELECT * FROM customers LEFT JOIN orders ON customer_id = order_customer_id"
    )
    .collect()
)