In [1]:
import duckdb
import pandas as pd
import plotly.express as px
from plotly_calplot import calplot

from common.env_variables import DUCKDB_DWH_FILE


def display_df(_df):
    """Show ``_df`` through ``display`` with missing values replaced by ``'.'``.

    The pandas display options below are overridden only for the duration of
    the call: the row and column limits are lifted, ``expand_frame_repr`` is
    switched off, and floats are rendered through ``'${:,.2f}'``.

    The frame passed in is not modified; ``fillna`` returns a new object.
    """
    # Flat (option, value) pairs, in the form pd.option_context expects.
    display_options = (
        'display.max_rows', None,
        'display.max_columns', None,
        "expand_frame_repr", False,
        "display.float_format", '${:,.2f}'.format,
    )
    with pd.option_context(*display_options):
        filled = _df.fillna('.')
        display(filled)


def display_sql(sql_statement):
    """Run ``sql_statement`` against the DuckDB file and display the result.

    A read-only connection to ``DUCKDB_DWH_FILE`` is opened for this single
    call. The query result is fetched as a pandas DataFrame, rendered with
    ``display_df`` and then returned to the caller.

    Args:
        sql_statement: SQL text passed unchanged to ``conn.execute``.

    Returns:
        The DataFrame produced by the query.

    Raises:
        Any exception raised while executing the query or displaying the
        result is propagated unchanged.
    """
    conn = duckdb.connect(DUCKDB_DWH_FILE, read_only=True)
    try:
        _df = conn.execute(sql_statement).df()
        display_df(_df)
    finally:
        # Close even when the query or the rendering fails; otherwise the
        # connection stays open for the lifetime of the kernel.
        conn.close()
    return _df


In [2]:
# Count the rows per job_id in curated.job and show the five largest counts.
# The SQL contains no placeholders, so it is a plain string rather than an
# f-string (inside an f-string a literal brace would be parsed as a field).
_ = display_sql('''
SELECT job_id,
       COUNT(1) AS total
  FROM curated.job
 GROUP BY 1
 ORDER BY 2 DESC
 LIMIT 5
''')

Unnamed: 0,job_id,total
0,7543521,12
1,7369771,10
2,7723680,9
3,7599993,8
4,7571802,8


In [3]:
# List up to 20 rows for job_id 7543521, newest load_timestamp first, to see
# how title and online_date differ between loads of the same job.
# The SQL contains no placeholders, so it is a plain string rather than an
# f-string (inside an f-string a literal brace would be parsed as a field).
_ = display_sql('''
SELECT load_timestamp,
       title,
       online_date
  FROM curated.job
 WHERE job_id = 7543521
 ORDER BY load_timestamp DESC
 LIMIT 20
''')

Unnamed: 0,load_timestamp,title,online_date
0,2022-01-26 16:26:20,Vertriebsmitarbeiter/in Innendienst (m/w/d),2022-01-02T13:03:06Z
1,2022-01-20 10:00:00,Innendienst Vertrieb Ausstellung (m/w/d),2022-01-02T13:03:06Z
2,2022-01-10 19:00:00,Mitarbeiter/in Vertrieb Ausstellung (m/w/d),2022-01-02T13:03:06Z
3,2022-01-08 13:00:00,Berater Ausstellung (m/w/d),2022-01-02T13:03:06Z
4,2021-12-18 14:00:00,Verkaufsberater Ausstellung (m/w/d),2021-12-18T13:03:05Z
5,2021-11-20 11:00:00,Berater Ausstellung (m/w/d),2021-11-13T17:03:10Z
6,2021-11-12 16:00:00,Fachberater Ausstellung (m/w/d),2021-10-29T15:30:01Z
7,2021-11-10 17:00:00,Fachberater - Glaser / Schreiner (m/w/d),2021-10-29T15:30:01Z
8,2021-10-14 21:00:00,Kaufmännische/r Angestellte/r (m/w/d),2021-10-06T15:03:04Z
9,2021-10-07 08:00:00,Kaufmännische/r Angestellte/r (m/w/d),2021-10-06T15:03:04Z


In [4]:
# Keep only the most recent row for job_id 7543521: rows are numbered per
# job_id by descending load_timestamp and only seqnum = 1 is selected.
# The SQL contains no placeholders, so it is a plain string rather than an
# f-string (inside an f-string a literal brace would be parsed as a field).
_ = display_sql('''
SELECT job_id,
       load_timestamp
FROM (
    SELECT j.*,
           row_number()
           OVER (
            PARTITION BY job_id ORDER BY load_timestamp DESC
           ) AS seqnum
      FROM curated.job j
     WHERE job_id = 7543521
) j
WHERE seqnum = 1;
''')


Unnamed: 0,job_id,load_timestamp
0,7543521,2022-01-26 16:26:20
