In [2]:
craftsman_report_datamart = read_from_postgres("dwh", "craftsman_report_datamart")

# Ключевые столбцы для идентификации строки
key_columns = ["craftsman_id", "report_period"]

# 1. Определяем новые строки (INSERT)
new_rows_df = new_craftsman_report_datamart_df.join(
    craftsman_report_datamart.select(*key_columns),
    key_columns,
    how="left_anti"
)

# 2. Определяем обновленные строки (UPDATE)
updated_rows_df = new_craftsman_report_datamart_df.alias("new").join(
    craftsman_report_datamart.alias("existing"),
    key_columns,
    how="inner"
).filter(
    # Проверяем, есть ли расхождения в значимых столбцах
    (col("new.craftsman_money") != col("existing.craftsman_money")) |
    (col("new.platform_money") != col("existing.platform_money")) |
    (col("new.count_order") != col("existing.count_order")) |
    (col("new.avg_price_order") != col("existing.avg_price_order")) |
    (col("new.avg_age_customer") != col("existing.avg_age_customer")) |
    (col("new.median_time_order_completed") != col("existing.median_time_order_completed")) |
    (col("new.count_order_created") != col("existing.count_order_created")) |
    (col("new.count_order_in_progress") != col("existing.count_order_in_progress")) |
    (col("new.count_order_delivery") != col("existing.count_order_delivery")) |
    (col("new.count_order_done") != col("existing.count_order_done")) |
    (col("new.count_order_not_done") != col("existing.count_order_not_done")) |
    (col("new.top_product_category") != col("existing.top_product_category"))
).select("new.*")

In [3]:
!pip install psycopg2-binary
import psycopg2
from psycopg2.extras import execute_values
from pyspark.sql import SparkSession, Window, Row


def update_existing_rows(df, schema, table):
    # Преобразуем DataFrame в список словарей
    rows = df.collect()
    update_query = f"""
    UPDATE {schema}.{table} AS target
    SET craftsman_money = data.craftsman_money,
        platform_money = data.platform_money,
        count_order = data.count_order,
        avg_price_order = data.avg_price_order,
        avg_age_customer = data.avg_age_customer,
        median_time_order_completed = data.median_time_order_completed,
        count_order_created = data.count_order_created,
        count_order_in_progress = data.count_order_in_progress,
        count_order_delivery = data.count_order_delivery,
        count_order_done = data.count_order_done,
        count_order_not_done = data.count_order_not_done,
        top_product_category = data.top_product_category,
        load_dttm = data.load_dttm
    FROM (VALUES %s) AS data (
        craftsman_id, report_period, craftsman_money, platform_money, count_order, avg_price_order,
        avg_age_customer, median_time_order_completed, count_order_created, count_order_in_progress,
        count_order_delivery, count_order_done, count_order_not_done, top_product_category, load_dttm
    )
    WHERE target.craftsman_id = data.craftsman_id AND target.report_period = data.report_period
    """
    conn = psycopg2.connect(
        dbname="postgres",
        user="postgres_user",
        password="postgres_password",
        host="postgres",
        port="5432"
    )
    with conn.cursor() as cursor:
        values = [
            (
                row["craftsman_id"], row["report_period"], row["craftsman_money"], row["platform_money"],
                row["count_order"], row["avg_price_order"], row["avg_age_customer"],
                row["median_time_order_completed"], row["count_order_created"],
                row["count_order_in_progress"], row["count_order_delivery"], row["count_order_done"],
                row["count_order_not_done"], row["top_product_category"], row["load_dttm"]
            )
            for row in rows
        ]
        execute_values(cursor, update_query, values)
    conn.commit()
    conn.close()


Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.10


In [5]:
# Обновление таблицы инкрементальных загрузок
load_dates_dma = read_from_postgres("dwh", "load_dates_craftsman_report_datamart")

load_dates_data = [Row(load_dttm=date.today())]  # Используем текущую дату из модуля datetime
load_dates_df = spark.createDataFrame(load_dates_data).exceptAll(load_dates_dma.select('load_dttm'))

# Запись в таблицу
write_to_postgres(load_dates_df, "dwh", "load_dates_craftsman_report_datamart")

read_from_postgres("dwh", "load_dates_craftsman_report_datamart").toPandas().sort_values(by='load_dttm', ascending=False).head(10)

Unnamed: 0,id,load_dttm
0,1,2025-02-20


In [4]:
write_to_postgres(new_rows_df.drop("load_dttm"), "dwh", "craftsman_report_datamart")

In [1]:
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import (
    current_date, date_format, col, sum, avg, count, expr,
    row_number, desc, when, median
)
from datetime import date

# Создаем Spark-сессию и подключаем JDBC-драйвер для PostgreSQL
spark = SparkSession.builder \
    .appName("Postgres-Spark") \
    .config("spark.jars.packages", "org.postgresql:postgresql:42.5.4") \
    .getOrCreate()

# Параметры подключения к PostgreSQL
jdbc_url = "jdbc:postgresql://postgres:5432/postgres_db"
connection_properties = {
    "user": "postgres_user",
    "password": "postgres_password",
    "driver": "org.postgresql.Driver"
}

# Функция для чтения данных из PostgreSQL
def read_from_postgres(schema, table):
    return spark.read.format("jdbc") \
        .option("url", jdbc_url) \
        .option("driver", connection_properties["driver"]) \
        .option("dbtable", f"{schema}.{table}") \
        .option("user", connection_properties["user"]) \
        .option("password", connection_properties["password"]) \
        .load()

# Функция для записи данных в PostgreSQL
def write_to_postgres(df, schema, table):
    df.write.format("jdbc") \
        .option("url", jdbc_url) \
        .option("driver", connection_properties["driver"]) \
        .option("dbtable", f"{schema}.{table}") \
        .option("user", connection_properties["user"]) \
        .option("password", connection_properties["password"]) \
        .mode("append") \
        .save()

# =====================================================
# 1. Чтение и преобразование исходных данных
# =====================================================

# Определяем целевую схему (порядок столбцов):
# [order_id, order_created_date, order_completion_date, order_status,
#  craftsman_id, craftsman_name, craftsman_address, craftsman_birthday, craftsman_email,
#  product_id, product_name, product_description, product_type, product_price,
#  customer_id, customer_name, customer_address, customer_birthday, customer_email]

# --- Источник 1 ---
# Читаем таблицу из схемы source1 и выбираем столбцы согласно целевой схеме
source1_df = read_from_postgres("source1", "craft_market_wide")
source1_df = source1_df.select(
    "order_id",
    "order_created_date",
    "order_completion_date",
    "order_status",
    "craftsman_id",
    "craftsman_name",
    "craftsman_address",
    "craftsman_birthday",
    "craftsman_email",
    "product_id",
    "product_name",
    "product_description",
    "product_type",
    "product_price",
    "customer_id",
    "customer_name",
    "customer_address",
    "customer_birthday",
    "customer_email"
)

# --- Источник 2 ---
# Читаем две таблицы из схемы source2: одна с информацией о мастерах и продуктах,
# другая – с информацией о заказах и клиентах.
source2_masters_products = read_from_postgres("source2", "craft_market_masters_products")
source2_orders_customers = read_from_postgres("source2", "craft_market_orders_customers")

# Объединяем таблицы по product_id и craftsman_id и приводим к целевой схеме:
source2_df = source2_masters_products.join(
    source2_orders_customers,
    (source2_masters_products.product_id == source2_orders_customers.product_id) &
    (source2_masters_products.craftsman_id == source2_orders_customers.craftsman_id)
).select(
    # Сначала данные заказа (берутся из orders_customers)
    source2_orders_customers.order_id,
    source2_orders_customers.order_created_date,
    source2_orders_customers.order_completion_date,
    source2_orders_customers.order_status,
    # Затем данные мастера (берутся из masters_products)
    source2_masters_products.craftsman_id,
    source2_masters_products.craftsman_name,
    source2_masters_products.craftsman_address,
    source2_masters_products.craftsman_birthday,
    source2_masters_products.craftsman_email,
    # Затем данные о продукте
    source2_masters_products.product_id,
    source2_masters_products.product_name,
    source2_masters_products.product_description,
    source2_masters_products.product_type,
    source2_masters_products.product_price,
    # Затем данные клиента (берутся из orders_customers)
    source2_orders_customers.customer_id,
    source2_orders_customers.customer_name,
    source2_orders_customers.customer_address,
    source2_orders_customers.customer_birthday,
    source2_orders_customers.customer_email
)

# --- Источник 3 ---
# Читаем таблицы заказов, мастеров и клиентов из схемы source3
source3_orders = read_from_postgres("source3", "craft_market_orders")
source3_craftsmans = read_from_postgres("source3", "craft_market_craftsmans")
source3_customers = read_from_postgres("source3", "craft_market_customers")

# Объединяем заказы с мастерами и клиентами и приводим к целевой схеме.
source3_df = source3_orders.alias("o").join(
    source3_craftsmans.alias("c"),
    col("o.craftsman_id") == col("c.craftsman_id")
).join(
    source3_customers.alias("cust"),
    col("o.customer_id") == col("cust.customer_id")
).select(
    col("o.order_id").alias("order_id"),
    col("o.order_created_date").alias("order_created_date"),
    col("o.order_completion_date").alias("order_completion_date"),
    col("o.order_status").alias("order_status"),
    col("c.craftsman_id").alias("craftsman_id"),
    col("c.craftsman_name").alias("craftsman_name"),
    col("c.craftsman_address").alias("craftsman_address"),
    col("c.craftsman_birthday").alias("craftsman_birthday"),
    col("c.craftsman_email").alias("craftsman_email"),
    col("o.product_id").alias("product_id"),
    col("o.product_name").alias("product_name"),
    col("o.product_description").alias("product_description"),
    col("o.product_type").alias("product_type"),
    col("o.product_price").alias("product_price"),
    col("cust.customer_id").alias("customer_id"),
    col("cust.customer_name").alias("customer_name"),
    col("cust.customer_address").alias("customer_address"),
    col("cust.customer_birthday").alias("customer_birthday"),
    col("cust.customer_email").alias("customer_email")
)

# Объединяем данные из всех источников и удаляем дубликаты
all_sources_df = source1_df.union(source2_df).union(source3_df).distinct()

# =====================================================
# 2. Обработка измерения d_customers (клиенты)
# =====================================================

# Считываем текущие записи из таблицы d_customers
d_customers_df = read_from_postgres("dwh", "d_customers")
# Для стабильного бизнес-ключа берем только customer_name и customer_birthday
customers_key_columns = ['customer_name', 'customer_birthday']

# Определяем новых клиентов: выбираем уникальные комбинации ключевых полей из объединенных данных,
# которые отсутствуют в измерении. (Изменения в адресе и email не учитываются.)
new_customers_df = (
    all_sources_df.select(customers_key_columns).distinct()
    .exceptAll(d_customers_df.select(customers_key_columns))
).cache()

# Загружаем новые записи в таблицу d_customers
write_to_postgres(new_customers_df, "dwh", "d_customers")
# Перечитываем обновленные данные из d_customers
d_customers_df = read_from_postgres("dwh", "d_customers")

# Присоединяем surrogate-ключ (customer_id) по стабильному ключу
new_customers_df = new_customers_df.alias("new_cust").join(
    d_customers_df.alias("d_cust"),
    (col("new_cust.customer_name") == col("d_cust.customer_name")) &
    (col("new_cust.customer_birthday") == col("d_cust.customer_birthday")),
    how='left'
).select(
    col("d_cust.customer_id").alias("customer_id"),
    col("new_cust.customer_name"),
    col("new_cust.customer_birthday")
)

# Обновляем основной DataFrame, присоединяя правильный customer_id
all_sources_df = all_sources_df.alias("all_src").join(
    d_customers_df.alias("d_cust"),
    (col("all_src.customer_name") == col("d_cust.customer_name")) &
    (col("all_src.customer_birthday") == col("d_cust.customer_birthday")),
    how='left'
).select(
    col("d_cust.customer_id").alias("customer_id"),
    col("all_src.order_id"),
    col("all_src.order_created_date"),
    col("all_src.order_completion_date"),
    col("all_src.order_status"),
    col("all_src.craftsman_id"),
    col("all_src.craftsman_name"),
    col("all_src.craftsman_address"),
    col("all_src.craftsman_birthday"),
    col("all_src.craftsman_email"),
    col("all_src.product_id"),
    col("all_src.product_name"),
    col("all_src.product_description"),
    col("all_src.product_type"),
    col("all_src.product_price"),
    col("all_src.customer_name"),
    col("all_src.customer_address"),
    col("all_src.customer_birthday"),
    col("all_src.customer_email")
)

# =====================================================
# 3. Обработка измерения d_products (продукты)
# =====================================================

# Считываем текущие записи из d_products
d_products_df = read_from_postgres("dwh", "d_products")
# Бизнес-ключ для продуктов – все атрибуты продукта
products_key_columns = ['product_name', 'product_description', 'product_type', 'product_price']

# Определяем новые продукты по бизнес-ключу
new_products_df = (
    all_sources_df.select(products_key_columns).distinct()
    .exceptAll(d_products_df.select(products_key_columns))
).cache()

# Загружаем новые записи в d_products
write_to_postgres(new_products_df, "dwh", "d_products")
# Перечитываем обновленные данные
d_products_df = read_from_postgres("dwh", "d_products")

# Присоединяем surrogate-ключ product_id по бизнес-ключу
new_products_df = new_products_df.alias("new_prod").join(
    d_products_df.alias("d_prod"),
    (col("new_prod.product_name") == col("d_prod.product_name")) &
    (col("new_prod.product_description") == col("d_prod.product_description")) &
    (col("new_prod.product_type") == col("d_prod.product_type")) &
    (col("new_prod.product_price") == col("d_prod.product_price")),
    how='left'
).select(
    col("d_prod.product_id").alias("product_id"),
    col("new_prod.product_name"),
    col("new_prod.product_description"),
    col("new_prod.product_type"),
    col("new_prod.product_price")
)

# Обновляем основной DataFrame, присоединяя корректный product_id
all_sources_df = all_sources_df.alias("all_src").join(
    d_products_df.alias("d_prod"),
    (col("all_src.product_name") == col("d_prod.product_name")) &
    (col("all_src.product_description") == col("d_prod.product_description")) &
    (col("all_src.product_type") == col("d_prod.product_type")) &
    (col("all_src.product_price") == col("d_prod.product_price")),
    how='left'
).select(
    col("d_prod.product_id").alias("product_id"),
    col("all_src.order_id"),
    col("all_src.order_created_date"),
    col("all_src.order_completion_date"),
    col("all_src.order_status"),
    col("all_src.craftsman_id"),
    col("all_src.craftsman_name"),
    col("all_src.craftsman_address"),
    col("all_src.craftsman_birthday"),
    col("all_src.craftsman_email"),
    col("all_src.customer_id"),
    col("all_src.product_name"),
    col("all_src.product_description"),
    col("all_src.product_type"),
    col("all_src.product_price"),
    col("all_src.customer_name"),
    col("all_src.customer_address"),
    col("all_src.customer_birthday"),
    col("all_src.customer_email")
)

# =====================================================
# 4. Обработка измерения d_craftsmans (мастера)
# =====================================================

# Считываем текущие данные d_craftsmans
d_craftsmans_df = read_from_postgres("dwh", "d_craftsmans")
# Бизнес-ключ для мастеров – name и birthday
craftsmans_key_columns = ['craftsman_name', 'craftsman_birthday']

# Определяем новых мастеров по бизнес-ключу
new_craftsmans_df = (
    all_sources_df.select(craftsmans_key_columns).distinct()
    .exceptAll(d_craftsmans_df.select(craftsmans_key_columns))
).cache()

# Загружаем новые мастера в d_craftsmans
write_to_postgres(new_craftsmans_df, "dwh", "d_craftsmans")
# Перечитываем обновленные данные
d_craftsmans_df = read_from_postgres("dwh", "d_craftsmans")

# Присоединяем surrogate-ключ craftsman_id по бизнес-ключу
new_craftsmans_df = new_craftsmans_df.alias("new_cr").join(
    d_craftsmans_df.alias("d_cr"),
    (col("new_cr.craftsman_name") == col("d_cr.craftsman_name")) &
    (col("new_cr.craftsman_birthday") == col("d_cr.craftsman_birthday")),
    how='left'
).select(
    col("d_cr.craftsman_id").alias("craftsman_id"),
    col("new_cr.craftsman_name"),
    col("new_cr.craftsman_birthday")
)

# Обновляем основной DataFrame, присоединяя корректный craftsman_id
all_sources_df = all_sources_df.alias("all_src").join(
    d_craftsmans_df.alias("d_cr"),
    (col("all_src.craftsman_name") == col("d_cr.craftsman_name")) &
    (col("all_src.craftsman_birthday") == col("d_cr.craftsman_birthday")),
    how='left'
).select(
    col("d_cr.craftsman_id").alias("craftsman_id"),
    col("all_src.order_id"),
    col("all_src.order_created_date"),
    col("all_src.order_completion_date"),
    col("all_src.order_status"),
    col("all_src.product_id"),
    col("all_src.craftsman_name"),
    col("all_src.craftsman_address"),
    col("all_src.craftsman_birthday"),
    col("all_src.craftsman_email"),
    col("all_src.customer_id"),
    col("all_src.product_name"),
    col("all_src.product_description"),
    col("all_src.product_type"),
    col("all_src.product_price"),
    col("all_src.customer_name"),
    col("all_src.customer_address"),
    col("all_src.customer_birthday"),
    col("all_src.customer_email")
)

# =====================================================
# 5. Обработка факт-таблицы f_orders (заказы)
# =====================================================

# Считываем текущие заказы из f_orders
f_orders_df = read_from_postgres("dwh", "f_orders")
# Определяем ключевые столбцы для фактов
orders_key_columns = [
    'product_id', 'craftsman_id', 'customer_id',
    'order_created_date', 'order_completion_date', 'order_status'
]

# Определяем новые заказы, отсутствующие в f_orders, по ключевым столбцам
new_orders_df = (
    all_sources_df.select(orders_key_columns).distinct()
    .exceptAll(f_orders_df.select(orders_key_columns))
).cache()

# Загружаем новые заказы в f_orders
write_to_postgres(new_orders_df, "dwh", "f_orders")
# Перечитываем обновленные заказы
f_orders_df = read_from_postgres("dwh", "f_orders")

# Присоединяем surrogate-ключ order_id из f_orders по ключу
new_orders_df = new_orders_df.alias("new_ord").join(
    f_orders_df.alias("f_ord"),
    (col("new_ord.product_id") == col("f_ord.product_id")) &
    (col("new_ord.craftsman_id") == col("f_ord.craftsman_id")) &
    (col("new_ord.customer_id") == col("f_ord.customer_id")) &
    (col("new_ord.order_created_date") == col("f_ord.order_created_date")) &
    (col("new_ord.order_completion_date") == col("f_ord.order_completion_date")) &
    (col("new_ord.order_status") == col("f_ord.order_status")),
    how='left'
).select(
    col("f_ord.order_id").alias("order_id"),
    col("new_ord.product_id"),
    col("new_ord.craftsman_id"),
    col("new_ord.customer_id"),
    col("new_ord.order_created_date"),
    col("new_ord.order_completion_date"),
    col("new_ord.order_status")
)

# Обновляем основной DataFrame, присоединяя корректный order_id
all_sources_df = all_sources_df.alias("all_src").join(
    f_orders_df.alias("f_ord"),
    (col("all_src.product_id") == col("f_ord.product_id")) &
    (col("all_src.craftsman_id") == col("f_ord.craftsman_id")) &
    (col("all_src.customer_id") == col("f_ord.customer_id")) &
    (col("all_src.order_created_date") == col("f_ord.order_created_date")) &
    (col("all_src.order_completion_date") == col("f_ord.order_completion_date")) &
    (col("all_src.order_status") == col("f_ord.order_status")),
    how='left'
).select(
    col("f_ord.order_id").alias("order_id"),
    col("all_src.craftsman_id"),
    col("all_src.order_created_date"),
    col("all_src.order_completion_date"),
    col("all_src.order_status"),
    col("all_src.product_id"),
    col("all_src.craftsman_name"),
    col("all_src.craftsman_address"),
    col("all_src.craftsman_birthday"),
    col("all_src.craftsman_email"),
    col("all_src.customer_id"),
    col("all_src.product_name"),
    col("all_src.product_description"),
    col("all_src.product_type"),
    col("all_src.product_price"),
    col("all_src.customer_name"),
    col("all_src.customer_address"),
    col("all_src.customer_birthday"),
    col("all_src.customer_email")
)

# =====================================================
# 6. Формирование инкрементальной таблицы для отчетов (datamart)
# =====================================================

# Добавляем столбец report_period в формате YYYY-MM на основе order_created_date
new_orders_df = new_orders_df.withColumn("report_period", date_format("order_created_date", "yyyy-MM"))

# Определяем оконную функцию для выбора топовой категории товаров по мастеру и периоду
window_spec = Window.partitionBy("craftsman_id", "report_period").orderBy(desc("count_category"))

# Считаем количество заказов по каждой категории (product_type) для каждого мастера и отчетного периода
product_category_counts = (
    new_orders_df.join(
        new_products_df.alias("products"),
        new_orders_df.product_id == col("products.product_id")
    )
    .groupBy("craftsman_id", "report_period", "products.product_type")
    .agg(count("products.product_type").alias("count_category"))
)

# Выбираем топ-1 категорию товаров (с максимальным количеством заказов) для каждого мастера и периода
top_categories = (
    product_category_counts
    .withColumn("row_num", row_number().over(window_spec))
    .filter(col("row_num") == 1)
    .select("craftsman_id", "report_period", "product_type")
)

# Рассчитываем агрегаты для отчета по мастерам
new_craftsman_report_datamart_df = (
    new_orders_df.join(
        new_craftsmans_df.alias("craftsman"),
        new_orders_df.craftsman_id == col("craftsman.craftsman_id")
    )
    .join(
        new_products_df.alias("products"),
        new_orders_df.product_id == col("products.product_id")
    )
    .join(
        new_customers_df.alias("customers"),
        new_orders_df.customer_id == col("customers.customer_id")
    )
    .groupBy(
        col("craftsman.craftsman_id"),
        col("craftsman.craftsman_name"),
        col("craftsman.craftsman_address"),
        col("craftsman.craftsman_birthday"),
        col("craftsman.craftsman_email"),
        col("report_period")
    )
    .agg(
        # 90% от цены товара для мастера, 10% для платформы
        sum(col("products.product_price") * 0.9).alias("craftsman_money"),
        sum(col("products.product_price") * 0.1).alias("platform_money"),
        count(new_orders_df.order_id).alias("count_order"),
        avg(col("products.product_price")).alias("avg_price_order"),
        # При расчете возраста используем 365.25 для учета високосных годов
        avg(expr("DATEDIFF(current_date(), customers.customer_birthday) / 365.25")).alias("avg_age_customer"),
        # Медианное время выполнения заказа (в днях)
        median(expr("DATEDIFF(order_completion_date, order_created_date)")).alias("median_time_order_completed"),
        sum(when(new_orders_df.order_status == "created", 1).otherwise(0)).alias("count_order_created"),
        sum(when(new_orders_df.order_status == "in_progress", 1).otherwise(0)).alias("count_order_in_progress"),
        sum(when(new_orders_df.order_status == "delivery", 1).otherwise(0)).alias("count_order_delivery"),
        sum(when(new_orders_df.order_status == "done", 1).otherwise(0)).alias("count_order_done"),
        sum(when(new_orders_df.order_status != "done", 1).otherwise(0)).alias("count_order_not_done")
    )
)

# Присоединяем информацию о топовой категории товаров для каждого мастера и отчетного периода.
new_craftsman_report_datamart_df = new_craftsman_report_datamart_df.join(
    top_categories,
    ["craftsman_id", "report_period"],
    "left"
).withColumnRenamed("product_type", "top_product_category")
# (load_dttm не добавляется, так как колонка не используется в дальнейшей логике)

# =====================================================
# 7. Проверка результата: вывод первых 5 строк итогового отчета
# =====================================================
new_craftsman_report_datamart_df.toPandas().head(5)


Unnamed: 0,craftsman_id,report_period,craftsman_name,craftsman_address,craftsman_birthday,craftsman_email,craftsman_money,platform_money,count_order,avg_price_order,avg_age_customer,median_time_order_completed,count_order_created,count_order_in_progress,count_order_delivery,count_order_done,count_order_not_done,top_product_category,load_dttm
0,100,2021-01,Rafe Torbeck,1296 Farragut Plaza,1998-02-12,htillerjq@adobe.com,143.1,15.9,0,159.0,26.973306,,1,0,0,0,1,clothes,2025-02-20 13:33:27.961950
1,143,2020-02,Myriam Knocker,2 Paget Center,1998-12-17,cdecreuzeqp@sciencedaily.com,226.8,25.2,0,252.0,21.571526,,0,0,0,0,1,Beauty & Hygiene,2025-02-20 13:33:27.961950
2,241,2021-08,Hart Elintune,07 Rusk Parkway,2002-02-16,mropkins2q@biblegateway.com,91.8,10.2,0,102.0,24.629706,,0,0,1,0,1,clothes,2025-02-20 13:33:27.961950
3,854,2022-11,Myron Sinnock,90698 Rusk Way,1996-07-30,jharloweg6@youku.com,51.3,5.7,0,57.0,20.982888,,0,0,1,0,1,clothes,2025-02-20 13:33:27.961950
4,1279,2022-07,Pryce Gilbard,21 Westend Alley,1991-11-07,ljudkinfw@upenn.edu,65.7,7.3,1,73.0,28.665298,3.0,0,0,0,1,0,clothes,2025-02-20 13:33:27.961950
