# EC2 Kafka + SingleStore Ingestion

### Database Configuration

In [None]:
%%sql
-- Create the working database for this notebook. IF NOT EXISTS makes the cell safe to re-run.
CREATE DATABASE IF NOT EXISTS ec2_kafka;

In [None]:
%%sql
-- Select ec2_kafka as the default database for the statements that follow.
USE ec2_kafka;

### Table Creation

In [None]:
%%sql
-- Landing table for the JSON events loaded by vehicle_log_pipeline.
-- IF NOT EXISTS keeps this cell safe to re-run, matching the CREATE DATABASE cell.
-- Column names must stay in sync with the column mapping passed to create_pipelines.
CREATE TABLE IF NOT EXISTS vehicle_log (
    event_id VARCHAR(50),
    timestamp DATETIME,
    event_type VARCHAR(50),
    description VARCHAR(50),
    related_vehicle_id VARCHAR(50),
    additional_info JSON
);

In [None]:
%%sql
-- Landing table for the JSON events loaded by browser_log_pipeline.
-- IF NOT EXISTS keeps this cell safe to re-run, matching the CREATE DATABASE cell.
-- Column names must stay in sync with the column mapping passed to create_pipelines.
CREATE TABLE IF NOT EXISTS browser_log (
    event_id VARCHAR(50),
    timestamp DATETIME,
    event_type VARCHAR(50),
    page VARCHAR(50),
    browser VARCHAR(50),
    metadata JSON
);

In [None]:
%%sql
-- Landing table for the JSON records loaded by user_info_pipeline.
-- IF NOT EXISTS keeps this cell safe to re-run, matching the CREATE DATABASE cell.
-- Column names must stay in sync with the column mapping passed to create_pipelines.
CREATE TABLE IF NOT EXISTS user_info (
    user_id VARCHAR(50),
    sign_up DATETIME,
    user_type VARCHAR(50),
    email VARCHAR(50),
    phone_number VARCHAR(50)
);

### Pipeline Creation

In [None]:
%%sql
-- Kafka broker endpoint used by every pipeline below.
-- Replace the placeholder with the real address of the EC2 instance before running.
-- NOTE(review) this is a session variable, so it presumably has to be set in the
-- same session that later calls create_pipelines. Confirm for your notebook setup.
SET @EC2_PUBLIC_IP := "<EC2_PUBLIC_IP>";

In [None]:
%%sql
-- Creates (or replaces) one Kafka pipeline that loads JSON messages from a
-- single topic into a single table.
--
-- Parameters
--   EC2_PUBLIC_IP     Kafka broker endpoint, placed verbatim in front of /TOPIC_NAME
--   S2_PIPELINE_NAME  name of the pipeline to create or replace
--   TOPIC_NAME        Kafka topic to consume
--   TABLE_STATEMENT   target table name followed by its (column <- json_key, ...) mapping
--
-- Raises a user exception when any argument is NULL.
--
-- The CREATE PIPELINE text is assembled by plain string concatenation, so every
-- argument must come from trusted notebook code and never from end-user input.
CREATE OR REPLACE PROCEDURE create_pipelines(
    EC2_PUBLIC_IP TEXT,
    S2_PIPELINE_NAME TEXT,
    TOPIC_NAME TEXT,
    TABLE_STATEMENT TEXT
)
AS
BEGIN
    -- CONCAT returns NULL as soon as one argument is NULL, which would make
    -- EXECUTE IMMEDIATE fail without pointing at the real cause (typically an
    -- unset @EC2_PUBLIC_IP). Fail early with an explicit message instead.
    IF EC2_PUBLIC_IP IS NULL
        OR S2_PIPELINE_NAME IS NULL
        OR TOPIC_NAME IS NULL
        OR TABLE_STATEMENT IS NULL
    THEN
        RAISE USER_EXCEPTION("create_pipelines requires non-NULL arguments. Check that @EC2_PUBLIC_IP is set in this session.");
    END IF;

    EXECUTE IMMEDIATE CONCAT("CREATE OR REPLACE PIPELINE ", S2_PIPELINE_NAME ," AS LOAD DATA KAFKA '", EC2_PUBLIC_IP, "/", TOPIC_NAME, "' INTO TABLE ", TABLE_STATEMENT, " FORMAT JSON;");
END;

In [None]:
%%sql
-- Create the pipeline that loads the vehicle topic into vehicle_log.
-- Arguments, in order, are broker endpoint, pipeline name, Kafka topic, and the
-- target table with its column <- JSON key mapping.
CALL create_pipelines(
    @EC2_PUBLIC_IP,
    "vehicle_log_pipeline",
    "vehicle_topic", -- change to the topic name used on your Kafka broker
    "vehicle_log(event_id <- event_id, timestamp <- timestamp, event_type <- event_type, description <- description, related_vehicle_id <- related_vehicle_id, additional_info <- additional_info)"
);

In [None]:
%%sql
-- Create the pipeline that loads the browser topic into browser_log.
-- Arguments, in order, are broker endpoint, pipeline name, Kafka topic, and the
-- target table with its column <- JSON key mapping.
CALL create_pipelines(
    @EC2_PUBLIC_IP,
    "browser_log_pipeline",
    "browser_topic", -- change to the topic name used on your Kafka broker
    "browser_log (event_id <- event_id, timestamp <- timestamp, event_type <- event_type, page <- page, browser <- browser, metadata <- metadata)"
);

In [None]:
%%sql
-- Create the pipeline that loads the user topic into user_info.
-- Arguments, in order, are broker endpoint, pipeline name, Kafka topic, and the
-- target table with its column <- JSON key mapping.
CALL create_pipelines(
    @EC2_PUBLIC_IP,
    "user_info_pipeline",
    "user_topic", -- change to the topic name used on your Kafka broker
    "user_info(user_id <- user_id, sign_up <- sign_up, user_type <- user_type, email <- email, phone_number <- phone_number)"
);

In [None]:
%%sql
-- List pipelines to confirm that the three create_pipelines calls succeeded.
SHOW PIPELINES;

In [None]:
%%sql
-- Start ingestion. This starts every pipeline the statement applies to, not only
-- the three created by this notebook.
START ALL PIPELINES;

### Verify

In [None]:
%%sql
-- Number of rows currently in vehicle_log.
SELECT
    COUNT(*)
FROM vehicle_log;

In [None]:
%%sql
-- Peek at up to five rows. There is no ORDER BY, so which rows come back is arbitrary.
SELECT
    event_id,
    timestamp,
    event_type,
    description,
    related_vehicle_id,
    additional_info
FROM vehicle_log
LIMIT 5;

In [None]:
%%sql
-- Number of rows currently in browser_log.
SELECT
    COUNT(*)
FROM browser_log;

In [None]:
%%sql
-- Peek at up to five rows. There is no ORDER BY, so which rows come back is arbitrary.
SELECT
    event_id,
    timestamp,
    event_type,
    page,
    browser,
    metadata
FROM browser_log
LIMIT 5;

In [None]:
%%sql
-- Number of rows currently in user_info.
SELECT
    COUNT(*)
FROM user_info;

In [None]:
%%sql
-- Peek at up to five rows. There is no ORDER BY, so which rows come back is arbitrary.
SELECT
    user_id,
    sign_up,
    user_type,
    email,
    phone_number
FROM user_info
LIMIT 5;

### Cleanup

In [None]:
%%sql
-- Stop ingestion before tearing the database down. This stops every pipeline the
-- statement applies to, not only the three created by this notebook.
STOP ALL PIPELINES;

In [None]:
%%sql
-- Remove the database and everything this notebook created in it.
-- IF EXISTS keeps cleanup safe to re-run or to run after a partial setup.
DROP DATABASE IF EXISTS ec2_kafka;