# EC2 Kafka + SingleStore Ingestion

### Database Configuration

In [14]:
%%sql
CREATE DATABASE IF NOT EXISTS ec2_kafka;

In [15]:
%%sql
USE ec2_kafka;

### Table Creation

In [19]:
%%sql
CREATE TABLE vehicle_log (
    event_id VARCHAR(50), 
    timestamp DATETIME,
    event_type VARCHAR(50),
    description VARCHAR(50),
    related_vehicle_id VARCHAR(50),
    additional_info JSON
);

RuntimeError: (singlestoredb.exceptions.OperationalError) 1050: Forwarding Error (node-f491ae25-2a57-4cd1-9e48-0b5d4d27f0fc-master-0.svc-f491ae25-2a57-4cd1-9e48-0b5d4d27f0fc:3306): Table 'vehicle_log' already exists
[SQL: CREATE TABLE vehicle_log (
    event_id VARCHAR(50), 
    timestamp DATETIME,
    event_type VARCHAR(50),
    description VARCHAR(50),
    related_vehicle_id VARCHAR(50),
    additional_info JSON
);]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
If you need help solving this issue, send us a message: https://ploomber.io/community


In [20]:
%%sql
CREATE TABLE browser_log (
    event_id VARCHAR(50),
    timestamp DATETIME,
    event_type VARCHAR(50),
    page VARCHAR(50),
    browser VARCHAR(50),
    metadata JSON
);

RuntimeError: (singlestoredb.exceptions.OperationalError) 1050: Forwarding Error (node-f491ae25-2a57-4cd1-9e48-0b5d4d27f0fc-master-0.svc-f491ae25-2a57-4cd1-9e48-0b5d4d27f0fc:3306): Table 'browser_log' already exists
[SQL: CREATE TABLE browser_log (
    event_id VARCHAR(50),
    timestamp DATETIME,
    event_type VARCHAR(50),
    page VARCHAR(50),
    browser VARCHAR(50),
    metadata JSON
);]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
If you need help solving this issue, send us a message: https://ploomber.io/community


In [21]:
%%sql
CREATE TABLE user_info (
    user_id VARCHAR(50),
    sign_up DATETIME,
    user_type VARCHAR(50),
    email VARCHAR(50),
    phone_number VARCHAR(50)
);

RuntimeError: (singlestoredb.exceptions.OperationalError) 1050: Forwarding Error (node-f491ae25-2a57-4cd1-9e48-0b5d4d27f0fc-master-0.svc-f491ae25-2a57-4cd1-9e48-0b5d4d27f0fc:3306): Table 'user_info' already exists
[SQL: CREATE TABLE user_info (
    user_id VARCHAR(50),
    sign_up DATETIME,
    user_type VARCHAR(50),
    email VARCHAR(50),
    phone_number VARCHAR(50)
);]
(Background on this error at: https://sqlalche.me/e/20/e3q8)
If you need help solving this issue, send us a message: https://ploomber.io/community


### Pipeline Creation

In [22]:
%%sql
CREATE OR REPLACE PIPELINE event_logs_kafka AS
LOAD DATA KAFKA '35.155.206.166/test_topic'
INTO TABLE vehicle_log
(
    event_id <- event_id,
    timestamp <- timestamp,
    event_type <- event_type,
    description <- description,
    related_vehicle_id <- related_vehicle_id,
    additional_info <- additional_info
)
FORMAT JSON;

In [23]:
%%sql
CREATE OR REPLACE PIPELINE event_logs_kafka_2 AS
LOAD DATA KAFKA '35.155.206.166/test_topic_2'
INTO TABLE browser_log
(
    event_id <- event_id,
    timestamp <- timestamp,
    event_type <- event_type,
    page <- page,
    browser <- browser,
    metadata <- metadata
)
FORMAT JSON;

In [24]:
%%sql
CREATE OR REPLACE PIPELINE event_logs_kafka_3 AS
LOAD DATA KAFKA '35.155.206.166/test_topic_3'
INTO TABLE user_info
(
    user_id <- user_id,
    sign_up <- sign_up,
    user_type <- user_type,
    email <- email,
    phone_number <- phone_number
)
FORMAT JSON;

In [25]:
%%sql
SHOW PIPELINES;

Pipelines_in_ec2_kafka,State,Scheduled
event_logs_kafka,Stopped,False
event_logs_kafka_2,Stopped,False
event_logs_kafka_3,Stopped,False


In [26]:
%%sql
START ALL PIPELINES;

### Verify

In [28]:
%%sql
SELECT COUNT(*) FROM vehicle_log;

COUNT(*)
1000


In [29]:
%%sql
SELECT * FROM vehicle_log LIMIT 5;

event_id,timestamp,event_type,description,related_vehicle_id,additional_info
event_be9deb180fd34b8f880aece4932b42da,2024-09-11 14:01:20,maintenance,Scheduled maintenance,vehicle_96,{'info': 'Additional event details'}
event_041ab99b67f34459b1da267af1228f61,2024-09-11 14:01:20,accident,Scheduled maintenance,vehicle_9,{'info': 'Additional event details'}
event_860b27adf0124ef2af6ab348431a8ce4,2024-09-11 14:01:20,other,Scheduled maintenance,vehicle_29,{'info': 'Additional event details'}
event_ef59e4832aaf4d2fbbfa3cfb520eb1d0,2024-09-11 14:01:20,maintenance,Scheduled maintenance,vehicle_95,{'info': 'Additional event details'}
event_15c0e8b8c0e649fda5b6d95f289fc067,2024-09-11 14:01:20,maintenance,Scheduled maintenance,vehicle_25,{'info': 'Additional event details'}


In [30]:
%%sql
SELECT COUNT(*) FROM browser_log;

COUNT(*)
1000


In [31]:
%%sql
SELECT * FROM browser_log LIMIT 5;

event_id,timestamp,event_type,page,browser,metadata
event_f2242e6b312641d5834b26c54b640a35,2024-09-11 14:01:46,scroll,Home,Chrome,"{'utm_medium': 'email', 'utm_source': 'outreach', 'utm_term': 's2'}"
event_95b3ea01ba1041a7b9eaaf3df2449db2,2024-09-11 14:01:46,scroll,Home,Microsoft Edge,"{'utm_medium': 'email', 'utm_source': 'outreach', 'utm_term': 's2'}"
event_6eb269d602854113ac9ee0ddecb83892,2024-09-11 14:01:46,button_click,SingleStore Portal,Chrome,"{'utm_medium': 'email', 'utm_source': 'outreach', 'utm_term': 's2'}"
event_fd0e095ecb8a4b26a5d2701a1f4a266c,2024-09-11 14:01:46,scroll,Home,Microsoft Edge,"{'utm_medium': 'email', 'utm_source': 'outreach', 'utm_term': 's2'}"
event_21c80f37231541a39eaf5a2152bb1e9a,2024-09-11 14:01:46,click,SingleStore Portal,Microsoft Edge,"{'utm_medium': 'email', 'utm_source': 'outreach', 'utm_term': 's2'}"


In [35]:
%%sql
SELECT COUNT(*) FROM user_info;

COUNT(*)
1000


In [36]:
%%sql
SELECT * FROM user_info LIMIT 5;

user_id,sign_up,user_type,email,phone_number
user_bc56ec344936470d9c4aeba8c33dce45,2024-09-11 14:02:05,Admin,example@yahoo.com,206-XXX-XXXX
user_48d3d79b0de84095b7aa6a4f4ef89d7e,2024-09-11 14:02:05,Moderator,example@yahoo.com,415-XXX-XXXX
user_bdd9f3705684480fbbed30ba3c735b79,2024-09-11 14:02:05,Admin,example@yahoo.com,206-XXX-XXXX
user_66de96dbfedd45a0b5eb735b77f0f67f,2024-09-11 14:02:05,Moderator,example@yahoo.com,206-XXX-XXXX
user_4ec17854e89a4afa95afc186f2ae4877,2024-09-11 14:02:05,Moderator,example@hotmail.com,415-XXX-XXXX


### Cleanup

In [37]:
%%sql
STOP ALL PIPELINES;

In [38]:
%%sql
DROP DATABASE ec2_kafka;