Skip to content

Commit

Permalink
Demo Script: Naive Bayes (works fine on Greenplum)
Browse files: browse the repository at this point in the history
  • Loading branch information
Florian Schoppmann authored and Florian Schoppmann committed Jan 25, 2011
1 parent 13665d1 commit 7678159
Showing 1 changed file with 79 additions and 16 deletions.
95 changes: 79 additions & 16 deletions examples/demo/naive-bayes.sql
Expand Up @@ -4,30 +4,93 @@
\qecho
\qecho === Refresh table: bayes ========================================

-- Keep NOTICE-level chatter out of the demo output while the tables are
-- dropped and rebuilt.
SET client_min_messages = error;

-- IF EXISTS lets the script run on a fresh database and be re-run safely.
-- A bare "DROP TABLE bayes" errors when the table is not there yet.
DROP TABLE IF EXISTS bayes CASCADE;
DROP TABLE IF EXISTS toclassify CASCADE;

-- Training data: one row per labelled example.
--   id         : row identifier (primary key)
--   class      : class label of the example
--   attributes : integer-coded attribute values for the example
CREATE TABLE bayes
(
    id         INTEGER NOT NULL,
    class      INTEGER,
    attributes INTEGER[],
    CONSTRAINT pk_bayes PRIMARY KEY (id)
);

-- The delimiter is explicit so the load does not depend on literal tab
-- characters surviving editors or copy/paste. The array literals contain
-- commas and spaces, hence '|'.
COPY bayes (id, class, attributes) FROM stdin WITH DELIMITER '|';
1|1|{1, 2, 3}
3|1|{1, 4, 3}
5|2|{0, 2, 2}
2|1|{1, 2, 1}
4|2|{1, 2, 2}
6|2|{0, 1, 3}
\.


\qecho === Show training data ==========================================

-- Without ORDER BY the row order is unspecified, so sort by the primary key
-- to keep the demo output identical from run to run.
SELECT
    id,
    class,
    attributes
FROM bayes
ORDER BY id;


\qecho === Refresh table: toclassify ===================================

-- Rows to classify: an identifier plus an integer array of attribute
-- values. No class column here.
CREATE TABLE toclassify (
    id         SERIAL NOT NULL,
    attributes INTEGER[],
    CONSTRAINT pk_toclassify PRIMARY KEY (id)
);

-- Only "attributes" is loaded; "id" is left to its SERIAL default.
COPY toclassify (attributes) FROM STDIN;
{0, 2, 1}
{1, 2, 3}
\.


\qecho === Show Data we want to run Naive Bayes classification on ======

-- Sort by the primary key so the listing is stable across runs.
SELECT
    id,
    attributes
FROM toclassify
ORDER BY id;

-- Setup is done: let WARNING-level messages through for the remaining steps
-- (the level was lowered to "error" while the tables were rebuilt).
SET client_min_messages = warning;

\qecho === Precompute feature probabilities and class priors ===========

-- Drop the output tables of a previous run so the call below can recreate them.
DROP TABLE IF EXISTS nb_class_priors CASCADE;
DROP TABLE IF EXISTS nb_feature_probs CASCADE;

-- Arguments: training table, class column, attributes column, 3, then the
-- two tables queried in the next section of this script.
-- NOTE(review): 3 presumably is the number of attributes per row (every
-- training array has three elements) -- confirm against the MADlib docs.
SELECT madlib.create_nb_prepared_data_tables(
    'bayes', 'class', 'attributes', 3,
    'nb_feature_probs', 'nb_class_priors');


\qecho === Show feature probabilities and class priors =================

-- Both listings are explicitly ordered so the printed output is deterministic.
-- SELECT * is kept because the full column sets of these MADlib-generated
-- tables are not defined in this script.
SELECT * FROM nb_feature_probs ORDER BY class, attr, value;

SELECT * FROM nb_class_priors ORDER BY class;

\qecho === Run Naive Bayes =============================================

-- Remove the view left by a previous run before it is recreated.
DROP VIEW IF EXISTS nb_classify_view_fast;

-- Arguments: the two precomputed tables, then the table to classify with its
-- key and attributes columns, 3, and the name of the view queried below.
-- NOTE(review): 3 presumably is the number of attributes per row -- confirm
-- against the MADlib docs.
SELECT madlib.create_nb_classify_view(
    'nb_feature_probs', 'nb_class_priors',
    'toclassify', 'id', 'attributes', 3,
    'nb_classify_view_fast');

SELECT * FROM nb_classify_view_fast;


\qecho === Look at the probabilities for each class
\qecho === (Note we use Laplacian Smoothing):

-- Start from a clean slate: discard the view from any earlier run.
DROP VIEW IF EXISTS nb_probs_view_fast;

-- Same argument layout as the classification call earlier in this script;
-- only the function and the output view name differ.
SELECT
    madlib.create_nb_probs_view(
        'nb_feature_probs',
        'nb_class_priors',
        'toclassify',
        'id',
        'attributes',
        3,
        'nb_probs_view_fast'
    );

SELECT *
FROM nb_probs_view_fast;

-- Restore the session's default message level changed earlier in the script.
RESET client_min_messages;

0 comments on commit 7678159

Please sign in to comment.