Skip to content
This repository has been archived by the owner on Sep 1, 2023. It is now read-only.

Commit

Permalink
NUP-2487: Address reviewer feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
lscheinkman committed Feb 5, 2018
1 parent 085cdb2 commit 419bbaa
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 53 deletions.
99 changes: 58 additions & 41 deletions examples/prediction/category_prediction/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ python webdata.py
```text
The following table shows the encoded SDRs for every page category in the dataset
+---------------+-----------------------------------------------------------------------------+
| Page Category | Encoded SDR |
| Page Category | Encoded SDR (on bit indices) |
+---------------+-----------------------------------------------------------------------------+
| bbs | [ 19 26 115 171 293 364 390 442 470 477 550 598 624 670 705 719 744 748 |
| | 788 850 956] |
Expand Down Expand Up @@ -60,51 +60,68 @@ The following table shows the encoded SDRs for every page category in the datase
| | 805 970 987] |
+---------------+-----------------------------------------------------------------------------+
Start Learning page sequences using the first 100000 user sessions
Learned 100000 Sessions
Start Learning page sequences using the first 10000 user sessions
Learned 10000 Sessions
Finished Learning
Start Inference using a new user session from the dataset
User Session to Predict: ['frontpage', 'frontpage', 'frontpage', 'frontpage', 'frontpage', 'frontpage', 'frontpage', 'frontpage', 'weather', 'weather', 'weather', 'weather', 'news', 'news', 'weather', 'frontpage']
+-----------+-----------------------------------------------------------------------------------------+
| Page | Prediction |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
| weather | ('weather', 'frontpage', 'news', 'on-air', 'msn-news', 'msn-sports', 'local', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| weather | ('weather', 'frontpage', 'news', 'on-air', 'msn-news', 'msn-sports', 'local', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| weather | ('weather', 'frontpage', 'news', 'on-air', 'msn-news', 'msn-sports', 'local', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| weather | ('weather', 'frontpage', 'news', 'on-air', 'msn-news', 'msn-sports', 'local', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| news | ('news', 'frontpage', 'on-air', 'local', 'tech', 'weather', 'business', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| news | ('news', 'frontpage', 'on-air', 'local', 'tech', 'weather', 'business', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| weather | ('weather', 'frontpage', 'news', 'on-air', 'msn-news', 'msn-sports', 'local', 'sports') |
+-----------+-----------------------------------------------------------------------------------------+
| frontpage | ('frontpage', 'news', 'business', 'sports', 'on-air', 'tech', 'local', 'misc') |
+-----------+-----------------------------------------------------------------------------------------+
User Session to Predict: ['on-air', 'misc', 'misc', 'misc', 'on-air', 'misc', 'misc', 'misc', 'on-air', 'on-air', 'on-air', 'on-air', 'tech', 'msn-news', 'tech', 'msn-news', 'local', 'tech', 'local', 'local', 'local', 'local', 'local', 'local']
+----------+---------------------------------------------------------------------------------------+
| Page | Prediction |
+----------+---------------------------------------------------------------------------------------+
| on-air | ('on-air', 'misc', 'frontpage', 'news', 'summary', 'msn-news', 'weather', 'local') |
+----------+---------------------------------------------------------------------------------------+
| misc | ('misc', 'frontpage', 'on-air', 'local', 'msn-news', 'msn-sports', 'news', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| misc | ('misc', 'frontpage', 'on-air', 'local', 'msn-news', 'msn-sports', 'news', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| misc | ('misc', 'frontpage', 'on-air', 'local', 'msn-news', 'msn-sports', 'news', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| on-air | ('on-air', 'misc', 'frontpage', 'news', 'summary', 'msn-news', 'weather', 'local') |
+----------+---------------------------------------------------------------------------------------+
| misc | ('misc', 'frontpage', 'on-air', 'local', 'msn-news', 'msn-sports', 'news', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| misc | ('misc', 'frontpage', 'on-air', 'local', 'msn-news', 'msn-sports', 'news', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| misc | ('misc', 'frontpage', 'on-air', 'local', 'msn-news', 'msn-sports', 'news', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| on-air | ('on-air', 'misc', 'frontpage', 'news', 'summary', 'msn-news', 'weather', 'local') |
+----------+---------------------------------------------------------------------------------------+
| on-air | ('on-air', 'misc', 'frontpage', 'news', 'summary', 'msn-news', 'weather', 'local') |
+----------+---------------------------------------------------------------------------------------+
| on-air | ('on-air', 'misc', 'frontpage', 'news', 'summary', 'msn-news', 'weather', 'local') |
+----------+---------------------------------------------------------------------------------------+
| on-air | ('on-air', 'misc', 'frontpage', 'news', 'summary', 'msn-news', 'weather', 'local') |
+----------+---------------------------------------------------------------------------------------+
| tech | ('tech', 'frontpage', 'news', 'msn-news', 'on-air', 'business', 'local', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| msn-news | ('msn-news', 'frontpage', 'local', 'weather', 'misc', 'on-air', 'msn-sports', 'tech') |
+----------+---------------------------------------------------------------------------------------+
| tech | ('tech', 'frontpage', 'news', 'msn-news', 'on-air', 'business', 'local', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| msn-news | ('msn-news', 'frontpage', 'local', 'weather', 'misc', 'on-air', 'msn-sports', 'tech') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| tech | ('tech', 'frontpage', 'news', 'msn-news', 'on-air', 'business', 'local', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
| local | ('local', 'frontpage', 'misc', 'news', 'msn-news', 'on-air', 'weather', 'sports') |
+----------+---------------------------------------------------------------------------------------+
Compute prediction accuracy by checking if the next page in the sequence is within the predicted pages calculated by the model:
- Prediction Accuracy: 0.698660714286
- Accuracy Predicting Top 3 Pages: 0.814732142857
- Prediction Accuracy: 0.614173228346
- Accuracy Predicting Top 3 Pages: 0.825196850394
```

----------------------------------------------------------------------------------------
Expand Down
28 changes: 16 additions & 12 deletions examples/prediction/category_prediction/webdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,10 @@
},
}

TRAINING_RECORDS = 100000
# Learn page sequences from the first 10,000 user sessions.
# We chose 10,000 because it gives results that are good enough for this
# example. Use more records for learning to improve the prediction accuracy.
LEARNING_RECORDS = 10000



Expand All @@ -145,7 +148,7 @@ def computeAccuracy(model, size, top):
Compute prediction accuracy by checking if the next page in the sequence is
within the top N predictions calculated by the model
Args:
model: Trained model
model: HTM model
size: Sample size
top: top N predictions to use
Expand All @@ -162,8 +165,8 @@ def computeAccuracy(model, size, top):
for _ in xrange(7):
next(datafile)

# Skip training data and compute accuracy using only new sessions
for _ in xrange(TRAINING_RECORDS):
# Skip learning data and compute accuracy using only new sessions
for _ in xrange(LEARNING_RECORDS):
next(datafile)

# Compute prediction accuracy by checking if the next page in the sequence
Expand Down Expand Up @@ -216,7 +219,8 @@ def main():
model.enableInference({"predictedField": "page"})

# Use the model encoder to display the encoded SDRs the model will learn
sdr_table = PrettyTable(field_names=["Page Category", "Encoded SDR"],
sdr_table = PrettyTable(field_names=["Page Category",
"Encoded SDR (on bit indices)"],
sortby="Page Category")
sdr_table.align = "l"

Expand All @@ -232,7 +236,7 @@ def main():
print sdr_table

# At this point our model is configured and ready to learn the user sessions
# Extract the training data from MSNBC archive and stream it to the model
# Extract the learning data from MSNBC archive and stream it to the model
filename = os.path.join(os.path.dirname(__file__), "msnbc990928.zip")
with zipfile.ZipFile(filename) as archive:
with archive.open("msnbc990928.seq") as datafile:
Expand All @@ -241,10 +245,10 @@ def main():
next(datafile)

print
print "Start Learning page sequences using the first {} user " \
"sessions".format(TRAINING_RECORDS)
print "Start learning page sequences using the first {} user " \
"sessions".format(LEARNING_RECORDS)
model.enableLearning()
for count in xrange(TRAINING_RECORDS):
for count in xrange(LEARNING_RECORDS):
# Learn each user session as a single sequence
session = readUserSession(datafile)
model.resetSequenceStates()
Expand All @@ -255,11 +259,11 @@ def main():
sys.stdout.write("\rLearned {} Sessions".format(count + 1))
sys.stdout.flush()

print "\nFinished Learning"
print "\nFinished learning"
model.disableLearning()

# Use the newly trained model to predict next user session
# The test data starts right after the training data
# Use the new HTM model to predict the next user session.
# The test data starts right after the learning data.
print
print "Start Inference using a new user session from the dataset"
prediction_table = PrettyTable(field_names=["Page", "Prediction"],
Expand Down

0 comments on commit 419bbaa

Please sign in to comment.