Skip to content

Commit

Permalink
Merge pull request #205 from cchrewrite/dev
Browse files Browse the repository at this point in the history
Update food recommendation model
  • Loading branch information
cchrewrite committed Nov 10, 2022
2 parents d7c68a7 + 54b54de commit aa15621
Show file tree
Hide file tree
Showing 3 changed files with 485 additions and 12 deletions.
26 changes: 15 additions & 11 deletions examples/models/food_analysis/MLPFoodRecommendationModel.py
Expand Up @@ -23,10 +23,9 @@
import argparse
import os
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from singa_auto.model import BaseModel, utils
from singa_auto.model import BaseModel, IntegerKnob, utils
from singa_auto.constants import ModelDependency
from singa_auto.model.dev import test_model_class
from singa_auto.datasets.image_classification_dataset import ImageDataset4Clf
Expand All @@ -42,7 +41,10 @@ class MLPFoodRecommendationModel(BaseModel):

@staticmethod
def get_knob_config():
return {}
return {
'num_hid_layers': IntegerKnob(3, 6),
'num_hid_units': IntegerKnob(64, 512)
}

def __init__(self, **knobs):

Expand Down Expand Up @@ -169,7 +171,7 @@ def knowledge_base_weight_normalisation(self):
for t in self.KB[h][p]:
self.KB[h][p][t] = self.KB[h][p][t] / imp_sum
return 0

def sample_triples_from_knowledge_base(self, entity, n):
result = []
h = entity
Expand Down Expand Up @@ -256,9 +258,7 @@ def read_dataset(self, fpath):
for j in range(encodings.shape[1]):
feat.append(encodings[i][j])
tgt.append(int(y[1]))
#input(y[0])
#input(feat[-1])
#input(tgt[-1])

#feat = np.array(feat)
#tgt = np.array(tgt)
return feat, tgt
Expand Down Expand Up @@ -307,10 +307,13 @@ def train(self, dataset_path, work_dir, **kwargs):

self.clf = dict()
for tag in self.tag_list:
self.clf[tag] = MLPClassifier(random_state = 1, max_iter = 100, solver = "lbfgs", hidden_layer_sizes = (128, 128, 128, 128))
# can use random forests if MLP is too slow.
#self.clf[tag] = RandomForestClassifier(n_estimators = 10, random_state=0)
num_hid_layers = self._knobs.get("num_hid_layers")
num_hid_units = self._knobs.get("num_hid_units")
hidden_layer_sizes = [int(num_hid_units)] * int(num_hid_layers)

self.clf[tag] = MLPClassifier(random_state = 1, max_iter=1000, solver = "lbfgs", hidden_layer_sizes = hidden_layer_sizes)

#self.clf[tag] = MLPClassifier(random_state = 1, max_iter = 100, solver = "lbfgs", hidden_layer_sizes = (128, 128, 128, 128))

print("Reading knowledge base...")
kb_path = "%s/training_data/food_knowledge_base.tri"%work_dir
Expand Down Expand Up @@ -410,7 +413,7 @@ def print_knowledge_graph(self):

(args, _) = parser.parse_known_args()

queries = [str(["海菜", "puerpera_tag"]), str(["鱼肉", "pregnant_tag"]), str(["Mars", "pregnant_tag"])]
queries = [str(["海菜", "puerpera_tag"]), str(["鱼肉", "pregnant_tag"]), str(["Milk", "pregnant_tag"])]

test_model_class(model_file_path=__file__,
model_class='MLPFoodRecommendationModel',
Expand All @@ -419,5 +422,6 @@ def print_knowledge_graph(self):
train_dataset_path=args.train_path,
val_dataset_path=args.val_path,
#test_dataset_path=args.test_path,
budget={'MODEL_TRIAL_COUNT': 10, 'TIME_HOURS': 1.0},
queries=queries)

49 changes: 48 additions & 1 deletion examples/models/food_analysis/README.md
@@ -1 +1,48 @@
This folder includes food analysis models.
# Singa-Auto Demo - Food Recommendation.

This folder contains a number of models for food recommendation with knowledge graphs.

## Dataset Preparation

The training data and the evaluation data should each be compressed into a single .tar file. The two tar files contain a "training_data" folder and an "evaluation_data" folder, respectively.

The "training_data" folder has the following files:

(1) food_knowledge_base.tri: It is a knowledge base containing triples of the form "\<subject\> \<predicate\> \<object\>". For example:

milk contain protein
milk contain vitamin_a
protein is_good_for brain
...
prawn contain protein

(2) tag_list.txt: It contains N prediction tags. For example:

pregnant_tag
puerpera_tag
lactation_tag
baby_tag

(3) N files named "\<tag_name\>_training.txt". Each file contains training data of the form "\<entity\> \<class\>", which indicates the class of the entity with respect to the tag. For example, if Class 0 denotes food suitable for a baby and Class 1 denotes food not suitable for a baby, then we have a file named "baby_tag_training.txt":

milk 0
prawn 1
...
orange 0

The data is used to train classifiers that predict the probability that a given entity belongs to each class. Trained classifiers are evaluated using the data in the "evaluation_data" folder, which has N files named "\<tag_name\>_evaluation.txt". The format of the evaluation data is the same as that of the training data.

## Prediction/Inference

A query should be a Python list of strings. Each string is of the form "[\<entity\>, \<tag\>]". For example:

[str(["milk", "baby_tag"]), str(["prawn", "baby_tag"]), str(["orange", "baby_tag"])]

The model will return the probability that a given entity belongs to each class with respect to a given tag.

## Model Description

There are two models:
1. RFFoodRecommendationModel.py, which is a random forest. It performs auto parameter tuning on the number of estimators.
2. MLPFoodRecommendationModel.py, which is a feedforward neural network. It performs auto parameter tuning on the number of hidden layers and hidden units.

0 comments on commit aa15621

Please sign in to comment.