Commit

Make conventions uniform across tutorials (#70)
* Merge with master

* Address comments

* style fix
brahmaneya committed Aug 14, 2019
1 parent c66adfc commit 3476da0
Showing 12 changed files with 3,370 additions and 3,421 deletions.
124 changes: 66 additions & 58 deletions crowdsourcing/crowdsourcing_tutorial.ipynb

Diff not rendered (generated notebook file).

14 changes: 7 additions & 7 deletions crowdsourcing/crowdsourcing_tutorial.py
@@ -216,9 +216,9 @@ def polarity_negative_2(x):
 # %%
 from snorkel.analysis import metric_score
 
-Y_dev_preds = label_model.predict(L_dev)
+preds_dev = label_model.predict(L_dev)
 
-acc = metric_score(Y_dev, Y_dev_preds, probs=None, metric="accuracy")
+acc = metric_score(Y_dev, preds_dev, probs=None, metric="accuracy")
 print(f"LabelModel Accuracy: {acc:.3f}")
 
 # %% [markdown]
@@ -232,7 +232,7 @@ def polarity_negative_2(x):
 # Let's generate a set of probabilistic labels for that training set.
 
 # %%
-Y_train_preds = label_model.predict(L_train)
+preds_train = label_model.predict(L_train)
 
 # %% [markdown]
 # ## Use Soft Labels to Train End Model
@@ -257,8 +257,8 @@ def encode_text(text):
     return model(input_ids)[0].mean(1)[0].detach().numpy()
 
 
-train_vectors = np.array(list(df_train.tweet_text.apply(encode_text).values))
-test_vectors = np.array(list(df_test.tweet_text.apply(encode_text).values))
+X_train = np.array(list(df_train.tweet_text.apply(encode_text).values))
+X_test = np.array(list(df_test.tweet_text.apply(encode_text).values))
 
 # %% [markdown]
 # ### Model on soft labels
@@ -269,10 +269,10 @@ def encode_text(text):
 from sklearn.linear_model import LogisticRegression
 
 sklearn_model = LogisticRegression(solver="liblinear")
-sklearn_model.fit(train_vectors, Y_train_preds)
+sklearn_model.fit(X_train, preds_train)
 
 # %%
-print(f"Accuracy of trained model: {sklearn_model.score(test_vectors, Y_test)}")
+print(f"Accuracy of trained model: {sklearn_model.score(X_test, Y_test)}")
 
 # %% [markdown]
 # We now have a trained model that can be applied to future examples without requiring crowdsourced labels, and with accuracy not much lower than the `LabelModel` that _does_ have access to crowdsourced labels!
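As the closing cell above notes, the trained end model can score new tweets without any crowdsourced labels. A minimal sketch of that inference step (not part of this commit; it assumes `encode_text` and `sklearn_model` from the tutorial are in scope, and the example tweet is invented):

import numpy as np

# A hypothetical new tweet with no crowdsourced label available.
new_tweets = ["so much rain today, this weather is miserable"]

# Reuse the same encoder that produced X_train / X_test above.
new_vectors = np.array([encode_text(t) for t in new_tweets])

print(sklearn_model.predict(new_vectors))        # hard label predictions
print(sklearn_model.predict_proba(new_vectors))  # per-class probabilities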
36 changes: 19 additions & 17 deletions recsys/recsys_tutorial.py
@@ -16,7 +16,7 @@
 # ## Loading Data
 
 # %% [markdown]
-# We start by running the `download_and_process_data` function. The function returns the `df_train`, `df_test`, `df_dev`, `df_val` dataframes, which correspond to our training, test, development, and validation sets. Each of those dataframes has the following fields:
+# We start by running the `download_and_process_data` function. The function returns the `df_train`, `df_test`, `df_dev`, `df_valid` dataframes, which correspond to our training, test, development, and validation sets. Each of those dataframes has the following fields:
 # * `user_idx`: A unique identifier for a user.
 # * `book_idx`: A unique identifier for a book that is being rated by the user.
 # * `book_idxs`: The set of books that the user has interacted with (read or planned to read).
@@ -29,7 +29,7 @@
 # %%
 from utils import download_and_process_data
 
-(df_train, df_test, df_dev, df_val), df_books = download_and_process_data()
+(df_train, df_test, df_dev, df_valid), df_books = download_and_process_data()
 
 df_books.head()
 
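As a quick illustration of the fields listed above (a hypothetical snippet, assuming the dataframes returned by `download_and_process_data` are in scope):

row = df_train.iloc[0]
print(row["user_idx"], row["book_idx"])  # identifiers for one (user, book) pair
print(len(row["book_idxs"]))             # number of books this user has interacted with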
@@ -172,17 +172,17 @@ def polarity_negative(x):
 L_train = applier.apply(df_train)
 label_model = LabelModel(cardinality=2, verbose=True)
 label_model.fit(L_train, n_epochs=5000, seed=123, log_freq=20, lr=0.01)
-Y_train_preds = label_model.predict(L_train)
+preds_train = label_model.predict(L_train)
 
 # %%
 import pandas as pd
 from snorkel.labeling import filter_unlabeled_dataframe
 
-df_train_filtered, Y_train_preds_filtered = filter_unlabeled_dataframe(
-    df_train, Y_train_preds, L_train
+df_train_filtered, preds_train_filtered = filter_unlabeled_dataframe(
+    df_train, preds_train, L_train
 )
-df_train_filtered["rating"] = Y_train_preds_filtered
-combined_df_train = pd.concat([df_train_filtered, df_dev], axis=0)
+df_train_filtered["rating"] = preds_train_filtered
+df_combined = pd.concat([df_train_filtered, df_dev], axis=0)
 
 # %% [markdown]
 # ### Rating Prediction Model
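In the cell above, `filter_unlabeled_dataframe` drops the training rows on which every labeling function abstained (rows of `L_train` that are all -1), along with their predicted labels, so the end model only trains on examples the `LabelModel` could actually label. A small sketch with invented toy data illustrates the behavior:

import numpy as np
import pandas as pd
from snorkel.labeling import filter_unlabeled_dataframe

df_toy = pd.DataFrame({"text": ["a", "b", "c"]})
L_toy = np.array([[1, -1], [-1, -1], [0, 1]])  # middle row: all LFs abstained
preds_toy = np.array([1, 0, 0])

df_kept, preds_kept = filter_unlabeled_dataframe(df_toy, preds_toy, L_toy)
print(df_kept["text"].tolist())  # ['a', 'c']
print(preds_kept)                # [1 0]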
@@ -273,9 +273,11 @@ def get_data_tensors(df):
     )
     tensor_dict = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
     return (
-        tensor_dict["len_book_idxs"],
-        tensor_dict["book_idxs"],
-        tensor_dict["book_idx"],
+        (
+            tensor_dict["len_book_idxs"],
+            tensor_dict["book_idxs"],
+            tensor_dict["book_idx"],
+        ),
         tensor_dict["label"],
     )
 
@@ -286,13 +288,13 @@ def get_data_tensors(df):
 # %%
 model = get_model()
 
-train_data_tensors = get_data_tensors(combined_df_train)
-val_data_tensors = get_data_tensors(df_val)
+X_train, Y_train = get_data_tensors(df_combined)
+X_valid, Y_valid = get_data_tensors(df_valid)
 model.fit(
-    train_data_tensors[:-1],
-    train_data_tensors[-1],
+    X_train,
+    Y_train,
     steps_per_epoch=300,
-    validation_data=(val_data_tensors[:-1], val_data_tensors[-1]),
+    validation_data=(X_valid, Y_valid),
     validation_steps=40,
     epochs=30,
     verbose=1,
@@ -301,8 +303,8 @@ def get_data_tensors(df):
 # Finally, we evaluate the model's predicted ratings on our test data.
 #
 # %%
-test_data_tensors = get_data_tensors(df_test)
-model.evaluate(test_data_tensors[:-1], test_data_tensors[-1], steps=30)
+X_test, Y_test = get_data_tensors(df_test)
+model.evaluate(X_test, Y_test, steps=30)
 
 # %% [markdown]
 # ## Summary
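The `get_data_tensors` change above groups the three feature tensors into a single tuple so that each example becomes a `(features, label)` pair, which is what allows the plain `model.fit(X_train, Y_train)` and `model.evaluate(X_test, Y_test)` calls. A minimal, self-contained sketch of the same pattern (a toy Keras model with random data and invented shapes, not the tutorial's actual model):

import numpy as np
import tensorflow as tf

# Three inputs, mirroring (len_book_idxs, book_idxs, book_idx).
len_in = tf.keras.layers.Input(shape=(1,), name="len_book_idxs")
idxs_in = tf.keras.layers.Input(shape=(10,), name="book_idxs")
idx_in = tf.keras.layers.Input(shape=(1,), name="book_idx")
hidden = tf.keras.layers.Concatenate()([len_in, idxs_in, idx_in])
output = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)
toy_model = tf.keras.Model(inputs=[len_in, idxs_in, idx_in], outputs=output)
toy_model.compile(optimizer="adam", loss="binary_crossentropy")

# Because the features are grouped into one tuple, fit()/evaluate() take (X, Y) directly.
X_toy = (np.random.rand(8, 1), np.random.rand(8, 10), np.random.rand(8, 1))
Y_toy = np.random.randint(0, 2, size=(8, 1))
toy_model.fit(X_toy, Y_toy, epochs=1, verbose=0)
toy_model.evaluate(X_toy, Y_toy, verbose=0)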
