In [1]:
import hopsworks

# Log in to Hopsworks and keep a handle on the project.
proj = hopsworks.login()
# Feature store handle; the feature group and feature view calls below go through it.
fs = proj.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.


In [2]:
!pip install sentence_transformers



In [3]:
import logging

In [4]:
# Raise the root logger's threshold so only warnings and above are emitted.
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

In [5]:
# Display the root logger's effective level as a level name.
logging.getLevelName(logging.getLogger().getEffectiveLevel())



In [6]:
import pandas as pd

# Read the articles CSV as UTF-8; bytes that cannot be decoded are dropped
# (encoding_errors="ignore") instead of raising.
df = pd.read_csv(
    "dataset/Articles.csv",
    encoding="utf-8",
    encoding_errors="ignore",
)

In [7]:
from sentence_transformers import SentenceTransformer
# Sentence-embedding model used by the encode() calls in the cells below.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [8]:
# Peek at the heading of the row labelled 0.
df.loc[0, "Heading"]

'sindh govt decides to cut public transport fares by 7pc kti rej'

In [9]:
# Length of the vector the model returns for a single heading.
len(model.encode(df["Heading"][0]))

384

In [10]:
# Encode every heading and every article body. encode() is handed plain lists
# of strings (its documented batch input) rather than pandas Series.
embeddings_heading = model.encode(df["Heading"].tolist())
embeddings_body = model.encode(df["Article"].tolist())

# Store one vector (as a Python list) per row. The Series is built on df.index
# so vectors are matched to rows by position; a bare pd.Series(list) would be
# aligned on labels 0..n-1 and leave NaN in any row with a different label.
# NOTE: "embedding_headding" is misspelled, but later cells reference the
# column by this exact name, so it is kept.
df["embedding_headding"] = pd.Series(embeddings_heading.tolist(), index=df.index)
df["embedding_body"] = pd.Series(embeddings_body.tolist(), index=df.index)

# Sequential row id; the feature groups below use it as their primary key.
df["id1"] = range(len(df))

In [11]:
# Preview the first rows, including the embedding and id1 columns.
df.head()

Unnamed: 0,Article,Date,Heading,NewsType,embedding_headding,embedding_body,id1
0,KARACHI: The Sindh government has decided to b...,1/1/2015,sindh govt decides to cut public transport far...,business,"[-0.0026804539375007153, 0.01034875214099884, ...","[0.015569653362035751, 0.05348445102572441, 0....",0
1,HONG KONG: Asian markets started 2015 on an up...,1/2/2015,asia stocks up in new year trad,business,"[-0.028134547173976898, -0.04525471851229668, ...","[0.01165273878723383, 0.0017589383060112596, 0...",1
2,HONG KONG: Hong Kong shares opened 0.66 perce...,1/5/2015,hong kong stocks open 0.66 percent lower,business,"[0.09226574748754501, -0.0026450688019394875, ...","[0.06687024980783463, -0.04292995110154152, 0....",2
3,HONG KONG: Asian markets tumbled Tuesday follo...,1/6/2015,asian stocks sink euro near nine year,business,"[-0.014624976553022861, 0.04147269204258919, 0...","[-0.017030593007802963, -0.006588279269635677,...",3
4,NEW YORK: US oil prices Monday slipped below $...,1/6/2015,us oil prices slip below 50 a barr,business,"[-0.04315837472677231, 0.0022600365336984396, ...","[-0.005564470309764147, 0.01306432206183672, 0...",4


In [12]:
import random

# Synthetic view counts (0-100 inclusive), one per article id. A dedicated,
# seeded generator makes the values identical on every fresh run and leaves
# the global `random` state untouched.
view_rng = random.Random(42)
df_view = pd.DataFrame(
    {
        "id1": list(range(len(df))),
        "view_cnt": [view_rng.randint(0, 100) for _ in range(len(df))],
    }
)

In [33]:
# Inspect the generated id1 / view_cnt frame.
df_view

Unnamed: 0,id1,view_cnt
0,0,81
1,1,14
2,2,3
3,3,94
4,4,35
...,...,...
2687,2687,46
2688,2688,96
2689,2689,18
2690,2690,25


In [34]:
# Version number passed to the feature group and feature view calls below.
version = 1

In [35]:
from datetime import datetime

In [36]:
from hsfs import embedding

# Register both embedding columns on the index; each dimension is taken from
# the length of the vector stored in the first row.
emb = embedding.EmbeddingIndex()
for emb_col in ("embedding_body", "embedding_headding"):
    emb.add_embedding(emb_col, len(df[emb_col][0]))

# Online-enabled feature group keyed on id1, carrying the embedding index.
news_fg = fs.get_or_create_feature_group(
    name="news_fg",
    version=version,
    primary_key=["id1"],
    online_enabled=True,
    embedding_index=emb,
    topic_name=f"news_fg_{version}_onlinefs",
)

# Write the articles; the recorded output shows an offline materialization job
# being launched for this insert.
news_fg.insert(df, write_options={"start_offline_backfill": True})



Feature Group created successfully, explore it at 
https://localhost:8181/p/1144/fs/1091/fg/1054




Uploading Dataframe: 0.00% |          | Rows 0/2692 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: news_fg_21_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://localhost:8181/p/1144/jobs/named/news_fg_21_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x7f77b022e350>, None)

In [37]:
# Online-enabled feature group for the view counts, keyed on the same id1
# column as the news feature group.
view_fg = fs.get_or_create_feature_group(
    name="view_fg",
    version=version,
    primary_key=["id1"],
    online_enabled=True,
    topic_name=f"view_fg_{version}_onlinefs",
)

# Write the view counts with the same write options as the news insert.
view_fg.insert(
    df_view,
    write_options={"start_offline_backfill": True},
)

Feature Group created successfully, explore it at 
https://localhost:8181/p/1144/fs/1091/fg/1055




Uploading Dataframe: 0.00% |          | Rows 0/2692 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: view_fg_21_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://localhost:8181/p/1144/jobs/named/view_fg_21_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x7f7803e9cdf0>, None)

## Search News

In [54]:
# Re-fetch the news feature group by name and version.
news_fg = fs.get_feature_group(name="news_fg", version=version)

In [55]:
# Free-text query; it is encoded with `model` before each similarity search below.
news_description = "news about europe"

In [56]:
# Single nearest article to the query, searched on the heading embedding
# directly through the feature group.
news_fg.find_neighbors(
    model.encode(news_description),
    k=1,
    col="embedding_headding",
)

[(-0.31317261435143673,
  ['NEW YORK: The euro gained ground on Wednesday on hopes that Greeces debt crisis will be resolved in a few days, averting a default to its European Union creditors.The euro, which had fallen to a five-week low of $1.0916 on Tuesday, rebounded as Greek Prime Minister Alexis Tsipras vowed to present "credible" reform plans to creditors by Thursday.The 19-nation currency rose to $1.1074 around 2100 GMT from $1.1007 at the same time Tuesday.European Union President Donald Tusk has set the "final deadline" for a deal on Sunday at a summit of all 28 EU leaders."Between the unusual suspension of trading on the NYSE and the meltdown in Asian equities, investors sent the euro higher on signs of progress towards a deal for Greece," said Kathy Lien of BK Asset Management.The New York Stock Exchange halted trading Wednesday for more than three hours due to technical problems that it said were not caused by hacking.Meanwhile, a weeks-long rout in Chinese equities continue

In [57]:
# Feature view exposing only date, heading and news type from the news
# feature group.
fv = fs.get_or_create_feature_view(
    "news",
    version=version,
    query=news_fg.select(["date", "heading", "newstype"]),
)

In [58]:
# fv.find_neighbors(model.encode(news_description), k=5)

In [59]:
# Five nearest articles to the query, searched on the heading embedding.
fv.find_neighbors(
    model.encode(news_description),
    k=5,
    feature=news_fg.embedding_headding,
)



[['7/9/2015',
  'euro lifts as hopes mount for greek crisis resoluti',
  'business'],
 ['7/15/2016', 'Asian shares rise Europe subdued Nice ', 'business'],
 ['6/24/2016',
  'Pound dives as first Britain EU vote results come i',
  'business'],
 ['6/11/2016',
  'Euro 2016 Clashes in Marseille ahead of England Russia ',
  'sports'],
 ['7/4/2016', 'France hammer Iceland to book Germany showd', 'sports']]

In [60]:
# Same query, this time searched on the article-body embedding.
fv.find_neighbors(
    model.encode(news_description),
    k=5,
    feature=news_fg.embedding_body,
)

[['10/13/2016',
  'US banks planning exodus from Brexit Britain French minister',
  'business'],
 ['3/22/2016',
  'Stocks fall gold and govt bonds rise after Brussels explosi',
  'business'],
 ['9/17/2016',
  'Protesters rally across Germany against mega trade d',
  'business'],
 ['6/10/2016',
  'British soccer fans and locals clash in Marseille as Euro 2016 ',
  'sports'],
 ['11/9/2016', 'Trump win not good day world economy ECB Nowotny', 'business']]

In [61]:
# fv.find_neighbors(model.encode(news_description), k=5, 
#                   filter=news_fg.newstype == "sports",  feature=news_fg.embedding_headding)

In [62]:
# fv.find_neighbors(model.encode(news_description), k=5, 
#                   filter=((news_fg.newstype == "sports") & (news_fg.heading.like("france"))),
#                  feature=news_fg.embedding_headding)

In [63]:
# Feature view that joins the view count from view_fg onto the news features.
fv1 = fs.get_or_create_feature_view(
    "news_cnt",
    version=version,
    query=(
        news_fg.select(["date", "heading", "newstype"])
        .join(view_fg.select(["view_cnt"]))
    ),
)

In [64]:
# Heading-embedding search through the joined view; in the recorded output
# each result row also carries view_cnt.
fv1.find_neighbors(
    model.encode(news_description),
    k=5,
    feature=news_fg.embedding_headding,
)



[['7/9/2015',
  'euro lifts as hopes mount for greek crisis resoluti',
  'business',
  76],
 ['7/15/2016', 'Asian shares rise Europe subdued Nice ', 'business', 17],
 ['6/24/2016',
  'Pound dives as first Britain EU vote results come i',
  'business',
  7],
 ['6/11/2016',
  'Euro 2016 Clashes in Marseille ahead of England Russia ',
  'sports',
  22],
 ['7/4/2016', 'France hammer Iceland to book Germany showd', 'sports', 66]]

In [65]:
# Look up the joined feature vector for the row whose primary key id1 is 10.
fv1.get_feature_vector({"id1": 10})

['1/14/2015', 'tokyo stocks open 0.74 percent lower', 'business', 86]

In [66]:
# Show the name of the embedding index backing news_fg.
news_fg.embedding_index.index_name

'1144__embedding_default_project_embedding_0'

## Use project index

In [67]:
from hsfs import embedding

# Only the body embedding is registered on this index; its dimension is the
# length of the vector stored in the first row.
body_dim = len(df["embedding_body"][0])
emb = embedding.EmbeddingIndex()
emb.add_embedding("embedding_body", body_dim)

# Second online-enabled feature group over the same dataframe, keyed on id1.
news_fg_1 = fs.get_or_create_feature_group(
    name="news_fg_proj",
    version=version,
    primary_key=["id1"],
    online_enabled=True,
    embedding_index=emb,
    topic_name=f"news_fg_proj_{version}_onlinefs",
)

# Insert with default write options.
news_fg_1.insert(df)



Feature Group created successfully, explore it at 
https://localhost:8181/p/1144/fs/1091/fg/1056




Uploading Dataframe: 0.00% |          | Rows 0/2692 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: news_fg_proj_21_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://localhost:8181/p/1144/jobs/named/news_fg_proj_21_offline_fg_materialization/executions


(<hsfs.core.job.Job at 0x7f7803e9c5b0>, None)

In [68]:
# Show the name of the embedding index backing news_fg_1; the recorded output
# matches the index name reported for news_fg.
news_fg_1.embedding_index.index_name

'1144__embedding_default_project_embedding_0'

In [69]:
# Read three rows of news_fg_1 with online=True.
news_fg_1.show(n=3, online=True)

[['SINGAPORE: Brent crude oil prices traded around $50 a barrel on Monday, with some support coming from falling US output growth but an expectation of weak Chinese economic data weighing on markets.Analysts said prices were receiving some support around current levels but added that there was not much room for larger price gains."Some positive data points helped to stabilize oil for now...Upbeat IEA comments and a falling US rig count were the latest positive news. While the news was able to halt oil\'s price decline, it was not enough to turn prices bullish," Morgan Stanley said on Monday in a note.China is due to report gross domestic product figures on Tuesday, which are expected to show China\'s full-year growth would undershoot Beijing\'s 7.5-percent target and would be the weakest in 24 years.In Europe, the main event of the week will be Thursday\'s meeting of the European Central Bank (ECB), which is considered almost certain to see the launch of a government bond-buying campai