## NER

In [None]:
# Attach the per-article organization lists as an "Organizations" column.
# NOTE: `organizations` is built by the GLiNER extraction loop in the next cell,
# so that cell has to be executed before this one.
df_news = df_news.with_columns(
    pl.Series(name="Organizations", values=organizations)
)

In [None]:
# Load the news-tuned GLiNER checkpoint (medium size, v2.1)
model = GLiNER.from_pretrained("EmergentMethods/gliner_medium_news-v2.1")

# Labels for entity prediction
labels = ["Organization"] # for v2.1 use capital case for better performance

# Perform entity prediction: one list of distinct organization names per article,
# in the same order as the rows of df_news["texte1"].
organizations = []
for news in df_news["texte1"]:
    entities = model.predict_entities(news, labels, threshold=0.5)
    # De-duplicate within the current article only, keeping first-seen order.
    # (The previous comprehension filtered against `list_entities` itself, which
    # raised NameError on a fresh kernel and otherwise compared against the
    # previous article's entities.)
    list_entities = list(dict.fromkeys(entity["text"] for entity in entities))
    organizations.append(list_entities)

In [None]:
import spacy

# Configuration handed to the "gliner_spacy" pipeline component.
custom_spacy_config = {
    "gliner_model": "urchade/gliner_base",
    "chunk_size": 250,
    "labels": ["company"],
    "style": "ent",
}
nlp = spacy.blank("en")
nlp.add_pipe("gliner_spacy", config=custom_spacy_config)

# Smoke test on the first two articles only.
for news in df_news[:2, "texte1"]:
    doc = nlp(news)
    for ent in doc.ents:
        # `ent.label` is spaCy's integer hash of the label; `ent.label_` is the
        # human-readable string, which is what we want to see here.
        print(ent.text, ent.label_)

# Each printed line has the form "<entity text> <label>"; since "company" is the
# only configured label, a line looks like:
# Walmart company

Walmart 6905553075311563409
Walmart 6905553075311563409
company 6905553075311563409
Sam’s Club 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Google 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart InHome 6905553075311563409
Startup Battlefield 6905553075311563409
Techcrunch 6905553075311563409
Walmart 6905553075311563409
Amazon 6905553075311563409
Amazon 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart Inc. 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
My Assistant 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Walmart 6905553075311563409
Microsoft 6905553075311563409
Walmart 6905553075311563409
Azure OpenAI 6905553075311563409
Microsoft 6905553075311563409
Walmart

In [None]:
import polars as pl

# Whole-word alternation over the company names we want to detect
companies = r"\b(Berkshire Hathaway|JPMorgan|Bank of America|Wells Fargo|CVS Health|UnitedHealth|McKesson|AmerisourceBergen|Walmart|Costco|Kroger|Home Depot|General Motors|Boeing|Caterpillar|Ford)\b"

# Collect the distinct company names matched in each article into "Test",
# then keep only the articles that mention at least one of them.
df_news = df_news.with_columns(
    pl.col("texte1").str.extract_all(companies).list.unique().alias("Test")
).filter(pl.col("Test").list.len().gt(0))


In [None]:
# Show the frame; as the cell's last expression it is rendered with its rich repr.
df_news

Titre1,Lien_du_titre,texte1,Date de publication,Test
str,str,str,date,list[str]
"""Walmart debuts generative AI search and AI replenishment features at CES""","""https://techcrunch.com/2024/01/09/walmart-debuts-generative-ai-search-and-ai-replenishment-features-at-ces/""","""In a keynote address at the Consumer Electronics Show in Las Vegas, Walmart president and CEO Doug McMillon and other Walmart execs offered a glimpse as to how the retail giant was putting new technologies, including augmented reality (AR), drones, generative AI and other artificial intelligence tech to work in order improve the shopping experience for customers. At the trade show, the company revealed a handful of new products, including two AI-powered tools for managing product search and repl…",2024-01-09,"[""Walmart""]"
"""Ibotta’s expansion into enterprise should set it up for a successful IPO""","""https://techcrunch.com/2024/03/28/ibottas-expansion-into-enterprise-should-set-it-up-for-a-successful-ipo/""","""Ibotta confidently submitted an S-1 filing with the SEC on March 22 with the intent to list its shares on the New York Stock Exchange. The 13-year-old cash-back startup looks to make its public debut after turning profitable and recording impressive revenue growth in 2023. The company reported $320 million in revenue in 2023, up 52% from 2022 when it produced $210 million in revenue. Ibotta’s gross profits grew 68% from 2022, $164.5 million, to 2023, $276 million.  The Denver-based c…",2024-03-28,"[""Walmart"", ""Kroger""]"
"""Walmart+ adds travel benefits to its list of perks""","""https://techcrunch.com/2023/07/26/walmart-adds-travel-benefits-list-of-perks/""","""Walmart+, the U.S. retailer’s Prime competitor, is adding travel benefits to its list of perks. The company has announced that members can book getaways through WalmartPlusTravel.com and get Walmart Cash. Walmart+ members will receive 5% Walmart Cash on hotels, vacation rentals, car rentals and activities bookings, 2% Walmart Cash on all flights and a blended rate of Walmart Cash on vacation packages. Travel booking is powered by Expedia Group’s White Label Template technology, giving members a…",2023-07-26,"[""Walmart""]"
"""Walmart returns to Roblox after its first games were attacked by consumer advocacy groups""","""https://techcrunch.com/2023/09/27/walmart-returns-to-roblox-after-its-first-games-were-attacked-by-consumer-advocacy-groups/""","""Walmart is returning to Roblox, but this time with a virtual world where gamers can discover new virtual items and accessories to personalize their experience, not browse or shop items from Walmart itself. The retailer a year ago had launched two Walmart-branded experiences on the gaming platform, but came under fire from consumer advocacy groups who wanted the games audited for stealth marketing to kids. Those two virtual worlds, “Walmart Land” and “Universe of Play,” let Roblox players collect…",2023-09-27,"[""Walmart""]"
"""Walmart+ Assist halves the price of its Prime competitor for those on government assistance""","""https://techcrunch.com/2023/07/20/walmart-assist-halves-the-price-of-its-prime-competitor-for-those-on-government-assistance/""","""Walmart+, the U.S. retailer’s Prime competitor, is introducing a new program designed to help those on government assistance take advantage of its membership benefits, like free shipping and delivery, gas discounts and access to streaming service Paramount+. The company announced this morning the launch of Walmart+ Assist, a way to purchase a Walmart+ membership at 50% off ($49 per year, or $6.47 per month) for those on government assistance. Typically, a Walmart+ membership costs $98 per year o…",2023-07-20,"[""Walmart""]"
…,…,…,…,…
"""The Station: EV SPACs face new regulatory speed bump, more on Rivian’s reorg and VW weighs direct sales for Scout brand""","""https://techcrunch.com/2022/05/30/the-station-ev-spacs-face-new-regulatory-speed-bump-more-on-rivians-reorg-and-vw-weighs-direct-sales-for-scout-brand/""","""Welcome back to The Station, your weekly guide to everything going on in the world of transportation. Many readers of this weekly newsletter are likely enjoying a three-day weekend thanks to the Memorial Day holiday. So, this week I will keep it a wee bit shorter.  Before we get started, check out the latest transportation Q&A, this time with Convoy co-founder and CEO Dan Lewis. He predicts digital freight will go mainstream within the year. Why? Lewis said: “The industry is going to…",2022-05-30,"[""Caterpillar"", ""Walmart""]"
"""ClearFlame Engine Technologies takes aim at cleaning up diesel engines""","""https://techcrunch.com/2020/09/17/clearflame-engine-technologies-takes-aim-at-cleaning-up-diesel-engines/""","""Diesel engines are the workhorses of freight transportation and agriculture — and by extension keep the economy fed and well supplied. They also have a dirty side. The founders of ClearFlame Engine Technologies, a four-year-old startup based in Geneva, Illinois, say they have found a way to clean them up.  The company, which participated in TechCrunch Disrupt’s 2020 Startup Battlefield competition, has developed a novel way to get diesel style engines to operate on renewable fuels li…",2020-09-17,"[""Caterpillar""]"
"""The Station: Rivian adds to its EV war chest, Sec. Buttigieg is coming to Disrupt, Argo preps to launch with Lyft""","""https://techcrunch.com/2021/07/26/the-statin-rivian-adds-to-its-ev-war-chest-sec-buttigieg-is-coming-to-disrupt-and-argo-preps-to-launch-with-lyft/""","""The Station is a weekly newsletter dedicated to all things transportation. Sign up here — just click The Station — to receive it every Sunday in your inbox. Hello friends and readers. Welcome to The Station, your central hub for all past, present and future means of moving people and packages from Point A to Point B.  Hey so maybe y’all missed it, but we shared some exciting news this week. Transportation Secretary Pete Buttigieg will join us for a fireside chat at Disrupt 2021, wher…",2021-07-26,"[""Ford"", ""Caterpillar""]"
"""The Station: Spin heads to Europe, Just Eat Takeaway gobbles up Grubhub and a drive in a Bentley Flying Spur""","""https://techcrunch.com/2020/06/14/the-station-spin-heads-to-europe-just-eat-takeaway-gobbles-up-grubhub-and-a-drive-in-a-bentley-flying-spur/""","""The Station is a weekly newsletter dedicated to all things transportation. Sign up here — just click The Station — to receive it every Saturday in your inbox. Hi friends and first-time readers. Welcome back to The Station, a newsletter dedicated to all the present and future ways people and packages move from Point A to Point B. I’m your host Kirsten Korosec, senior transportation reporter at TechCrunch.  COVID-19 hasn’t gone away, in case you were wondering. And yet, city, county an…",2020-06-14,"[""Ford"", ""Caterpillar"", ""Boeing""]"
