In [38]:
import pandas as pd
import numpy as np
import re

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, classification_report, roc_curve, precision_score, recall_score
import lightgbm as lgb

In [16]:
df = pd.read_csv('data_aggregation/dataset_for_project/cs5246_data_for_final_proj_lemmatised.csv')

In [17]:
df.head()

Unnamed: 0,text,label,source,word_count,char_count
0,donald trump just couldn t wish american happy...,0,NewsDataSet,495,2893
1,house intelligence committee chairman devin nu...,0,NewsDataSet,305,1898
2,"friday , revealed milwaukee sheriff david clar...",0,NewsDataSet,580,3597
3,"christmas day , donald trump announced work fo...",0,NewsDataSet,444,2774
4,pope francis used annual christmas day message...,0,NewsDataSet,420,2346


### *train test split*

In [18]:
shuffled_df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Define features (X) and target (y)
X = df["text"]
y = df['label']

# Split the dataset (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

*Unique Words?*

In [19]:
def get_unique_words(df):
    words = set()  # Use a set to store unique words
    for text in df.dropna():  # Drop NaN values
        tokens = re.findall(r'\b\w+\b', text.lower())  # Extract words and convert to lowercase
        words.update(tokens)  # Add to set
    return list(words)

# Get unique words
unique_words = get_unique_words(X_train)

In [20]:
len(unique_words)

148300

## TF-IDF on train text

In [21]:
# Initialize and fit the TfidfVectorizer
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(X_train)  # each row will now be a document vector

In [22]:
tfidf_matrix.shape

(59455, 148212)

In [23]:
test_tfidf_matrix = vectorizer.transform(X_test)

## LightGBM

In [24]:
# Assuming you already have your training TF-IDF matrix and training labels (y_train)
param_grid = {
    # 'max_depth': [-1,2,4,6],
    'n_estimators': [5, 50, 100, 200, 400],
    'learning_rate': [0.01, 0.05, 0.1],
    'num_leaves': range(10, 30, 10),
    # 'boosting_type': ['gbdt', 'dart'],
}

In [None]:
cv_strategy = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)

model = lgb.LGBMClassifier(verbosity=-1, class_weight="balanced")  # Use class weight balance to counter class imblanced

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv_strategy, n_jobs=15, verbose=1, scoring='roc_auc')
grid_search.fit(tfidf_matrix, y_train)

print("************************")
best_params = grid_search.best_params_
best_score = grid_search.best_score_
y_proba = grid_search.predict_proba(test_tfidf_matrix)[:, 1]
roc_auc = roc_auc_score(y_test, y_proba)
pr_auc = average_precision_score(y_test, y_proba)

print(f"Best parameters: {best_params}")
print(f"Cross Validation ROC-AUC: {best_score}")
print(f"Testing ROC-AUC: {roc_auc}")
print(f"Testing PR-AUC: {pr_auc}")

Fitting 2 folds for each of 30 candidates, totalling 60 fits
************************
Best parameters: {'learning_rate': 0.05, 'n_estimators': 400, 'num_leaves': 20}
Cross Validation ROC-AUC: 0.9797372462170173
Testing ROC-AUC: 0.9821934302279585
Testing PR-AUC: 0.990170326685951


In [30]:
# TODO: need to test

fpr, tpr, thresholds = roc_curve(y_test, y_proba)

# Iterate through thresholds and select the one that gives the best balance
best_threshold = 0.5  # Start with 0.5 as a default
best_f1_score = 0

for threshold in thresholds:
    y_pred_binary = (y_proba > threshold).astype(int)
    precision = precision_score(y_test, y_pred_binary)
    recall = recall_score(y_test, y_pred_binary)
    f1_score = 2 * (precision * recall) / (precision + recall)
    if f1_score > best_f1_score:
        best_f1_score = f1_score
        best_threshold = threshold

print(f"Best Threshold: {best_threshold}")

  _warn_prf(average, modifier, msg_start, len(result))
  f1_score = 2 * (precision * recall) / (precision + recall)
  _warn_prf(average, modifier, msg_start, len(result))
  f1_score = 2 * (precision * recall) / (precision + recall)


Best Threshold: 0.45897408130582334


In [28]:
print(f"Best Threshold: {best_threshold}")

Best Threshold: 0.45897408130582334


In [32]:
y_predict = (y_proba >= best_threshold).astype(int)

In [34]:
confusion_matrix(y_test, y_predict)

array([[5127,  394],
       [ 602, 8741]])

In [35]:
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

           0       0.89      0.93      0.91      5521
           1       0.96      0.94      0.95      9343

    accuracy                           0.93     14864
   macro avg       0.93      0.93      0.93     14864
weighted avg       0.93      0.93      0.93     14864



In [36]:
import pandas as pd
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk

In [37]:
def remove_stop_words(sentence):
    words = sentence.split()
    filtered_words = [word for word in words if word.lower() not in ENGLISH_STOP_WORDS]
    return ' '.join(filtered_words)

nltk.download("wordnet")
nltk.download("omw-1.4")
nltk.download('punkt')

#  Initialize wordnet lemmatizer
wnl = WordNetLemmatizer()

# Function to lemmatize the text
def lemmatize_text(text):
    words = word_tokenize(text)  # Tokenize the text into words
    lemmatized_words = [wnl.lemmatize(word) for word in words]  # Lemmatize each word
    return ' '.join(lemmatized_words)  # Join words back into a sentence

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/paopao_ch/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/paopao_ch/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /Users/paopao_ch/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [41]:
def test_misinfo(sentence: str, grid_search: GridSearchCV, best_threshold: float):
    sentence = sentence.lower()
    sentence = remove_stop_words(sentence)
    sentence = lemmatize_text(sentence)
    input_sentence = np.array([sentence])
    test_tfidf_matrix = vectorizer.transform(input_sentence)
    y_proba = grid_search.predict_proba(test_tfidf_matrix)[:, 1]
    y_predict = (y_proba >= best_threshold).astype(int)
    if y_predict == 1:
        print("This is a fact")
    if y_predict == 0:
        print("This is a misinformation")

In [53]:
test_misinfo("Humans only use 10\% of their brains.", grid_search, best_threshold)

This is a misinformation


In [54]:
test_misinfo("You can catch a cold from being outside in the cold without a jacket.", grid_search, best_threshold)

This is a misinformation


In [55]:
test_misinfo("There are research suggesting that vaccines cause autism.", grid_search, best_threshold)

This is a misinformation


In [56]:
test_misinfo("Cold weather alone doesn't cause you to catch a cold; it's the viruses that do.", grid_search, best_threshold)

This is a misinformation


In [57]:
test_misinfo("Vaccines are safe and effective in preventing many diseases; there is no link to autism.", grid_search, best_threshold)

This is a misinformation


In [58]:
test_misinfo("Sleep needs vary by person and age, but 7-9 hours is generally recommended for adults.", grid_search, best_threshold)

This is a misinformation


In [59]:
test_misinfo("Not all politicians are corrupt; many work diligently to serve their communities and uphold democratic values.", grid_search, best_threshold)

This is a misinformation


In [60]:
test_misinfo("The U.S. Constitution is a living document, but changes to it require a rigorous process, including approval from two-thirds of Congress and ratification by states.", grid_search, best_threshold)

This is a misinformation


In [63]:
test_misinfo("COVID 19 was one of the scariest events since the market crash in 2008. Many people lost their lives and the world went into a hault which causes inflation to rise", grid_search, best_threshold)

This is a misinformation


In [64]:
test_misinfo("", grid_search, best_threshold)

This is a misinformation


In [None]:
test_news = """The Trump administration raised tariffs on global steel and aluminum imports to 25% Wednesday as President Donald Trump and his administration dig in on trade policies that have rocked financial markets.
Allies of the U.S., such as the European Union, responded in kind with a hail of criticism and reciprocal tariffs. The EU announced retaliatory tariffs early Wednesday on $28 billion-worth of a wide range of U.S. goods imported to Europe such as boats, motorbikes and alcohol.
President Trump announced the tariffs last month and the White House said Wednesday that there would be no exceptions.
“President Trump has once again used the leverage of the American economy, which is the best and biggest in the world, to deliver a win for the American people,” White House spokesperson Kush Desai said in a statement.
Calling the new U.S. tariffs “unjustified,” the European Commission said that the levies would kick in on April 1, with additional countermeasures introduced in mid-April. 
While President Trump has suggested he wants to negotiate a broader trade agreement with the U.K., Britain did not escape the taxes. U.K. business and trade secretary Jonathan Reynolds called the levies “disappointing,” but said his country was “focused on a pragmatic approach” and negotiating a broader deal.
US Pauses Tariffs On Canada, Mexico USMCA Trade Until April 2
President Trump during an executive order signing ceremony in the Oval Office on March 6.Al Drago / Bloomberg / Getty Images
The European reaction echoed that of Canada, another of America's traditional close allies and trading partners, on which Trump had already imposed a 25% metals tax.
New tariffs were briefly expected to go even higher for Canada, the top U.S. export market. President Donald Trump said Tuesday they would jump to 50% in response to Ontario Premier Doug Ford’s threat to impose a 25% surcharge on electricity imports into the United States to match the initial U.S. hike.
Economy
Price growth set to remain stubborn in February — and beyond
U.S. news
Bourbon industry in crosshairs of U.S.-Canada trade war
A conversation between Ford and Commerce Secretary Howard Lutnick, prevented this though, with Trump indicating a doubling of U.S. tariffs for Canada was no longer likely.
In a joint-statement with Lutnick posted on X, Ford said they would meet Thursday alongside the U.S. trade representative to discuss renewing the U.S.-Mexico-Canada free trade act.
In a Truth Social post Monday, Trump called Canada a longtime “tariff abuser,” going beyond his metals threats to say that “The United States is not going to be subsidizing Canada any longer,” and that “We don’t need your Cars, we don’t need your Lumber, we don’t your Energy, and very soon, you will find that out.”
Other long-time U.S. allies also failed to escape.
Australia, which had been hoping for the same exemption from steel and aluminum tariffs it received during the first Trump administration, said it would not retaliate.
‘Economic self-harm’: Australia says it won’t impose reciprocal tariffs on U.S.
Australian Prime Minister Anthony Albanese called the tariff “entirely unjustified,” but added that imposing reciprocal tariffs “would only push up prices for Australian consumers,” and that Australia would impose no new tariffs on U.S. goods.
Perhaps the sharpest response came from China, the world's largest steel producer and second-largest economy, whic had already been slapped with a 20% blanket tariff on Trump's return to office, and announced new tariffs in return.
Beijing's foreign ministry accused the U.S. of violating World Trade Organization rules and said it would take “all necessary measures” to protect its rights and interests.
“No one wins in a trade war or a tariff war, a view widely shared by the international community,” spokesperson Mao Ning said at a media briefing.
China Steel Production
Steel production in China's eastern Zhejing province on Feb. 14, 2025.
America's Asian allies in South Korea and Japan — both are major steel producers — were also hit.
South Korea, which Trump singled out in his address to Congress last week over its tariff policies, is scrambling to minimize the impact of U.S. tariffs on its domestic industries. Trade Minister Cheong In-kyo's office said Wednesday he will be in Washington for two days starting Thursday for tariff discussions.
Yoshimasa Hayashi, the Japanese government’s top spokesperson, told reporters in Tokyo it was “regrettable” that the new U.S. tariffs had been imposed “without excluding Japan.”
Stock market investors have made their feelings about the tariffs clear, with major indexes plunging on Trump's earlier tariff threats, before rallying on the suspension announcement for Canada.
Pre-market futures for the Dow Jones Industrial Average, the S&P 500 and the Nasdaq all inched higher early Wednesday after plunging in recent days.
Aside from steel and aluminum, Trump on Tuesday called on Canada to drop its duties on U.S. dairy products and threatened to "substantially increase" tariffs on cars imported into the United States if Canada did not drop "other egregious, long time tariffs."
President Donald Trump imposed vast tariffs this week on key partners Canada and Mexico, roiling cross-border ties before offering temporary relief to manufacturers -- but with more levies kicking in next week, the respite may be fleeting. 
A truck carrying vehicles prepares to cross into the US from Canada at the Ambassador Bridge in Windsor, Ontario, on March 8.Geoff Robins / AFP via Getty Images
The auto tariffs, Trump warned, without citing evidence, “essentially, permanently shut down the automobile manufacturing business in Canada.” He also doubled down on some of his recent rhetoric about making Canada part of the United States.
Canada has quickly emerged as a target of Trump’s ire in his second term. Trump this month instituted, then pulled back, tariffs on a variety of Canadian goods as he goaded its leaders and blamed them for a lack of action on fentanyl trafficking.
Relatively little of the drug is seized at the U.S.'s northern border compared with that with Mexico.
While Mexico has faced similar tariff threats and rhetoric, its president, Claudia Sheinbaum, has succeeded in assuaging Trump.
Newly appointed Canadian Prime Minister Mark Carney subsequently called Trump's threats "an attack on Canadian workers, families and businesses."
He said Canadian tariffs on U.S. goods would remain "until the Americans show us respect and make credible, reliable commitments to free and fair trade."
Morgan Stanley analysts warned that because the United States is a net importer of steel and aluminum from Canada, the tariffs would lead to higher domestic prices."""

test_misinfo(test_news, grid_search, best_threshold)

This is a fact


In [None]:
test_news = """
Washington, D.C. - October 25, 2023

In a groundbreaking press conference today, NASA scientists revealed that they have discovered a fully intact alien city on the surface of Mars. The city, reportedly located near the planet's equator, features towering skyscrapers, advanced transportation systems, and what appears to be a central government building. According to NASA Administrator Bill Nelson, the discovery was made using the Mars Rover's newly upgraded "Alien Detection Module," which can scan for signs of extraterrestrial life and infrastructure.

"This is the most significant discovery in human history," Nelson declared. "We have definitive proof that intelligent life once thrived on Mars. The city is estimated to be over 10,000 years old, and its inhabitants likely fled the planet due to climate change."

The announcement has sent shockwaves through the scientific community and the general public. Social media platforms are abuzz with speculation about what this means for humanity's future. Some experts are even suggesting that the U.S. government should immediately begin planning a manned mission to Mars to establish contact with any remaining alien inhabitants.

However, not everyone is convinced. Dr. Emily Carter, a prominent astrophysicist, expressed skepticism about the findings. "This seems too good to be true," she said. "NASA has a history of making bold claims that later turn out to be exaggerated. I'll believe it when I see peer-reviewed evidence."

In response to the skepticism, NASA released a series of images allegedly taken by the Mars Rover. The photos show what appear to be metallic structures and intricate road networks. However, some critics have pointed out that the images could be digitally altered or misinterpreted geological formations.

The White House has already announced plans to increase NASA's budget by $500 billion to support further exploration of the alien city. President Biden is expected to address the nation later this week to discuss the implications of the discovery for national security and international relations.

As the world waits for more information, one thing is clear: the discovery of an alien city on Mars has forever changed our understanding of the universe—and our place within it.
"""

test_misinfo(test_news, grid_search, best_threshold) # misinfo

This is a misinformation


Our model works very well on very long text. This makes sense because TF-IDF is a sparse vector representation. When the sentence is short, the vector nearly looks like an all 0 vector. This leads it to default to misinformation.

In [67]:
test_news = """Amy Gleason, a former emergency room nurse-turned-health care technologist, became determined to help other patients after her daughter, Morgan, was diagnosed with a rare autoimmune disorder in 2010. Morgan had suffered from rashes and muscle weakness, and it took more than a year for a diagnosis.

Gleason's focus has been on simplifying access to electronic medical records to prevent similar delays in diagnosis. In February 2025, the White House announced Gleason as the acting administrator for the Department of Government Efficiency (DOGE), elevating her to a prominent role in the Trump administration.

Gleason had previously worked on health data projects with the U.S. Digital Service and DOGE’s predecessor during both Trump’s first term and the Biden administration. However, the White House has not provided details about why she was selected to lead DOGE, and there are questions about her authority versus Elon Musk’s involvement with the task force.

Despite her appointment, Musk’s influence remains significant. The administration has not clarified who truly leads DOGE, and Gleason has not made public comments since her appointment. Legal questions regarding DOGE's operations have also emerged, and the group's structure remains largely unknown.

Gleason’s colleagues describe her as intelligent, dedicated, and a thorough thinker. Dr. Gregg Alexander, a pediatrician who has known her for 20 years, stated, “She’s always tried to do the right thing.” However, some worry that in her new role, Gleason may indirectly contribute to cuts in programs that are important to her, such as rare disease research. DOGE has threatened budget cuts to agencies like the CDC and NIH, which fund such research.

Gleason’s daughter, now in her mid-20s, was diagnosed with juvenile dermatomyositis, a rare disease where the immune system attacks its own cells and tissues. Research partnerships with NIH have improved treatment options for juvenile myositis, though concerns exist about the impact of proposed budget cuts.

Gleason's work in the private sector includes roles at Allscripts, CareSync, and Main Street Health. She has also been involved in launching innovative health care solutions with Russell Street Ventures. Her experience in health technology and data has made her a key figure in improving access to health care.

Friends and colleagues describe her as apolitical, driven by a passion for improving patient care through technology. Despite her appointment to DOGE, some former colleagues expressed surprise at the shift from patient advocacy to a role that could involve cost-cutting measures, with one calling the transition a “dichotomy in values.”

Gleason, a graduate of the University of Tennessee, Knoxville, is an avid football fan with a great sense of humor and a love for travel. She has long been interested in streamlining medical records to improve patient outcomes, a passion that dates back to her early career as a nurse."""

test_misinfo(test_news, grid_search, best_threshold)

This is a misinformation


In [69]:
test_news = """Canada’s new prime minister has friends in high places … and low places.

Mark Carney has been appointed the prime minister of Canada after winning the Liberal Party’s leadership race.

The economist, who formally served as the governor of the bank of England, has made friends with some questionable people over the years, raising eyebrows.

Carney attended a dinner with the infamous Prince Andrew, a known associate of convicted pedophile Jeffrey Epstein, as was reported by the Daily Mail.

Carney was pictured with convicted sex offender and Epstein associate Ghislaine Maxwell during the Wilderness Festival in England, as was reported by True North.

It is important to note that it was already widely known that Epstein was a sex offender and pedophile at the time, leading to questions about the connections between Carney, Andrew, and Maxwell.

While there is no evidence to connect Carney to Epstein at this time, it appears Carney’s sister-in-law was in contact with Epstein.

Further releases of the Epstein files may shed more light on the alleged connections between Carney and Epstein."""

test_misinfo(test_news, grid_search, best_threshold)  # misinfo

This is a misinformation


In [68]:
test_news = """Amy Gleason, a former emergency room nurse-turned-health care technologist, was scared. It was 2010 and no doctor could figure out what was behind her daughter Morgan’s strange constellation of symptoms, including rashes and muscle weakness so severe that she could no longer walk upstairs.

When Morgan was finally diagnosed with a rare and potentially life-threatening autoimmune disorder after more than a year, Gleason became determined to empower other patients so they didn’t face similar delays in diagnosis.

“If a doctor had seen all of these visits and activity on one single screen put together, they probably would have wondered why this 10- or 11-year-old is going to the doctor all the time,” Gleason said in a 2020 TEDx talk. “And maybe that would have sparked a faster diagnosis.”

Until recently, Gleason, 53, had been a relatively low-profile health care data cruncher with a passion for simplifying access to electronic medical records.

Then, at the end of February, the White House announced Gleason had been named the acting administrator for the Department of Government Efficiency, elevating her to a prominent position in the Trump administration.

Gleason previously worked on projects related to health data at the U.S. Digital Service, DOGE’s predecessor, overlapping with Trump’s first term and the Biden administration.

However, the White House has not provided details about why, exactly, it selected Gleason to lead DOGE — a task force unit at the center of the administration’s efforts to streamline the federal government. 

The move has led many to question whether Gleason is truly in charge or whether the power resides with Elon Musk, the world’s richest man and a special government employee who has been the face of DOGE.

For weeks, the administration evaded questions about who was actually at the helm; the White House said Gleason was the acting administrator only after administration lawyers were unable to answer who was in charge of the agency when questioned in court. Gleason does not appear to have made any public comments since the White House announced that she was DOGE's top official.

The administration has also revealed very little about who else works for DOGE and what they do, despite Musk’s claims of transparency.

Even with Gleason’s title, Musk still seems to hold sway. As recently as Tuesday, Trump referred to DOGE as “headed by Elon Musk,” setting off fresh legal questions about the group’s operations. The working relationship between Musk and Gleason is unclear, and a DOGE spokesperson did not respond Friday to questions about Gleason’s job responsibilities.

Gleason also did not respond to a request for comment for this article. In interviews, former colleagues described her as highly intelligent and the most valuable asset wherever she works.

“It’s exactly the kind of person you need in a role like this,” said Dr. Gregg Alexander, a pediatrician in London, Ohio, who has known her for about 20 years. “She’s always tried to do the right thing.”

Still, some former colleagues worry that in her DOGE role, Gleason will be inadvertently complicit in cuts to programs that have personal significance to her — including research for rare disease funding. DOGE has threatened dramatic budget cuts to federal health agencies such as the Centers for Disease Control and Prevention and the National Institutes of Health.

The condition that Gleason’s daughter, who is now in her mid-20s, was diagnosed with is called juvenile dermatomyositis. The extremely rare disease is a form of juvenile myositis, in which a child’s immune system attacks its own cells and tissues.

Therapies discovered over the years thanks to partnerships with NIH have improved the prognosis for juvenile myositis, said James Minow, executive director at the advocacy organization Cure JM Foundation, where Gleason served as a board member and vice president for research from 2014 to 2018, according to her LinkedIn profile.

But with the Trump administration trying to cut NIH grant funding, Minow said he worried that DOGE could hamper the rare disease research that Gleason’s family and so many others depend on.

“Amy is a very thorough thinker, and I think that she’ll be one who will make very solid, reasoned recommendations to the president as he looks at fulfilling what he sees as his mission to reduce the size of government,” Minow said. “Obviously, Cure JM is wanting to do everything we can to protect NIH’s investment.”

During the latter part of her time there, she worked on the data team for the White House Coronavirus Task Force, creating databases from hospitals and labs that governors and the public relied on to track the virus. Her LinkedIn profile says she rejoined the U.S. Digital Service in January of this year as a senior adviser, though The New York Times reported she was reintroduced at the agency in late December, ahead of Trump’s inauguration.

A long history in the private sector
Gleason has also worked in the private sector at various health care management companies and startups. She held vice president positions at Allscripts, which provided software for electronic medical records, and worked from 2011 to 2018 at CareSync, a Florida-based medical technology startup that she co-founded, according to LinkedIn.

Her LinkedIn profile adds that from 2021 to 2024, she was vice president of product at Main Street Health, which provides care for people in rural areas, and at Russell Street Ventures, a firm dedicated to launching innovative health care.

Both Main Street Health and Russell Street Ventures were founded by entrepreneur Brad Smith, an early senior DOGE member who was previously named as head of the Center for Medicare & Medicaid Innovation in 2020 during Trump’s first administration.

Smith did not respond to a request for comment; according to anonymous sources who spoke to The New York Times, Smith began advising on Musk’s cost-cutting moves late last year and brought Gleason in on the talks. NBC News has not confirmed the report.

Tom Cooke, a retired health care executive who worked closely with Gleason more than 15 years ago, said her position at DOGE was “kind of a curveball.”

“I’ll put my politics on my sleeve: I don’t trust Elon Musk at all in this role. I trust her completely,” he said. “I am confident that she will use her voice strongly and that she’s a straight shooter, whether it’s news that people above her want to hear or not.”

Cooke described Gleason as having an effervescent personality and an unflappable work mentality.

“Professionally, I put a lot on her plate to get done in a very short period of time, and was amazed by her ability to achieve that,” he said.

And on a personal level, “I’ve seen her be really thoughtful with folks that she may have had just a little bit of interaction with,” he said. “She just has a way with people.”

Others were also surprised by her DOGE title. One former health care IT colleague said via a LinkedIn message that “it did seem to come out of nowhere.”

“I was shocked to hear of her appointment to DOGE, having been a fierce and committed patient advocate,” wrote the former colleague, who has known Gleason for 15 years and spoke on condition of anonymity because she was concerned speaking out against the Trump administration could have career repercussions. “To go from such a position of kindness to a position that eliminates jobs for thousands of working parents seems like such a dichotomy in values.”

A graduate of the University of Tennessee, Knoxville, Gleason is an avid football fan who likes to needle friends who root for anyone other than the Tennessee Volunteers, said Alexander, the pediatrician. He added that she has a “tremendous sense of humor” and loves to travel.

Gleason’s interest in streamlined medical records and other improvements for patients dates back decades. In 2021, she told the “Tell Me Where IT Hurts” podcast, which examines the intersection between health care and technology, that she started out as an emergency room nurse and “quickly realized how powerful health care technology could be.”

Gleason has said the best career advice she has received was from her parents. She told another health care podcast in 2023 that her dad taught her mistakes are a learning opportunity, and her mom encouraged her to follow her dreams.

“I’ve had a pretty great career trying a lot of new things and following my passions as I develop new ones as well,” she told the podcast."""

test_misinfo(test_news, grid_search, best_threshold)

This is a fact


Very interesting here where the news itself is labelled as a fact but the rephrasing from ChatGPT is labelled as a misinfo. This might be due to the length. There seems to be a pattern here where longer text will make it predict as a fact.

In [None]:
test_news = """A recent report by Pollara is a reminder that social media is not real life.

If you are a frequent user of social media platforms, specifically X (Twitter), you may have come to the conclusion that an overwhelming majority of Conservative voters support Donald Trump and his recent policies.

If one were to give even the slightest criticism of the current American president, they will likely see a barrage of Trump supporters defending their preferred politician in the comment section.

However, a recent report published by research firm Pollara suggests something different.

More than three quarters of Canadians feel negatively about Trump, according to the report. 61% of Conservative voters hold a negative impression of Trump.

A majority of Canadians feel that Trump’s tariffs will have a negative impact on their day-to-day life, and that Canada should respond with retaliatory tariffs.

Trump’s tariff plans was met with condemnation across the political spectrum in Canada, with even staunch right-wing politicians such as Danielle Smith and Pierre Poilievre pushing back.

“The tariffs imposed by U.S. President Donald Trump are an unjustifiable economic attack on Canadians and Albertans,” wrote Smith in a statement posted to X.

“President Trump stabbed America’s best friend in the back,” Poilievre said in a speech delivered on Parliament Hill.

Meanwhile, hard right political commentators such as Rupa Subramanya have used their platform to justify the tariffs at ever turn.

Subramanya has taken stabs at both Smith and Poilievre, as have other influential conservative commentators.

Despite their relatively large audiences, the views shared by the likes of Subramanya are not shared by the majority of Canadians.

At the time of writing, Trump has dialed back tariffs on certain products coming to America from Canada. However, the trade war is still ongoing."""

test_misinfo(test_news, grid_search, best_threshold)  # misinfo

This is a misinformation


In [71]:
test_news = """
Ahead of a Wednesday hearing at which lawyers for a Palestinian activist who is a legal permanent U.S. resident will challenge his planned deportation, the State Department and the White House pointed to a provision that allows green card holders to be removed from the country if they present “potentially serious adverse foreign policy consequences for the United States.”

“In such cases, the Secretary of State notifies the Secretary of the Department of Homeland Security, who has the authority to initiate removal charges,” a State Department spokesperson said in response to questions about the circumstances of Mahmoud Khalil’s arrest.

Khalil, a recent Columbia University graduate and Palestinian activist whom federal immigration authorities arrested at university housing Saturday, has not been criminally charged.

President Donald Trump has said the apprehension of Khalil, who organized and led pro-Palestinian rallies at Columbia last year, would be the “first arrest of many” as his administration cracks down on campus opposition to the war in Gaza.

The Department of Homeland Security has accused Khalil of leading “activities aligned to Hamas, a designated terrorist organization.”

The White House said Tuesday that pro-Hamas propaganda was distributed at the campus protests Khalil organized.

“This administration is not going to tolerate individuals having the privilege of studying in our country and then siding with pro-terrorist organizations that have killed Americans,” White House press secretary Karoline Leavitt said.

Protests continue for detained pro-Palestinian activist

A lawyer for Khalil, Samah Sisay, rejected the claim, saying there is no evidence that his client provided support of any kind to a terrorist organization.

Khalil's legal team has also said that his arrest was a violation of his free speech rights and that he was targeted because of his viewpoints.

“He was chosen as an example to stifle entirely lawful dissent in violation of the First Amendment,” attorney Amy Greer said.

Adam Cox, a law professor and immigration expert at New York University, said the provision in the Immigration and Nationality Act provision cited by the administration has historically been used in rare circumstances.   

“If the administration began to use that in a widespread way to arrest folks and argue that they’re deportable, that would be an enormous change from historical practice,” he said.

Leavitt said more arrests are expected. The Department of Homeland Security is using intelligence to identify more “pro-Hamas” students and has provided Columbia University with the names of others who she said “engaged in pro-Hamas activity.”

The school has declined to help the department, she said.

Khalil is being held in a detention center in Louisiana, where he has an initial hearing in immigration court scheduled for March 21.

His legal team is fighting for his return to New York City.

“I need your help to bring Mahmoud home, so he is here beside me, holding my hand in the delivery room as we welcome our first child into this world,” his wife, who is eight months pregnant, said in a statement released through their lawyers.
"""

test_misinfo(test_news, grid_search, best_threshold)

This is a fact


In [72]:
test_news = """The UK and NHS could help license the world’s first norovirus vaccine as a groundbreaking clinical trial is set to begin over the next two weeks to evaluate a new mRNA vaccine.
Including at 27 NHS sites, the phase three trial will recruit across Great Britain with 2,500 participants expected to join the study between this month and early next year.

Cutting edge innovation
The trial comes as part of the UK Government’s deal with US pharma company Moderna two years ago. The study will look into the vaccine know as mRNA-1403, investigating:

Efficacy
Safety
Immunogenicity
The trial is being delivered through the National Institute for Health and Care and the UK Vaccine Innovation Pathway, with NIHR south west regional director, Dr Patrick Moore, acting as the UK’s chief investigator.

Comment from Wes Streeting, secretary of state for health and social care

“The UK is really pleased to be able to play an important role in helping find an effective vaccine against this highly contagious disease,” he said. “Outbreaks of norovirus have huge consequences, both on our health systems and our economy. This innovative trial is crucial in helping us advance healthcare.”

The NIHR – along with the devolved administrations – is working on a range of recruitment techniques to streamline the trial’s delivery, including its flagship tool, Be Part of Research.

Potential impact
It is thought that norovirus, characterised as a transmissible stomach bug, costs the NHS upwards of £100m a year, with annual infections estimated to be around 3.7 million.

Wes Streeting, the health secretary, said: “Not only is this a huge vote of confidence in the UK’s life sciences sector, but a successful vaccine will help shift our health system away from sickness and towards prevention – reducing pressure on the NHS and keeping people well during the colder months.”

This is the final stage before the vaccine’s manufacturers can apply to the Medicines and Healthcare products Regulatory Agency for a licence.

There are currently no licenced norovirus vaccines anywhere in the world. CEO at the NIHR, Professor Lucy Chappell, added that the vaccine has the potential to impact the lives of many – especially society’s most vulnerable.

As a whole, the move will be seen as a major vote of confidence to the UK’s life sciences sector.

To learn more about what is happening in the life sciences sector, register for an exclusive NHE online conference tomorrow."""

test_misinfo(test_news, grid_search, best_threshold)

This is a fact
