In [5]:
import numpy as np

# Load the GloVe embedding from file into dictionary
def load_glove_embeddings(file_path):
    embeddings_index = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    return embeddings_index

# Path to GloVe file
glove_path = 'glove.6B.300d.txt'
embeddings = load_glove_embeddings(glove_path)

# Function to get the embedding of a headline
def get_headline_embedding(headline, glove_model):
    # Split headline into words and convert to lowercase
    words = headline.lower().split()

    # Get vector for each word that exists in the model
    word_vectors = [glove_model[word] for word in words if word in glove_model]

    # Return None if no words were found in the model
    if not word_vectors:
        return None
    
    # Return average of all word vectors
    return np.mean(word_vectors, axis=0)

In [6]:
# False Headlines
false_headlines=[
    "BREAKING: Fox Sports Cancels ALL NFL Broadcasts \"Until Players Respect The Flag\"",
    "Rugby Safer Than American Football \"For Health Reasons\": Biden",
    "First woman to medal in six straight Olympics. Media and sponsors ignore her because she is outspoken pro-2A.",
    "Novak Djokovic becomes the first professional athlete in history to be banned from a major sporting competition for not taking drugs",
    "BREAKING: ESPN has fired Shannon Sharpe, per @ESPNNBA",
    "The Minnesota Vikings have denounced Tim Walz: \"We don’t suppᴏrt his values.\"",
    "'BREAKING: WNBA referees disqualify two players under league’s new \"no anthem kneeling\" rule",
    "Nike announces termination of contract with Brittney Griner after \"strong backlash\" from online community: \"We need more athletes like Riley Gaines and less woke Brittney Griner!\"",
    "KNEELING: After the University of Texas, all students who knelt during the national anthem were rounded up and REMOVED FROM SCHOLARSHIPS",
    "Travis Kelce kneels during national anthem fined $10 million and thrown out of the game.",
    "The NFL will now use facial recognition at every stadium to verify the identity of everyone at the game.",
    "Mike Tyson says he’s willing to box Olympic DUDE with all proceeds to go to a battered women’s charity.",
    "After winning silver, Yusef stood emotionless on the Olympic podium and declared, \"Sharon, if you’re watching this, I want my dog back.\"",
    "Miami Dolphins QB Tua Tagovailoa will be sitting front row tonight in Doral for the Trump speech",
    "BREAKING: The WNBA organizers have officially announced an investigation into the referees in all of Caitlin Clark's games for ignoring all dirty actions by her opponents against her",
    "Chiefs' Coach Andy Reid \"fires 3 top players for anthem kneeling.\"",
    "BREAKING: Caitlin Clark Rejects $400 Million Deal From Nike, \"Not With That Kaepernick Clown,\"",
    "At Euro 2020, UEFA (European Football Association) ordered all team captains to wear \"OneLove\" bands. The band was used as a symbol of LGBTQ. But, Portugal captain Cristiano Ronaldo was the only European captain who did not wear the band.",
    "Golden State Warriors refuse to visit White House after winning NBA title: reports",
    "Taylor Swift faces a 10-game NFL ban following controversial political involvement - fans in uproar!"
]

# True Headlines
true_headlines=[
    "Did that really happen? Barbados v Grenada and a deliberate own goal",
    "Breakdancing Will Not Be Busting A Move In 2028 Olympics",
    "Breaking Will Not Be in The 2028 Los Angeles Olympics—What’s Next?",
    "Braves Superstar Sets Atlanta-Era Record in 1st Inning",
    "Only 20 schools in the Football Bowl Subdivision have athletic departments with revenue exceeding expenses",
    "LATEST: Mitchell Stadium named America’s Best High School Football Stadium",
    "Homes of Patrick Mahomes, Travis Kelce burglarized last month",
    "Gregg Popovich recovering from mild stroke, no timeline for return set",
    "Patrick Queen: I wasn’t wanted back with Ravens, it was definitely kind of upsetting",
    "Odell Beckham Jr. boasts about taking 2021 Rams salary in Bitcoin National Football League Odell Beckham Jr. boasts about taking 2021 Rams salary in Bitcoin",
    "Ecuador international soccer player Marco Angulo dies aged 22 following car crash",
    "Shohei Ohtani Baseball Worth $4 Million Lands in Globally-Recognized Skyscraper",
    "Kobe Bryant is the only person to have won both an Olympic medal and an Oscar",
    "Bucks fan predicted Milwaukee-Phoenix NBA Finals all the way back in 2016",
    "Chiefs kicker Butker congratulates women graduates and says most are more excited about motherhood",
    "Kansas City Chiefs player faces backlash for graduation speech criticizing working women, calling Pride a \"deadly sin\"", 
]

# Process false headlines
false_headline_embeddings = {headline: get_headline_embedding(headline, embeddings) 
                           for headline in false_headlines}

# Process true headlines
true_headline_embeddings = {headline: get_headline_embedding(headline, embeddings) 
                          for headline in true_headlines}

In [7]:
# Print first headline and first 10 dimensions of its embedding from false headlines
sample_false_headline = list(false_headline_embeddings.keys())[0]
print("Sample headline:", sample_false_headline)
print("First 10 dimensions of embedding:", false_headline_embeddings[sample_false_headline][:10])

# Print first headline and first 10 dimensions of its embedding from true headlines
sample_true_headline = list(true_headline_embeddings.keys())[0]
print("\nSample true headline:", sample_true_headline)
print("First 10 dimensions of embedding:", true_headline_embeddings[sample_true_headline][:10])

Sample headline: BREAKING: Fox Sports Cancels ALL NFL Broadcasts "Until Players Respect The Flag"
First 10 dimensions of embedding: [-0.10396978  0.2762228   0.09694818 -0.069092   -0.063411    0.22916242
 -0.3158339   0.19786185 -0.08139833 -0.86293197]

Sample true headline: Did that really happen? Barbados v Grenada and a deliberate own goal
First 10 dimensions of embedding: [ 0.03671264  0.2641329  -0.1592579  -0.07886673  0.04698409 -0.01603688
 -0.06407236  0.12505382 -0.04429899 -1.3538185 ]
