In [None]:
# installing the necessary libraries for the project.
!pip install torch
!pip install transformers
!pip install scikit-learn
!pip install feedparser
!pip install joblib
!pip install numpy
!pip install scipy
!pip install pandas
!pip install matplotlib
!pip install seaborn
!pip install tqdm



In [None]:
# Importing required libraries
import torch
from transformers import AutoTokenizer, BertModel
from sklearn.preprocessing import normalize
import feedparser
from joblib import load
import numpy as np
from scipy.special import expit

# Feed URLs that check_fact hands to verify_fact as the reference corpus.
# Every feed listed here is treated as equally credible: nothing in this notebook
# weights or vets the outlets, so edit this list to control what counts as "credible".
CREDIBLE_SOURCES = [
    "https://www.ukrinform.net/rss",
    "https://tass.com/rss/v2.xml",
    "https://www.aljazeera.com/xml/rss/all.xml",
    "https://www.kyivpost.com/feed",
    "https://www.rt.com/rss/",
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    "http://feeds.bbci.co.uk/news/world/europe/rss.xml",
    "https://www.theguardian.com/world/rss",
]

# Keywords related to the Russia-Ukraine war; is_relevant_news uses them to filter feed entries.
# Matching is case-insensitive substring containment, so short terms such as "war"
# also match inside unrelated words (e.g. "award", "software").
KEYWORDS = [
    "Ukraine", "Russia", "Kyiv", "Kiev", "Moscow", "Donbas", "Crimea",
    "Putin", "Zelensky", "invasion", "war", "conflict", "NATO", "sanctions",
    "military", "troops", "separatists", "Donetsk", "Luhansk", "Mariupol",
    "Kharkiv", "Odessa", "Zaporizhzhia", "Belarus", "war crimes",
    "ceasefire", "peace talks", "refugees", "humanitarian aid", "shelling",
    "missile strike", "airstrike", "evacuation", "occupation", "territory",
    "sovereignty", "annexation", "mobilization", "cyber attack", "propaganda",
    "diplomacy", "arms supply", "nuclear threat", "Western allies", "Eastern Europe"
]

# Pick the compute device, preferring CUDA, then Apple's MPS backend, then the CPU.
# The conditional expression short-circuits, so MPS is only probed when CUDA is absent.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print(f"Using device: {device}")

# Loading the BERT model and tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
bert_model = BertModel.from_pretrained(model_name).to(device)
# Overwrite the stock weights with the checkpoint saved in 'bert_model.pth'.
# weights_only=True restricts unpickling to tensors and plain containers, which is all a
# state dict needs; it avoids executing arbitrary pickled code and silences the
# FutureWarning that torch.load emits when the flag is left unset.
bert_model.load_state_dict(torch.load('bert_model.pth', map_location=device, weights_only=True))
# Inference only: switch off training-time behaviour such as dropout.
bert_model.eval()

# Embedding helper built on the module-level tokenizer / bert_model loaded above
def get_bert_embedding(text):
    """Embed ``text`` with the loaded BERT model.

    The text is tokenised (truncated to at most 512 tokens), moved to ``device``
    and run through ``bert_model`` without gradient tracking.

    Returns:
        A NumPy array holding the last-layer hidden state at token position 0
        for each input sequence, i.e. one row per sequence.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)
    on_device = {name: tensor.to(device) for name, tensor in encoded.items()}
    with torch.no_grad():
        first_token_state = bert_model(**on_device).last_hidden_state[:, 0, :]
    return first_token_state.cpu().numpy()

def is_relevant_news(text, keywords):
    """Return True if ``text`` contains any of ``keywords``, ignoring case.

    Matching is plain substring containment, so a short keyword such as "war"
    also matches inside longer words (e.g. "award").

    Args:
        text: The string to search.
        keywords: Iterable of keyword strings.

    Returns:
        True when at least one keyword occurs in ``text``; False otherwise,
        including when ``keywords`` is empty.
    """
    # Lower-case the text once instead of once per keyword.
    lowered_text = text.lower()
    return any(keyword.lower() in lowered_text for keyword in keywords)

def verify_fact(claim, credible_sources, keywords):
    """Find the feed entry most similar to ``claim`` across ``credible_sources``.

    Each source URL is parsed with feedparser. Entries whose title+description
    do not contain any of ``keywords`` are skipped; the rest are embedded with
    ``get_bert_embedding`` and compared to the claim by cosine similarity.

    Args:
        claim: The claim text to check.
        credible_sources: Iterable of feed URLs.
        keywords: Keywords passed to ``is_relevant_news`` to filter entries.

    Returns:
        A tuple ``(most_similar_text, max_similarity, source_of_similar_text)``.
        If no relevant entry scores above 0 the result is ``("", 0, "")``.
    """
    # The claim is the same for every comparison, so embed and normalise it once.
    claim_unit = normalize(get_bert_embedding(claim))
    max_similarity = 0
    most_similar_text = ""
    source_of_similar_text = ""
    for source in credible_sources:
        print(f"Checking source: {source}")
        try:
            feed = feedparser.parse(source)
            for entry in feed.entries:
                # Entries are not guaranteed to carry a title; reading it with .get
                # keeps one title-less entry from aborting the rest of the feed.
                text = entry.get('title', '') + " " + entry.get('description', '')
                if not is_relevant_news(text, keywords):
                    continue  # Skip the news article if it's not related to the Russia-Ukraine war
                text_unit = normalize(get_bert_embedding(text))
                # Dot product of unit-length rows is the cosine similarity.
                similarity = np.dot(claim_unit, text_unit.T)[0][0]
                if similarity > max_similarity:
                    max_similarity = similarity
                    most_similar_text = text
                    source_of_similar_text = source
        except Exception as e:
            # Best effort: report the failure and move on to the next source.
            print(f"Error processing source {source}: {e}")
    return most_similar_text, max_similarity, source_of_similar_text

def credibility_score(similarity, threshold=0.5):
    """Rescale a similarity value into a credibility score between 0 and 1.

    Similarities below ``threshold`` score 0. From ``threshold`` upward the
    score rises linearly, reaching 1 at a similarity of 1.

    Args:
        similarity: Similarity between the claim and the best-matching text.
        threshold: Minimum similarity that earns a non-zero score.

    Returns:
        0 when ``similarity`` is below ``threshold``; otherwise the rescaled
        score, capped at 1.0.
    """
    if similarity >= threshold:
        if threshold >= 1:
            # No range left to rescale over (and 1 - threshold would divide by
            # zero): meeting such a threshold counts as full credibility.
            return 1.0
        # Cap at 1.0 so floating-point overshoot in the similarity cannot
        # produce a score above the documented range.
        return min((similarity - threshold) / (1 - threshold), 1.0)
    else:
        return 0

def check_fact(claim):
    """Print a similarity report for ``claim`` against the credible sources.

    Looks up the closest matching feed entry via ``verify_fact`` (using
    ``CREDIBLE_SOURCES`` and ``KEYWORDS``), converts the similarity into a
    credibility score, and prints the source, text, similarity and score.
    Nothing is returned.
    """
    print(f"Checking claim: {claim}")
    best_text, best_similarity, best_source = verify_fact(claim, CREDIBLE_SOURCES, KEYWORDS)
    report_lines = [
        "\nMost similar information found:",
        f"Source: {best_source}",
        f"Text: {best_text}",
        f"Similarity: {best_similarity}",
        f"Credibility Score: {credibility_score(best_similarity)}",
    ]
    for line in report_lines:
        print(line)

# Loading the pre-saved SVC model using joblib
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only point model_path at a file from a trusted source.
model_path = "svc_model.joblib"  # Change to your correct path
svc_model = load(model_path)

def Get_predictions(model, texts):
    """Score ``texts`` with ``model`` and squash the scores into the range [0, 1].

    Each text is embedded with ``get_bert_embedding``; the stacked embeddings
    are passed to ``model.decision_function`` and the resulting scores are
    mapped through the logistic sigmoid.

    Note: the sigmoid of a decision score is a monotone rescaling, not a
    calibrated probability.

    Args:
        model: A fitted estimator exposing ``decision_function``.
        texts: Iterable of strings to score.

    Returns:
        A NumPy array of sigmoid-transformed decision scores, one entry per
        text; an empty array when ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # np.vstack raises ValueError on an empty sequence; an empty batch
        # simply has no scores.
        return np.empty(0)

    # Converting texts to BERT embeddings
    embeddings = np.vstack([get_bert_embedding(text) for text in texts])
    decision_scores = model.decision_function(embeddings)  # Get decision function scores

    # Map the decision scores into [0, 1] with the sigmoid function
    probs = expit(decision_scores)
    return probs

def classify_claim(claim):
    """Score a single claim with the loaded ``svc_model``.

    The claim is wrapped in a one-element batch for ``Get_predictions`` and
    the first (only) resulting score is returned.
    """
    single_batch = [claim]
    scores = Get_predictions(svc_model, single_batch)
    return scores[0]

# Demo claims used by the two example runs below
false_claim = "Ukrainian authorities have uncovered a network of Russian bots spreading false information about the war on social media"
true_claim = "NATO has agreed to fast-track Ukraine's membership application, but with certain conditions."


def _run_example(label, claim):
    """Print the fact-check report for ``claim`` and return its classifier score."""
    print(f"\nTesting {label}:")
    check_fact(claim)
    probability = classify_claim(claim)
    print(f"{label} probability: {probability:.4f}")
    return probability


# Example 1: False Claim
false_claim_probability = _run_example("False Claim", false_claim)

# Example 2: True Claim
true_claim_probability = _run_example("True Claim", true_claim)

Using device: mps


  bert_model.load_state_dict(torch.load('bert_model.pth', map_location=device))



Testing False Claim:
Checking claim: Ukrainian authorities have uncovered a network of Russian bots spreading false information about the war on social media
Checking source: https://www.ukrinform.net/rss
Checking source: https://tass.com/rss/v2.xml
Checking source: https://www.aljazeera.com/xml/rss/all.xml
Checking source: https://www.kyivpost.com/feed
Checking source: https://www.rt.com/rss/
Checking source: https://rss.nytimes.com/services/xml/rss/nyt/World.xml
Checking source: http://feeds.bbci.co.uk/news/world/europe/rss.xml
Checking source: https://www.theguardian.com/world/rss

Most similar information found:
Source: https://www.kyivpost.com/feed
Text: Russia ‘Highly Discourages’ Use of Dating Apps and CCTV in Its Border Regions Russian authorities issued a memo to warn citizens of the risks associated with geolocation leakage and issued additional orders to troops to prevent Ukraine from accessing military chats.
Similarity: 0.9113008975982666
Credibility Score: 0.822601795196

This code implements an automated fact-checking system. It uses BERT embeddings to measure the semantic similarity between a given claim and news articles from a list of credible sources, and assigns a credibility score based on the highest similarity found. Separately, a pre-trained SVC model classifies the claim, estimating the likelihood of it being true or false.