In [None]:
# Install required packages (notebook shell commands, not Python statements).
# NOTE: after the spaCy model is installed, the runtime may need a restart
# before spacy.load('en_core_web_md') can find the new package.

!pip install spacy
!python -m spacy download en_core_web_md
# 'punkt_tab' is fetched only here; 'punkt' and 'stopwords' are requested a
# second time through nltk.download() further down in this cell.
!python -m nltk.downloader punkt_tab
!python -m nltk.downloader punkt
!python -m nltk.downloader stopwords


# Import required libraries
import spacy
import numpy as np
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk

# Fetch the NLTK resources used below: the tokenizer data and the stop-word lists
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

def preprocess_text(text):
    """
    Normalise text for embedding.

    The text is lower-cased and split into word tokens; tokens that are
    English stop words or are not purely alphanumeric (e.g. punctuation)
    are dropped. The surviving tokens are returned joined by single spaces.
    """
    # Lower-case first so the stop-word comparison below is case-insensitive
    lowered_words = word_tokenize(text.lower())

    # A set gives constant-time membership checks while filtering
    ignored = set(stopwords.words('english'))

    kept = filter(lambda word: word.isalnum() and word not in ignored, lowered_words)
    return ' '.join(kept)

def get_sentence_embeddings(nlp, text):
    """
    Run ``text`` through the ``nlp`` pipeline and return the ``vector``
    attribute of the resulting document.
    """
    return nlp(text).vector

def calculate_similarity(vec1, vec2):
    """
    Return the cosine similarity of ``vec1`` and ``vec2``.

    If either vector has a zero norm the cosine is undefined, and 0 is
    returned instead of dividing by zero.
    """
    inner = np.dot(vec1, vec2)
    length_a, length_b = np.linalg.norm(vec1), np.linalg.norm(vec2)

    if length_a != 0 and length_b != 0:
        return inner / (length_a * length_b)

    # At least one vector has no magnitude
    return 0

# Loaded spaCy pipelines keyed by model name, so repeated scoring calls do not
# pay the cost of reading the model from disk again.
_NLP_MODELS = {}


def _get_nlp(model_name):
    """Return the spaCy pipeline for ``model_name``, loading it on first use only."""
    if model_name not in _NLP_MODELS:
        _NLP_MODELS[model_name] = spacy.load(model_name)
    return _NLP_MODELS[model_name]


def hallucination_score(source_text, generated_text, model_name='en_core_web_md'):
    """
    Score how semantically consistent a generated text is with its source.

    Both texts are preprocessed with ``preprocess_text``, embedded as spaCy
    document vectors, and compared with cosine similarity. Despite the
    function's name, the value is a consistency score: a HIGHER value means
    the generated text is closer to the source.

    Args:
        source_text (str): The source/reference text
        generated_text (str): The generated/comparison text
        model_name (str): Name of the spaCy model to load. The pipeline is
            cached after the first load, so later calls reuse it.

    Returns:
        float | None: Cosine similarity of the two document vectors
        (mathematically within [-1, 1]; 0.0 when either vector has zero
        norm). None if any step raises; the error is printed instead of
        being propagated.
    """
    try:
        # Load (or reuse) the spaCy model
        nlp = _get_nlp(model_name)

        # Preprocess texts
        source_processed = preprocess_text(source_text)
        generated_processed = preprocess_text(generated_text)

        # Get document embeddings
        source_embedding = get_sentence_embeddings(nlp, source_processed)
        generated_embedding = get_sentence_embeddings(nlp, generated_processed)

        # Calculate similarity score
        consistency_score = calculate_similarity(source_embedding, generated_embedding)

        # Normalise numpy scalars (and the int 0 fallback) to a plain float
        return float(consistency_score)

    except Exception as e:
        # Deliberate best-effort: callers check for None rather than catching
        print(f"Error calculating factual consistency score: {str(e)}")
        return None

# Example usage: score an AI-generated paraphrase against its source text.

# Write or paste the original (reference) text here.
source = """
The Renaissance was a period of cultural rebirth in Europe that began in Italy during the 14th century. This movement was characterized by renewed interest in classical art and learning, leading to significant advances in art, architecture, and science.
"""

# Write or paste the AI-generated text to be checked here.
generated = """
The Renaissance started in Italy in the 1300s and was a time when European culture experienced a major revival. People became very interested in studying ancient Greek and Roman works, which sparked big developments in things like painting, building design, and scientific discovery.
"""

score = hallucination_score(source, generated)
# hallucination_score returns None when it hit an error (already printed there)
if score is not None:
    print(f"Consistency Score: {score:.4f}")

Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Factual Consistency Score: 0.8996
