In [1]:
# Turn on Developer Mode on your computer (Windows):
# Settings -> Update & Security -> For Developers -> Enable Developer Mode

In [1]:
import pandas as pd
import numpy as np
from transformers import pipeline
from tqdm import tqdm

# Read the publications table from disk
data_file = 'Publications.csv'
try:
    data = pd.read_csv(data_file)
except FileNotFoundError:
    # Re-raise with a message that names the file that was looked for
    raise FileNotFoundError(
        f"The file {data_file} was not found. Please check the path."
    )

# The scoring step reads data['Title'], so stop now if that column is absent
has_title_column = 'Title' in data.columns
if not has_title_column:
    raise ValueError("The dataset does not contain a 'Title' column.")

# Build the Hugging Face zero-shot classification pipeline used for scoring.
# device=-1 keeps inference on the CPU.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=-1,
)

# Function to score how engaging a title is, based on model output
def score_readability(title):
    """Rate how strongly an article title invites reading the full article.

    The title is passed to the module-level zero-shot ``classifier`` together
    with five ordinal labels; the top-ranked label is converted to an integer.

    Args:
        title: The article title. Expected to be a string; any other value
            (for example the float NaN pandas produces for an empty CSV
            cell) or a blank string is treated as a missing title.

    Returns:
        int: 1 ("Very poor") through 5 ("Excellent"). Returns 3 when the
        title is missing or the classifier's top label is not recognised.
    """
    # Ordered worst-to-best; the insertion order is also the order in which
    # the candidate labels are handed to the classifier.
    label_scores = {
        "Very poor": 1,
        "Poor": 2,
        "Average": 3,
        "Good": 4,
        "Excellent": 5
    }
    default_score = 3

    # A missing or blank title cannot be classified; use the neutral default
    # instead of passing a non-string value into the pipeline.
    if not isinstance(title, str) or not title.strip():
        return default_score

    # The question must be supplied as the hypothesis template. Without it the
    # pipeline uses its generic default template, so the model is never told
    # what the labels are supposed to rate.
    hypothesis_template = (
        "How engaging this article title is for reading the full article: {}."
    )
    result = classifier(
        title,
        list(label_scores),
        hypothesis_template=hypothesis_template,
        truncation=True,
    )

    # result['labels'] is sorted by descending score, so index 0 is the winner
    best_label = result['labels'][0]
    return label_scores.get(best_label, default_score)

# Register tqdm's pandas integration so that Series.progress_apply exists
tqdm.pandas()

# Score every title (with a progress bar) and store the result as a new column
title_scores = data['Title'].progress_apply(score_readability)
data['Readability_Score'] = title_scores

# Write the scored table to a new CSV file, leaving out the index column
output_file = 'Publications_with_scores.csv'
data.to_csv(output_file, index=False)

print(f"Readability scores have been saved to {output_file}")


Device set to use cpu
100%|██████████| 4289/4289 [1:36:47<00:00,  1.35s/it]

Readability scores have been saved to Publications_with_scores.csv



