# <span style="color:#FF8888;">Base Model using TF-ISF with <span style="color: #1E90FF;">Standard Cosine Similarity</span> and <span style="color: #1E90FF;">Standard Degree Centrality</span></span>

# 📥 Install Libraries

In [1]:
!pip install rouge
!pip install gspread oauth2client

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


# 📚 Import Libraries

In [2]:
import os
import re
import json
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter, defaultdict
import glob
from nltk.util import ngrams
from rouge import Rouge
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# ⚙️ Settings

In [3]:
representation_technique = "tf-isf"
n_grams = 1
similarity_measure = 1 # 1 means original cosine similarity and 2 means modifed cosine similarity
edge_weight_threshold = 0.05
summary_ratio=0.3
ranking_method = 2 # 1 means degree centrality and 2 means textrank
max_iterations = 100

# 📂 Load JSON Data and Extract Sentences with Indexing

In [4]:
def load_json_file(file_path):
    """Load and parse a JSON file."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def extract_sentences(data, skip_title=True):
    """Extract all sentences from the nested JSON structure.
    
    Args:
        data: The JSON data containing paragraphs and sentences
        skip_title: If True, skip paragraph 0 (title)
    
    Returns:
        A list of tuples: (sentence_text, (paragraph_idx, sentence_idx))
    """
    sentences = []
    
    # Go through all paragraphs
    for para_idx in data:
        # Skip paragraph 0 (title) if skip_title is True
        if skip_title and para_idx == '0':
            continue
            
        # Go through all sentences in the paragraph
        for sent_idx in data[para_idx]:
            if isinstance(data[para_idx][sent_idx], str):
                # Store sentence text and its indices
                sentences.append((data[para_idx][sent_idx], (para_idx, sent_idx)))
    
    return sentences

# 🔡 Generate N-Grams from Text

In [5]:
def get_ngrams(text):
    """Extract n-grams from text.
    
    Args:
        text: Input text
        n_grams: (global variable) Size of n-grams (1 for unigrams, 2 for bigrams, etc.)
    
    Returns:
        List of n-grams
    """
    tokens = text.split()
    if n_grams == 1:
        return tokens
    else:
        return [' '.join(gram) for gram in ngrams(tokens, n_grams)]


# 📖 Compute TF-ISF

In [6]:
def compute_tf(sentence):
    """Compute term frequency for a sentence using n-grams."""
    # Extract n-grams from the sentence
    terms = get_ngrams(sentence)
    
    # Count the frequency of each term
    term_count = Counter(terms)
    
    # Calculate term frequency
    sentence_length = len(terms)
    
    # Avoid division by zero
    if sentence_length == 0:
        return {}
        
    tf = {term: count / sentence_length for term, count in term_count.items()}
    
    return tf

def compute_isf(sentence_texts):
    """Compute inverse sentence frequency for all terms in the corpus using n-grams."""
    # Count the number of sentences containing each term
    term_sentence_count = defaultdict(int)
    
    for sentence in sentence_texts:
        terms = set(get_ngrams(sentence))  # Use set to count each term only once per sentence
        for term in terms:
            term_sentence_count[term] += 1
    
    # Calculate ISF: log(total number of sentences / number of sentences containing the term)
    num_sentences = len(sentence_texts)
    isf = {term: math.log(num_sentences / count) for term, count in term_sentence_count.items()}
    
    return isf

def compute_tf_isf(sentence_data):
    """Compute TF-ISF vectors for each sentence using n-grams."""
    # Extract just the text from sentence data for ISF calculation
    sentence_texts = [s[0] for s in sentence_data]
    
    # Get ISF values for all terms
    isf = compute_isf(sentence_texts)
    
    # Compute TF for each sentence
    tf_per_sentence = [compute_tf(sentence) for sentence in sentence_texts]
    
    # Create a list of all unique terms
    all_terms = sorted(list(isf.keys()))
    term_to_idx = {term: idx for idx, term in enumerate(all_terms)}
    
    # Compute TF-ISF for each sentence
    tf_isf_vectors = []
    
    for tf in tf_per_sentence:
        # Initialize vector with zeros
        vector = np.zeros(len(all_terms))
        
        # Fill in TF-ISF values
        for term, tf_value in tf.items():
            if term in term_to_idx:  # Check if term exists (should always be true)
                idx = term_to_idx[term]
                vector[idx] = tf_value * isf[term]
        
        tf_isf_vectors.append(vector)
    
    return tf_isf_vectors, all_terms

# 📏 Compute Cosine Similarity

In [7]:
def cosine_similarity(v1, v2):
    """Compute cosine similarity between two vectors."""
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    
    # Avoid division by zero
    if norm_v1 == 0 or norm_v2 == 0:
        return 0
    
    return dot_product / (norm_v1 * norm_v2)


# 📈 Build the Graph

In [8]:
def build_graph(sentence_data):
    """Build a graph where nodes are sentences and edges represent similarity above a threshold."""
    # Compute TF-ISF vectors
    tf_isf_vectors, terms = compute_tf_isf(sentence_data)
    
    # Create graph
    G = nx.Graph()
    
    # Add nodes (sentences) with their indices
    for i, (sentence_text, (para_idx, sent_idx)) in enumerate(sentence_data):
        node_id = f"{para_idx}_{sent_idx}"
        G.add_node(node_id, text=sentence_text, para_idx=para_idx, sent_idx=sent_idx, vector_idx=i)
    
    # Add edges based on cosine similarity
    for i, (_, (para_i, sent_i)) in enumerate(sentence_data):
        node_i = f"{para_i}_{sent_i}"
        for j, (_, (para_j, sent_j)) in enumerate(sentence_data[i+1:], i+1):
            node_j = f"{para_j}_{sent_j}"
            similarity = cosine_similarity(tf_isf_vectors[i], tf_isf_vectors[j])
            #if similarity > edge_weight_threshold:
            G.add_edge(node_i, node_j, weight=similarity)
    
    return G, tf_isf_vectors, terms


# 👀 Visualize Graph

In [9]:
def visualize_graph(G, filename=None):
    """Visualize the graph and display in the console."""
    plt.figure(figsize=(12, 10))
    
    # Use spring layout for better visualization
    pos = nx.spring_layout(G, seed=42)
    
    # Draw nodes and edges
    nx.draw_networkx_nodes(G, pos, node_size=300, node_color='lightblue')
    
    # Get edge weights for line thickness and color
    edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
    
    # Draw edges with varying thickness based on weight
    nx.draw_networkx_edges(G, pos, width=[w*3 for w in edge_weights], alpha=0.7)
    
    # Draw node labels
    node_labels = {node: f"{data['para_idx']}_{data['sent_idx']}" for node, data in G.nodes(data=True)}
    nx.draw_networkx_labels(G, pos, labels=node_labels, font_size=10)
    
    plt.title(f"Sentence Similarity Graph for {filename if filename else 'Sample'}")
    plt.axis('off')
        
    # Instead of saving, just display the plot
    plt.show()

# 🛠️ All Graph Construction Stage

In [10]:
def process_file(file_path):
    """Process a single JSON file and return the results."""
    filename = os.path.basename(file_path)
    #print(f"Processing {filename}...")
    
    # Load data
    data = load_json_file(file_path)
    
    # Extract sentences (skipping title)
    sentence_data = extract_sentences(data, skip_title=True)
    
    if not sentence_data:
        print(f"No sentences found in {filename} after skipping title.")
        return None
    
    # Build graph
    graph, vectors, terms = build_graph(sentence_data)
    
    # Print some statistics
    # print(f"  Number of sentences: {len(sentence_data)}")
    # print(f"  Number of terms: {len(terms)}")
    # print(f"  Number of edges: {graph.number_of_edges()}")
    # print(f"  Average degree: {sum(dict(graph.degree()).values()) / graph.number_of_nodes():.2f}")
    
    return {
        'graph': graph,
        'vectors': vectors,
        'terms': terms,
        'sentences': sentence_data,
        'filename': filename
    }

def process_all_files(folder_path):
    """Process all JSON files in the given folder."""
    results = {}
    
    # Get all JSON files in the folder
    file_paths = glob.glob(os.path.join(folder_path, "*.json"))
    
    for file_path in file_paths:
        result = process_file(file_path)
        if result:
            results[result['filename']] = result
            
            # Only visualize the first file as a sample
            #if len(results) == 1:
            #    visualize_graph(result['graph'], result['filename'])
        
        #print()
    
    return results

# Example usage
if __name__ == "__main__":
    folder_path = "/kaggle/input/graduation-project/preprocessed_classical" 
    results = process_all_files(folder_path)
    
    print(f"Successfully processed {len(results)} files.")

Successfully processed 153 files.


# 🏆 Ranking

## Standard Degree-Centrality

In [11]:
def rank_sentences_by_centrality(results):
    """
    Rank sentences in each file by degree centrality.
    
    Args:
        results: Dictionary of results from process_all_files function
        
    Returns:
        Dictionary with filename as key and list of (sentence_id, score, text) tuples as value,
        sorted by score in descending order
    """
    rankings = {}
    
    for filename, result in results.items():
        graph = result['graph']
        sentences = result['sentences']
        
        # Calculate degree centrality for each node
        centrality = nx.degree_centrality(graph)
        
        # Create a list of (sentence_id, score, text) tuples
        sentence_scores = []
        for node_id, score in centrality.items():
            # Get node data
            node_data = graph.nodes[node_id]
            para_idx = node_data['para_idx']
            sent_idx = node_data['sent_idx']
            text = node_data['text']
            
            sentence_scores.append((node_id, score, text))
        
        # Sort by score in descending order
        sentence_scores.sort(key=lambda x: x[1], reverse=True)
        
        # Store rankings
        rankings[filename] = sentence_scores
        
        # Print top 5 sentences for this file
        #print(f"\nTop 5 sentences for {filename}:")
        #for i, (sent_id, score, text) in enumerate(sentence_scores[:5], 1):
            #print(f"{i}. ID: {sent_id}, Score: {score:.4f}")
            #print(f"   Text: {text[:100]}{'...' if len(text) > 100 else ''}")
        
        # Print some statistics
        scores = [score for _, score, _ in sentence_scores]

    return rankings

## Standard TextRank

In [12]:
def rank_sentences_by_textrank(results):
    """
    Rank sentences in each file by TextRank algorithm.
    
    Args:
        results: Dictionary of results from process_all_files function
        max_iterations: (global variable) Maximum number of iterations for PageRank algorithm
        
    Returns:
        Dictionary with filename as key and list of (sentence_id, score, text) tuples as value,
        sorted by score in descending order
    """
    rankings = {}
    
    for filename, result in results.items():
        graph = result['graph']
        sentences = result['sentences']
        
        # Calculate TextRank scores (PageRank) for each node with max iterations
        textrank_scores = nx.pagerank(graph, max_iter=max_iterations)
        
        # Create a list of (sentence_id, score, text) tuples
        sentence_scores = []
        for node_id, score in textrank_scores.items():
            # Get node data
            node_data = graph.nodes[node_id]
            para_idx = node_data['para_idx']
            sent_idx = node_data['sent_idx']
            text = node_data['text']
            
            sentence_scores.append((node_id, score, text))
        
        # Sort by score in descending order
        sentence_scores.sort(key=lambda x: x[1], reverse=True)
        
        # Store rankings
        rankings[filename] = sentence_scores
        
        # Optional: Calculate some statistics about the scores
        scores = [score for _, score, _ in sentence_scores]
    
    return rankings

# Sentence Selection

In [13]:
def generate_extractive_summary(rankings, file_results, original_folder):
    """
    Generate extractive summaries for each file by selecting top sentences from original files.
    
    Args:
        rankings: The output from rank_sentences_by_centrality
        file_results: Original results from process_all_files
        original_folder: Path to folder containing original sentences
        summary_ratio: (global variable) Percentage of original sentences to include in summary (0.0 to 1.0)
        
    Returns:
        Dictionary with filename as key and summary text as value
    """
    summaries = {}
    
    for filename, ranked_sentences in rankings.items():
        # Load original sentences file
        original_file_path = os.path.join(original_folder, filename)
        try:
            with open(original_file_path, 'r', encoding='utf-8') as f:
                original_data = json.load(f)
        except FileNotFoundError:
            print(f"Warning: Original file {filename} not found in {original_folder}")
            continue
        
        # Count total sentences in original file
        total_sentences = sum(len(paragraph) for paragraph in original_data)
        
        # Calculate number of sentences for summary based on ratio
        num_sentences = max(1, int(total_sentences * summary_ratio))
        
        # Get top N sentences based on the calculated number
        top_sentences = ranked_sentences[:num_sentences]
        
        # Original graph to get paragraph and sentence indices
        graph = file_results[filename]['graph']
        
        # Extract original sentences using IDs
        original_top_sentences = []
        for sent_id, score, *_ in top_sentences:
            # Parse sentence ID to get paragraph and sentence indices
            para_idx, sent_idx = sent_id.split('_')
            
            # Extract original sentence from the original data
            try:
                original_text = original_data[para_idx][sent_idx]
                original_top_sentences.append((int(para_idx), int(sent_idx), original_text))
            except KeyError:
                print(f"Warning: Sentence ID {sent_id} not found in original file {filename}")
                continue
        
        # Sort by paragraph index first, then sentence index for proper document flow
        original_top_sentences.sort()
        
        # Combine sentences into summary
        summary = "\n".join([text for _, _, text in original_top_sentences])
        summaries[filename] = summary
        
        print(f"\nExtractive summary for {filename} ({len(original_top_sentences)} of {total_sentences} sentences, {round(len(original_top_sentences)/total_sentences*100, 1)}%):")
        print(summary)
    
    return summaries

In [14]:
# Example usage
if __name__ == "__main__": 
    # Rank sentences based on the settings above
    if ranking_method == 1:
        rankings = rank_sentences_by_centrality(results)
    else:
        rankings = rank_sentences_by_textrank(results)
    
    # Path to original sentences
    original_folder = "/kaggle/input/graduation-project/original_sentences"
    
    # Generate extractive summaries using original sentences with 30% ratio
    summaries = generate_extractive_summary(
        rankings, 
        results, 
        original_folder
    )


Extractive summary for file143.json (1 of 2 sentences, 50.0%):
في فترة لاحقة من القرون الوسطى تذكر المدينة باسم بيت المقدس الذي يشابه عبارة بيت همقداش والتي تشير لدى اليهود إلى هيكل سليمان

Extractive summary for file123.json (1 of 6 sentences, 16.7%):
قبل اختراع الاقراص الصلبة ،  كانت الاقراص المرنة تستخدم لتخزين نظام تشغيل و برامج الحاسوب المنزلي أيضا ،  حيث ان النواة العديد من أنظمة التشغيل في ذلك الوقت كانت تخزن على ذاكرة روم ،  اما نظام التعامل مع الاقراص فيخزن على اقراص مرنة ،  كنظام التشغيل دوس

Extractive summary for file102.json (1 of 5 sentences, 20.0%):
حرب فيتنام أو الحرب الهندوصينية الثانية كانت نزاع بين جمهورية فيتنام الديموقراطية (فيتنام الشمالية) ،  متحالفة مع جبهة التحرير الوطنية ،  ضد جمهورية فيتنام (فيتنام الجنوبية) مع حلفائها (وكانت الولايات المتحدة الأمريكية إحداهم بين 13 سبتمبر 1956 و1973)

Extractive summary for file129.json (1 of 2 sentences, 50.0%):
اختلفت طرق دفع السفن عبر التاريخ وكان البشر في العديد من الحالات هم الذين يشكلون القوة الدافعة للسفن ،  فكانوا ي

# ⚖️ Evaluation

In [15]:
def evaluate_summaries_with_rouge(generated_summaries, reference_folder):
    """
    Evaluate generated summaries against reference summaries using ROUGE metrics.
    
    Args:
        generated_summaries: Dictionary with filename as key and generated summary as value
        reference_folder: Path to folder containing reference summaries
        
    Returns:
        Dictionary with average ROUGE scores and per-file scores
    """
    # Initialize Rouge calculator
    rouge = Rouge()
    
    # Store all scores
    all_scores = {}
    
    # Store aggregate scores for averaging
    aggregate_scores = {
        'rouge-1': {'f': [], 'p': [], 'r': []},
        'rouge-2': {'f': [], 'p': [], 'r': []},
        'rouge-l': {'f': [], 'p': [], 'r': []}
    }
    
    # Get list of all reference files
    reference_files = os.listdir(reference_folder)
    
    # Evaluate each generated summary against its reference summaries
    for filename, generated_summary in generated_summaries.items():
        # Extract base filename without extension
        base_filename = os.path.splitext(filename)[0]
        
        # Find all reference summaries for this file
        file_refs = [f for f in reference_files if f.startswith(f"{base_filename}_sum")]
        
        if not file_refs:
            print(f"Warning: No reference summaries found for {filename}. Skipping this file.")
            continue
        
        # Load all reference summaries for this file
        valid_reference_summaries = []
        for ref_file in file_refs:
            ref_path = os.path.join(reference_folder, ref_file)
            try:
                with open(ref_path, 'r', encoding='utf-8') as f:
                    ref_content = f.read().strip()
                    # Check if reference summary is empty or only has one sentence
                    if not ref_content:
                        print(f"Warning: Empty reference file {ref_file}. Ignoring this reference.")
                        continue
                    
                    # Count sentences (naive approach - split by period, exclamation mark, or question mark)
                    sentences = re.split(r'[.!?]+', ref_content)
                    sentences = [s.strip() for s in sentences if s.strip()]
                    
                    #if len(sentences) <= 1:
                    #    print(f"Warning: Reference file {ref_file} has only one sentence. Ignoring this reference.")
                    #    continue
                    
                    valid_reference_summaries.append(ref_content)
            except Exception as e:
                print(f"Error reading reference file {ref_file}: {e}")
                continue
        
        if not valid_reference_summaries:
            print(f"Warning: No valid reference summaries for {filename}. Skipping this file.")
            continue
        
        # Calculate ROUGE scores against each reference
        file_scores = {
            'rouge-1': {'f': [], 'p': [], 'r': []},
            'rouge-2': {'f': [], 'p': [], 'r': []},
            'rouge-l': {'f': [], 'p': [], 'r': []}
        }
        
        for ref_summary in valid_reference_summaries:
            try:
                # Make sure generated summary is not empty
                if not generated_summary.strip():
                    print(f"Warning: Empty generated summary for {filename}. Skipping this file.")
                    continue
                    
                # Calculate ROUGE scores
                scores = rouge.get_scores(generated_summary, ref_summary)[0]
                
                # Add scores to file-specific records
                for metric in ['rouge-1', 'rouge-2', 'rouge-l']:
                    for score_type in ['f', 'p', 'r']:
                        file_scores[metric][score_type].append(scores[metric][score_type])
            except Exception as e:
                print(f"Error calculating ROUGE for {filename} with a reference: {e}")
                continue
        
        # Skip this file if no valid scores were calculated
        if not any(file_scores[metric][score_type] for metric in ['rouge-1', 'rouge-2', 'rouge-l'] for score_type in ['f', 'p', 'r']):
            print(f"Warning: Could not calculate any valid ROUGE scores for {filename}. Skipping this file.")
            continue
            
        # Calculate average score across all references for this file
        file_avg_scores = {}
        for metric in ['rouge-1', 'rouge-2', 'rouge-l']:
            file_avg_scores[metric] = {}
            for score_type in ['f', 'p', 'r']:
                if file_scores[metric][score_type]:
                    score_avg = np.mean(file_scores[metric][score_type])
                    file_avg_scores[metric][score_type] = score_avg
                    
                    # Add to aggregate scores for overall average
                    aggregate_scores[metric][score_type].append(score_avg)
                else:
                    file_avg_scores[metric][score_type] = 0.0
        
        # Store file scores
        all_scores[filename] = file_avg_scores
        
        # Print summary for this file
        print(f"\nROUGE scores for {filename}:")
        print(f"  ROUGE-1 F1: {file_avg_scores['rouge-1']['f']:.4f}")
        print(f"  ROUGE-2 F1: {file_avg_scores['rouge-2']['f']:.4f}")
        print(f"  ROUGE-L F1: {file_avg_scores['rouge-l']['f']:.4f}")
    
    # Calculate overall average scores
    average_scores = {}
    for metric in ['rouge-1', 'rouge-2', 'rouge-l']:
        average_scores[metric] = {}
        for score_type in ['f', 'p', 'r']:
            if aggregate_scores[metric][score_type]:
                average_scores[metric][score_type] = np.mean(aggregate_scores[metric][score_type])
            else:
                average_scores[metric][score_type] = 0.0
    
    # Print number of files used in evaluation
    evaluated_files_count = len(all_scores)
    total_files_count = len(generated_summaries)
    print(f"\nEvaluated {evaluated_files_count} out of {total_files_count} files.")
    
    # Print overall summary
    print("\n" + "="*40)
    print("OVERALL AVERAGE ROUGE SCORES:")
    print(f"  ROUGE-1 F1: {average_scores['rouge-1']['f']:.4f}")
    print(f"  ROUGE-2 F1: {average_scores['rouge-2']['f']:.4f}")
    print(f"  ROUGE-L F1: {average_scores['rouge-l']['f']:.4f}")
    print("="*40)
    
    return {
        'average': average_scores,
        'per_file': all_scores,
        'evaluated_files_count': evaluated_files_count,
        'total_files_count': total_files_count
    }

# Example usage
if __name__ == "__main__":
    # Reference summaries folder path
    reference_folder = "/kaggle/input/voted-sentences"
    
    # Assuming 'summaries' is the variable containing our generated summaries
    # from the previous extractive summarization step
    evaluation_results = evaluate_summaries_with_rouge(summaries, reference_folder)
    
    # Additional analysis: Find best and worst performing files
    per_file_scores = evaluation_results['per_file']
    
    # Get F1 scores for ROUGE-1
    f1_scores = [(filename, scores['rouge-1']['f']) 
                 for filename, scores in per_file_scores.items()]
    
    # Sort by score
    f1_scores.sort(key=lambda x: x[1])
    
    print("\nFILE PERFORMANCE ANALYSIS:")
    
    # Check if we have enough files to show stats
    worst_count = min(3, len(f1_scores))
    best_count = min(3, len(f1_scores))
    
    if worst_count > 0:
        print("Worst performing files (ROUGE-1 F1):")
        for filename, score in f1_scores[:worst_count]:  # Bottom 3 or fewer
            print(f"  {filename}: {score:.4f}")
    
    if best_count > 0:        
        print("\nBest performing files (ROUGE-1 F1):")
        for filename, score in f1_scores[-best_count:]:  # Top 3 or fewer
            print(f"  {filename}: {score:.4f}")


ROUGE scores for file123.json:
  ROUGE-1 F1: 0.4368
  ROUGE-2 F1: 0.3695
  ROUGE-L F1: 0.4368

ROUGE scores for file129.json:
  ROUGE-1 F1: 0.0920
  ROUGE-2 F1: 0.0093
  ROUGE-L F1: 0.0805

ROUGE scores for file134.json:
  ROUGE-1 F1: 0.1481
  ROUGE-2 F1: 0.0000
  ROUGE-L F1: 0.1111

ROUGE scores for file150.json:
  ROUGE-1 F1: 0.1096
  ROUGE-2 F1: 0.0244
  ROUGE-L F1: 0.0822

ROUGE scores for file25.json:
  ROUGE-1 F1: 0.0952
  ROUGE-2 F1: 0.0000
  ROUGE-L F1: 0.0952

ROUGE scores for file107.json:
  ROUGE-1 F1: 0.4593
  ROUGE-2 F1: 0.4058
  ROUGE-L F1: 0.4593

ROUGE scores for file42.json:
  ROUGE-1 F1: 0.0594
  ROUGE-2 F1: 0.0000
  ROUGE-L F1: 0.0594

ROUGE scores for file34.json:
  ROUGE-1 F1: 0.1776
  ROUGE-2 F1: 0.0147
  ROUGE-L F1: 0.1308

ROUGE scores for file31.json:
  ROUGE-1 F1: 0.1923
  ROUGE-2 F1: 0.0500
  ROUGE-L F1: 0.1538

ROUGE scores for file122.json:
  ROUGE-1 F1: 0.9060
  ROUGE-2 F1: 0.9118
  ROUGE-L F1: 0.9060

ROUGE scores for file148.json:
  ROUGE-1 F1: 0.1414
 