## Phase 1 

### Retrieve articles related to a topic from arXiv

In [1]:
import arxiv
import json
import pandas as pd
import urllib.request as libreq
import certifi
import os
import xml.etree.ElementTree as ET
from dotenv import load_dotenv
load_dotenv()
from datetime import datetime, timedelta
from tqdm import tqdm
from IPython.display import display, Latex
from bs4 import BeautifulSoup
import re
import PyPDF2
import io
os.environ['SSL_CERT_FILE'] = certifi.where()

#### Fuzzy Search

In [2]:
# API client from the `arxiv` package, used for both result generators below.
client = arxiv.Client()

# Search for the 10 most recently submitted articles matching "Computational Finance".
# NOTE(review): the captured output below lists papers from unrelated fields, so the
# unquoted two-word query may be matched more loosely than intended - confirm against
# the arXiv API query syntax.
search = arxiv.Search(
  query = "Computational Finance",
  max_results = 10,
  sort_by = arxiv.SortCriterion.SubmittedDate
)

# NOTE(review): `results` is assigned but not consumed in this cell; the loop below
# builds a second generator from the same search.
results = client.results(search)

# `client.results(search)` is a generator; iterate over it to print each title and author list.
for r in client.results(search):
  print(r.title)
  print(r.authors)

PERSE: Personalized 3D Generative Avatars from A Single Portrait
[arxiv.Result.Author('Hyunsoo Cha'), arxiv.Result.Author('Inhee Lee'), arxiv.Result.Author('Hanbyul Joo')]
Action-Agnostic Point-Level Supervision for Temporal Action Detection
[arxiv.Result.Author('Shuhei M. Yoshida'), arxiv.Result.Author('Takashi Shibata'), arxiv.Result.Author('Makoto Terao'), arxiv.Result.Author('Takayuki Okatani'), arxiv.Result.Author('Masashi Sugiyama')]
Branes Screening Quarks and Defect Operators
[arxiv.Result.Author('Andreas Karch'), arxiv.Result.Author('Marcos Riojas')]
SoS Certificates for Sparse Singular Values and Their Applications: Robust Statistics, Subspace Distortion, and More
[arxiv.Result.Author('Ilias Diakonikolas'), arxiv.Result.Author('Samuel B. Hopkins'), arxiv.Result.Author('Ankit Pensia'), arxiv.Result.Author('Stefan Tiegel')]
Distributed Mixture-of-Agents for Edge Inference with Large Language Models
[arxiv.Result.Author('Purbesh Mitra'), arxiv.Result.Author('Priyanka Kaswan'), a

#### GIGA CLASS

In [3]:
class ArxivDataframe:
    def __init__(self, subject):
        """Set up a scraper for one arXiv listing.

        Args:
            subject: Listing identifier placed in the listing URL path
                (for example ``'astro-ph'``); it is stored lower-cased.
        """
        # Normalise once so every later use sees the same identifier.
        listing_subject = subject.lower()
        self.subject = listing_subject
        # Client object from the `arxiv` package, kept on the instance.
        self.client = arxiv.Client()
        # The parser class itself is stored (not an instance); it is called per document.
        self.bs4_client = BeautifulSoup
    
    def _retrieve_html(self):
        base_url = f'https://arxiv.org/list/{self.subject}/new'
        page = libreq.urlopen(base_url)
        html = page.read().decode('utf-8')
        return html
    
    def _remove_brackets(self, text):
        """Remove content within brackets from text"""
        return re.sub(r'\(.*?\)', '', text).strip()
    
    def _clean_subjects(self, df):
        """Clean primary and secondary subjects"""
        df['primary_subject'] = df['primary_subject'].map(self._remove_brackets)
        df['secondary_subjects'] = df['secondary_subjects'].map(
            lambda x: [self._remove_brackets(subject) for subject in x] if isinstance(x, list) else x,
            na_action='ignore'
        )
        return df
    
    def _clean_journal(self, df):
        """Clean journal information"""
        df['submitted_journal'] = df['submitted_journal'].str.split(r'[,;:.]').str[0]
        return df
    
    def _extract_affiliations(self, pdf_reader, authors, max_pages=2):
        """
        Extract author affiliations from PDF using a simplified approach with better logging
        """
        print("\n=== Starting Affiliation Extraction ===")
        print(f"Processing authors: {authors}")
        
        affiliations = [None] * len(authors)
        
        try:
            # Get text from first pages
            full_text = ""
            for page_num in range(min(max_pages, len(pdf_reader.pages))):
                try:
                    page_text = pdf_reader.pages[page_num].extract_text()
                    full_text += page_text + "\n"
                    print(f"Successfully read page {page_num + 1}")
                except Exception as e:
                    print(f"Error reading page {page_num + 1}: {str(e)}")
                    continue

            # Clean text
            full_text = re.sub(r'\s+', ' ', full_text)
            
            # Truncate text at common section markers
            section_markers = ['Abstract', 'Introduction', 'Keywords', 'I.', '1.', 'Methods']
            for marker in section_markers:
                pos = full_text.find(marker)
                if pos != -1:
                    full_text = full_text[:pos]
                    print(f"Truncated text at marker: {marker}")
            
            print("\nLooking for affiliation blocks...")
            
            # Simple pattern to find potential affiliation blocks
            affiliation_patterns = [
                # Look for institutional addresses
                r'(?i)(?:Department|University|Institute|Laboratory|School|Center|Centre)[^.]*(?:[^.]*(?:University|Institute|Laboratory|School|Center|Centre)[^.]*)*\.',
            ]
            
            potential_affiliations = []
            for pattern in affiliation_patterns:
                matches = re.finditer(pattern, full_text)
                for match in matches:
                    aff = match.group(0).strip()
                    if len(aff) > 20:  # Filter out very short matches
                        potential_affiliations.append(aff)
                        print(f"Found potential affiliation: {aff}")
            
            # Remove duplicates while preserving order
            potential_affiliations = list(dict.fromkeys(potential_affiliations))
            
            print(f"\nFound {len(potential_affiliations)} unique potential affiliations")
            
            # For each author, try to find their affiliation
            for i, author in enumerate(authors):
                try:
                    author_name = author.split()[-1]  # Get last name
                    print(f"\nProcessing author: {author} (searching for: {author_name})")
                    
                    # Look for affiliations near author name
                    author_pos = full_text.find(author)
                    if author_pos != -1:
                        # Look at text chunk around author mention
                        window = 500  # Increased window size
                        start = max(0, author_pos - window//2)
                        end = min(len(full_text), author_pos + window//2)
                        nearby_text = full_text[start:end]
                        
                        author_affiliations = []
                        for aff in potential_affiliations:
                            if aff in nearby_text:
                                author_affiliations.append(aff)
                                print(f"Found matching affiliation: {aff}")
                        
                        if author_affiliations:
                            affiliations[i] = author_affiliations
                        else:
                            print(f"No affiliations found near author {author}")
                    else:
                        print(f"Could not find author {author} in text")
                
                except Exception as e:
                    print(f"Error processing author {author}: {str(e)}")
                    continue
            
            print("\n=== Affiliation Extraction Complete ===")
            print(f"Final affiliations: {affiliations}")
            return affiliations
            
        except Exception as e:
            print(f"Error in affiliation extraction: {str(e)}")
            return [None] * len(authors)
        
    def _extract_pdf_metrics(self, pdf_reader):
        """Extract metrics (pages, figures, tables) from PDF"""
        metrics = {
            'pages': len(pdf_reader.pages),
            'figures': 0,
            'tables': 0
        }
        
        for page in pdf_reader.pages:
            text = page.extract_text()
            # Find figures
            figure_numbers = re.findall(r'(?i)(?:Figure|Fig.|Figure.|Fig})\s+(\d+)', text)
            if figure_numbers:
                metrics['figures'] = max(metrics['figures'], max(map(int, figure_numbers)))
            
            # Find tables
            table_numbers = re.findall(r'(?i)(?:Table|Table.})\s+(\d+)', text)
            if table_numbers:
                metrics['tables'] = max(metrics['tables'], max(map(int, table_numbers)))
                
        return metrics
    
    def _process_pdf(self, pdf_link, current_metrics=None, authors=None):
        """Process PDF to extract metrics, keywords, and affiliations"""
        try:
            pdf_response = libreq.urlopen('https://' + pdf_link)
            pdf_file = pdf_response.read()
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
            
            # Extract metrics if needed
            metrics = self._extract_pdf_metrics(pdf_reader)
            
            # Only update metrics that are currently NaN
            if current_metrics:
                for key in metrics:
                    if pd.isna(current_metrics[key]):
                        current_metrics[key] = metrics[key]
                metrics = current_metrics
            
            # Extract keywords
            keywords = self._extract_keywords(pdf_reader)
            
            # Extract affiliations if authors are provided
            affiliations = None
            if authors:
                affiliations = self._extract_affiliations(pdf_reader, authors)
            
            return {**metrics, 'keywords': keywords, 'affiliations': affiliations}
            
        except Exception as e:
            print(f"Error processing PDF {pdf_link}: {str(e)}")
            return None
        
    def _extract_keywords(self, pdf_reader, max_pages=5):
        """
        Extract keywords from PDF with improved accuracy and efficiency across subjects
        Args:
            pdf_reader: PyPDF2.PdfReader object
            max_pages: Maximum number of pages to search (default: 5, as keywords are usually at the start)
        Returns:
            list: Extracted keywords
        """
        keywords = []
        patterns = [
            r'(?i)(?:key[ -]?words?|index terms)[:.]?\s*(.*?)(?:[.;]|\n|(?=\n\n)|$)',
            r'(?i)(?:PACS numbers?|Mathematics Subject Classification|AMS subject classifications?'
            r'|Computing Classification System|ACM CCS|MeSH terms)[:.]?\s*(.*?)(?:[.;]|\n|(?=\n\n)|$)',
            r'(?i)(?:subject headings?|thesaurus terms?|subject terms?|descriptors?)[:.]?\s*(.*?)(?:[.;]|\n|(?=\n\n)|$)',
            r'(?i)(?:mots[- ]?cl[ée]s?|schlüsselwörter|palabras[- ]?clave)[:.]?\s*(.*?)(?:[.;]|\n|(?=\n\n)|$)'
        ]

        # Common section headers that indicate the end of front matter
        section_markers = [
            '1. Introduction', '1 Introduction', 'Introduction', 
            'Background', 'Literature Review', 'Methods',
            'Methodology', 'Results', 'Discussion',
            'I. ', 'II. ', 'Section 1', 'Section 2'
        ]
        
        try:
            # Only search first few pages for efficiency
            pages_to_search = min(max_pages, len(pdf_reader.pages))          
            for page_num in range(pages_to_search):
                try:
                    text = pdf_reader.pages[page_num].extract_text()
                    if not text:
                        continue
                        
                    # Clean text while preserving important separators
                    text = re.sub(r'\s+', ' ', text)
                    text = re.sub(r'(?<=[.,;])\s*(?=[A-Z])', '\n', text)  # Add breaks at major punctuation
                    
                    # Check for section markers and truncate text
                    for marker in section_markers:
                        marker_pos = text.find(marker)
                        if marker_pos != -1:
                            text = text[:marker_pos]
                            break
                    
                    # Extract keywords using patterns
                    for pattern in patterns:
                        matches = re.findall(pattern, text, re.DOTALL | re.IGNORECASE)
                        for match in matches:
                            # Handle both string and tuple matches
                            match_text = match[0] if isinstance(match, tuple) else match
                            
                            # Clean and split the matched text
                            cleaned_keywords = match_text.strip()
                            # Split on common keyword separators
                            for separator in [';', ',', '•', '·', '—', '-', '\n']:
                                if separator in cleaned_keywords:
                                    keywords.extend([k.strip() for k in cleaned_keywords.split(separator)])
                                    break
                            else:
                                keywords.append(cleaned_keywords)
                            
                except Exception as e:
                    print(f"Error processing page {page_num}: {str(e)}")
                    continue
                
            # Post-processing of keywords
            processed_keywords = []
            for keyword in keywords:
                # Skip if too short or too long
                if not keyword or len(keyword) < 3 or len(keyword) > 100:
                    continue
                # Clean up the keyword
                cleaned = re.sub(r'^\W+|\W+$', '', keyword)  # Remove leading/trailing non-word chars
                cleaned = re.sub(r'\s+', ' ', cleaned)       # Normalize whitespace
                cleaned = cleaned.strip()               
                if cleaned and len(cleaned) >= 3:
                    processed_keywords.append(cleaned)
            
            # Remove duplicates while preserving order
            seen = set()
            final_keywords = []
            for keyword in processed_keywords:
                lower_keyword = keyword.lower()
                if lower_keyword not in seen:
                    seen.add(lower_keyword)
                    final_keywords.append(keyword)
            
            return final_keywords[:10]  # Limit to top 10 keywords
            
        except Exception as e:
            print(f"Error in keyword extraction: {str(e)}")
            return []
    
    def _metadata(self, xml_part):
        soup = self.bs4_client(xml_part, 'html.parser')
        title_tag = soup.find('div', class_='list-title mathjax')
        title = title_tag.get_text(strip=True).replace('Title:', '').strip() if title_tag else None

        # abstract
        abstract_tag = soup.find('p', class_='mathjax')
        abstract = abstract_tag.get_text(strip=True) if abstract_tag else None

        # authors
        authors_section = soup.find('div', class_='list-authors')
        authors = [author.get_text(strip=True) for author in authors_section.find_all('a')] if authors_section else []

        # comments
        comments_tag = soup.find('div', class_='list-comments mathjax')
        comments = comments_tag.get_text(strip=True).replace('Comments:', '').strip() if comments_tag else ''
        
        # figures, pages, tables
        figures_match = re.search(r'(\d+)\s+figures', comments)
        figures = int(figures_match.group(1)) if figures_match else None
        pages_match = re.search(r'(\d+)\s+pages', comments)
        pages = int(pages_match.group(1)) if pages_match else None
        tables_match = re.search(r'(\d+)\s+table[s]?', comments)
        tables = int(tables_match.group(1)) if tables_match else None

        # PDF link
        pdf_tag = soup.find('a', title='Download PDF')
        pdf_link = pdf_tag['href'] if pdf_tag else None

        # primary subject
        primary_subject_tag = soup.find('span', class_='primary-subject')
        primary_subject = primary_subject_tag.get_text(strip=True) if primary_subject_tag else None

        # secondary subjects
        subjects_section = soup.find('div', class_='list-subjects')
        if subjects_section:
            subjects_text = subjects_section.get_text(strip=True)
            subjects_split = subjects_text.split(';')
            secondary_subjects = [subject.strip() for subject in subjects_split[1:]] if len(subjects_split) > 1 else None
        else:
            secondary_subjects = None

        # journal
        submitted_journal = None
        if comments:
            for prefix in ['Submitted to ', 'Accepted to ', 'Accepted for publication in ', 'Accepted by ', 'Submitted by ']:
                if prefix in comments:
                    submitted_journal = comments.split(prefix)[-1]
                    break

        # published
        published_tag = soup.find('div', class_='list-journal-ref')
        published_journal = published_tag.get_text(strip=True).replace('Journal-ref:', '').strip() if published_tag else None

        return {
            'title': title,
            'abstract': abstract,
            'authors': authors,
            'figures': figures,
            'pages': pages,
            'tables': tables,
            'pdf_link': f'arxiv.org{pdf_link}' if pdf_link else None,
            'primary_subject': primary_subject,
            'secondary_subjects': secondary_subjects,
            'submitted_journal': submitted_journal,
            'published_journal': published_journal
        }
    
    def process_dataframe(self, df):
        """Clean the listing metadata and enrich it from each paper's PDF.

        Subjects and journal text are cleaned first. Then, for every row that
        still lacks a page/figure/table count, keywords or affiliations, the
        PDF is processed and the missing values are filled in.

        ``keywords`` and ``affiliations`` columns are created when absent.
        Existing ones are kept, so a frame that was already (partly)
        processed is not downloaded again for complete rows.

        Args:
            df: Frame with the columns produced by ``_metadata``.

        Returns:
            The same frame object, modified in place.
        """
        # Clean subjects and journal information
        df = self._clean_subjects(df)
        df = self._clean_journal(df)

        # Create the output columns only when they do not exist yet; resetting
        # them on every call made the "only if missing" check below always true.
        for column in ('keywords', 'affiliations'):
            if column not in df:
                df[column] = None

        def _is_missing(value):
            # pd.isna() on a list returns an array; a stored container counts as present.
            if isinstance(value, (list, tuple, set, dict)):
                return False
            return bool(pd.isna(value))

        metric_columns = ('pages', 'figures', 'tables')
        has_authors = 'authors' in df

        # Iterate over index labels (not positions) so frames with a
        # non-default index are handled as well.
        for row_label in tqdm(df.index, desc='Processing PDFs, for metrics, keywords and affiliations'):
            current_metrics = {name: df.at[row_label, name] for name in metric_columns}

            # Only process PDF if we're missing any data
            needs_pdf = (
                any(_is_missing(value) for value in current_metrics.values())
                or _is_missing(df.at[row_label, 'keywords'])
                or _is_missing(df.at[row_label, 'affiliations'])
            )
            if not needs_pdf:
                continue

            pdf_data = self._process_pdf(
                df.at[row_label, 'pdf_link'],
                current_metrics,
                authors=df.at[row_label, 'authors'] if has_authors else None
            )
            if not pdf_data:
                continue

            for name in metric_columns:
                df.at[row_label, name] = pdf_data[name]
            df.at[row_label, 'keywords'] = pdf_data['keywords']
            if pdf_data['affiliations']:
                df.at[row_label, 'affiliations'] = pdf_data['affiliations']

        return df
    
    def construct_dataframe(self):
        """Scrape the "new submissions" listing and build the processed frame.

        The listing page is downloaded, the number of new submissions is read
        from the section header, each entry is cut out of the page markup and
        parsed with ``_metadata``, and the resulting frame is passed through
        ``process_dataframe``.

        Returns:
            pandas.DataFrame: One row per new submission; an empty frame when
            the header, the paper count or the entries cannot be found.
        """
        # Get initial data
        html = self._retrieve_html()
        soup = self.bs4_client(html, 'html.parser')

        h3_tag = soup.find('h3', string=lambda x: x and 'New submissions' in x)

        if not h3_tag:
            print("New submissions header not found")
            return pd.DataFrame()

        try:
            # The count is the second token inside the parentheses of the header.
            number_of_papers = int(h3_tag.string.split('(')[1].split()[1])
            print(f"Number of papers: {number_of_papers}")
        except (IndexError, ValueError):
            print("Could not extract number of papers")
            return pd.DataFrame()

        # Get metadata for all papers
        items = soup.find_all('a', attrs={'name': True})
        if not items:
            print("No paper items found")
            return pd.DataFrame()

        # Serialise the document once instead of three times per paper.
        page_markup = str(soup)
        # Never index past the anchors that are actually present.
        paper_count = min(number_of_papers, len(items))

        all_metadata = []
        for i in tqdm(range(paper_count), desc='Processing Papers'):
            start_index = page_markup.find(str(items[i]))
            if start_index == -1:
                # Anchor markup not found in the serialised page; skip the entry.
                continue

            # An entry runs up to the next anchor. The last NEW submission is
            # therefore bounded by the first anchor after it and is no longer
            # taken from the last anchor of the whole page, which can belong
            # to a later section of the listing.
            end_index = -1
            if i + 1 < len(items):
                end_index = page_markup.find(str(items[i + 1]), start_index + 1)
            if end_index == -1:
                end_index = len(page_markup)

            xml_part = page_markup[start_index:end_index]
            all_metadata.append(self._metadata(xml_part))

        if not all_metadata:
            # A frame without columns cannot be post-processed.
            return pd.DataFrame()

        # Create and process dataframe
        df = pd.DataFrame(all_metadata)
        return self.process_dataframe(df)

In [4]:
# Build the scraper for the 'astro-ph' listing.
arxiv_data = ArxivDataframe('astro-ph')
# Get processed dataframe with all features.
# construct_dataframe() downloads the listing page and then each paper's PDF,
# printing the affiliation-extraction log shown below.
df = arxiv_data.construct_dataframe()

Number of papers: 55


Processing Papers: 100%|██████████| 54/54 [00:06<00:00,  8.65it/s]
Processing PDFs, for metrics, keywords and affiliations:   2%|▏         | 1/55 [00:03<03:04,  3.41s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['John F. Wu']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Institute, 3700 San Martin Dr, Baltimore, MD 21218 2Department of Physics & Astronomy, Johns Hopkins University, 3400 N Charles St, Baltimore, MD 21218 3Department of Computer Science, Johns Hopkins University, 3400 N Charles St, Baltimore, MD 21218 ABSTRACT Galaxy appearances reveal the physics of how they formed and evolved.

Found 1 unique potential affiliations

Processing author: John F. Wu (searching for: Wu)
No affiliations found near author John F. Wu

=== Affiliation Extraction Complete ===
Final affiliations: [None]


Processing PDFs, for metrics, keywords and affiliations:   4%|▎         | 2/55 [00:10<05:02,  5.70s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Shi-Ju Kang', 'Shan-Shan Ren', 'Yong-Gang Zheng', 'Qingwen Wu']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: School of Physics and Electrical Engineering, Liupanshui Normal University, Liupanshui, Guizhou, 553004, People’s Republic of China 2Institute of Space Sciences, Shandong University, Weihai, Shandong, 264209, People’s Republic of China 3Department of Physics, Yunnan Normal University, Kunming, Yunnan, 650092, People’s Republic of China 4Department of Astronomy, School of Physics, Huazhong University of Science and Technology, Wuhan, Hubei, 430074, People’s Republic of China (Received March 1, 2021; Revised April 1, 2021; Accepted January 3, 2025; Published January 3, 2025) Submitted to ApJ ABSTRACT The changing-look blazars (CLBs) are the blazars that their optical spectral lines at different epochs show a signific

Processing PDFs, for metrics, keywords and affiliations:   5%|▌         | 3/55 [00:13<03:35,  4.15s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Mathilde Mâlin', 'Anthony Boccaletti', 'Clément Perrot', 'Pierre Baudoz', 'Daniel Rouan', 'Pierre-Olivier Lagage', 'Rens Waters', 'Manuel Güdel', 'Thomas Henning', 'Bart Vandenbussche', 'Olivier Absil', 'David Barrado', 'Benjamin Charnay', 'Elodie Choquet', 'Christophe Cossou', 'Camilla Danielski', 'Leen Decin', 'Adrian M. Glauser', 'John Pye', 'Goran Olofsson', 'Alistair Glasse', 'Polychronis Patapis', 'Pierre Royer', 'Silvia Scheithauer', 'Eugene Serabyn', 'Pascal Tremblin', 'Niall Whiteford', 'Ewine F. van Dishoeck', 'Göran Ostlin', 'Tom P. Ra', 'Gillian Wright']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.
Truncated text at marker: Methods

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Mathilde Mâlin (searching for: Mâlin)
No affiliations found near author Mathilde Mâlin

Processing author: Anthony Boc

Processing PDFs, for metrics, keywords and affiliations:   7%|▋         | 4/55 [00:13<02:17,  2.70s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['S.I. Ipatov']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract
Truncated text at marker: I.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: S.I. Ipatov (searching for: Ipatov)
Could not find author S.I. Ipatov in text

=== Affiliation Extraction Complete ===
Final affiliations: [None]


Processing PDFs, for metrics, keywords and affiliations:   9%|▉         | 5/55 [00:16<02:19,  2.79s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Somayeh Khakpash', 'Federica Bianco', 'Georgios Vernardos', 'Gregory Dobler', 'Charles Keeton']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: University–New Brunswick, Department of Physics & Astronomy, 136 Frelinghuysen Rd, Piscataway, NJ 08854, USA 2LSST-DA Catalyst Fellow 3University of Delaware Department of Physics and Astronomy 217 Sharp Lab Newark, DE 19716 USA 4University of Delaware Joseph R.
Found potential affiliation: School of Public Policy and Administration, 184 Academy St, Newark, DE 19716 USA 5University of Delaware Data Science Institute 6Vera C.
Found potential affiliation: Department of Physics and Astronomy, Lehman College of the City University of New York, Bronx, NY, 10468, USA 8Department of Astrophysics, American Museum of Natural History, Central Park West and 79th Street, NY, 10024, USA ABSTRACT 

Processing PDFs, for metrics, keywords and affiliations:  11%|█         | 6/55 [00:18<02:06,  2.58s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Hisashi Hayakawa', 'Edward W. Cliver', 'Frédéric Clette', 'Yusuke Ebihara', 'Shin Toriumi', 'Ilaria Ermolli', 'Theodosios Chatzistergos', 'Kentaro Hattori', 'Delores J. Knipp', 'Séan P. Blake', 'Gianna Cauzzi', 'Kevin Reardon', 'Philippe-A. Bourdin', 'Dorothea Just', 'Mikhail Vokhmyanin', 'Keitaro Matsumoto', 'Yoshizumi Miyoshi', 'José R. Ribeiro', 'Ana P. Correia', 'David M. Willis', 'Matthew N. Wild', 'Sam M. Silverman']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Institute for Space-Earth Environmental Research and Institute for Advanced Researches, Nagoya University, Nagoya 4648601, Japan; hisashi@nagoya-u.
Found potential affiliation: Laboratory, Harwell Campus, Didcot OX11 0QX, UK3National Solar Observatory, Boulder, CO 80303, USA; ecliver@nso.
Found potential affiliation: Center SILSO, Observatoire Royal de Belgiq

Processing PDFs, for metrics, keywords and affiliations:  13%|█▎        | 7/55 [00:29<04:21,  5.45s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Philippe-A. Bourdin', 'Yasuhito Narita']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Philippe-A. Bourdin (searching for: Bourdin)
Could not find author Philippe-A. Bourdin in text

Processing author: Yasuhito Narita (searching for: Narita)
Could not find author Yasuhito Narita in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None]


Processing PDFs, for metrics, keywords and affiliations:  15%|█▍        | 8/55 [00:42<05:58,  7.64s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Di Wu', 'Jing-Zhi Zhou', 'Yu-Ting Kuang', 'Zhi-Chao Li', 'Zhe Chang', 'Qing-Guo Huang']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: School of Fundamental Physics and Mathematical Sciences, Hangzhou Institute for Advanced Study, UCAS, Hangzhou 310024, China bCenter for Joint Quantum Studies and Department of Physics, School of Science, Tianjin University, Tianjin 300350, China cInstitute of High Energy Physics, Chinese Academy of Sciences, Beijing 100049, China dUniversity of Chinese Academy of Sciences, Beijing 100049, China eCASKeyLaboratoryofTheoreticalPhysics, InstituteofTheoreticalPhysics, ChineseAcademy of Sciences E-mail: wudi@ucas.

Found 1 unique potential affiliations

Processing author: Di Wu (searching for: Wu)
No affiliations found near author Di Wu

Processing author: Jing-Zhi Zhou (searching for: Zhou)
No a

Processing PDFs, for metrics, keywords and affiliations:  16%|█▋        | 9/55 [00:43<04:19,  5.64s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Hui Liu', 'Hui Li', 'Sizhong Zou', 'Kaifan Ji', 'Zhenyu Jin', 'Jiahui Shan', 'Jingwei Li', 'Guanglu Shi', 'Yu Huang', 'Li Feng', 'Jianchao Xue', 'Qiao Li', 'Dechao Song', 'Ying Li']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Laboratory of Dark Matter and Space Astronomy, Purple Mountain Observatory, Chinese Academy of Sciences, Nanjing 210023, China; nj.

Found 1 unique potential affiliations

Processing author: Hui Liu (searching for: Liu)
No affiliations found near author Hui Liu

Processing author: Hui Li (searching for: Li)
No affiliations found near author Hui Li

Processing author: Sizhong Zou (searching for: Zou)
No affiliations found near author Sizhong Zou

Processing author: Kaifan Ji (searching for: Ji)
Could not find author Kaifan Ji in text

Processing author: Zhenyu Jin (searching for: Jin)
No affiliations

Processing PDFs, for metrics, keywords and affiliations:  18%|█▊        | 10/55 [00:44<03:08,  4.20s/it]


Found 0 unique potential affiliations

Processing author: Bing Zhang (searching for: Zhang)
No affiliations found near author Bing Zhang

=== Affiliation Extraction Complete ===
Final affiliations: [None]


Processing PDFs, for metrics, keywords and affiliations:  20%|██        | 11/55 [00:51<03:36,  4.92s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Becca Spejcher', 'Noel D. Richardson', 'Herbert Pablo', 'Marina Beltran', 'Payton Butler', 'Eddie Avila']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Becca Spejcher (searching for: Spejcher)
Could not find author Becca Spejcher in text

Processing author: Noel D. Richardson (searching for: Richardson)
Could not find author Noel D. Richardson in text

Processing author: Herbert Pablo (searching for: Pablo)
Could not find author Herbert Pablo in text

Processing author: Marina Beltran (searching for: Beltran)
Could not find author Marina Beltran in text

Processing author: Payton Butler (searching for: Butler)
Could not find author Payton Butler in text

Processing author: Eddie Avila (searching for: Avila)
Could not find author Eddie Avila in text

=== Affiliation

Processing PDFs, for metrics, keywords and affiliations:  22%|██▏       | 12/55 [00:55<03:22,  4.70s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Sripan Mondal', 'Akash Bairagi', 'A. K. Srivastava']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Department of Physics, Indian Institute of Technology (BHU), Varanasi-221005, India.
Found potential affiliation: Department of Physics, Indian Institute of Technology (BHU), Varanasi-221005, India 3Department of Physics, Indian Institute of Technology (BHU), Varanasi-221005, India.

Found 2 unique potential affiliations

Processing author: Sripan Mondal (searching for: Mondal)
Found matching affiliation: Department of Physics, Indian Institute of Technology (BHU), Varanasi-221005, India.

Processing author: Akash Bairagi (searching for: Bairagi)
Found matching affiliation: Department of Physics, Indian Institute of Technology (BHU), Varanasi-221005, India.

Processing author: A. K. Srivastava (searching for: Srivastava)
Coul

Processing PDFs, for metrics, keywords and affiliations:  24%|██▎       | 13/55 [00:56<02:38,  3.77s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Takeru K. Suzuki', 'Keiichi Ohnaka', 'Yuki Yasuda']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: School of Arts & Sciences, The University of Tokyo, 3-8-1, Komaba, Meguro, Tokyo 153-8902, Japan; Department of Astronomy, The University of Tokyo, 7-3-1, Hongo, Bunkyo, Tokyo, 113-0033, Japan; Komaba Institute for Science, The University of Tokyo, 3-8-1 Komaba, Meguro, Tokyo 153-8902, Japan 2Instituto de Astrofísica, Departamento de Física y Astronomía, Facultad de Ciencias Exactas, Universidad Andrés Bello, Fernández Concha 700, Las Condes, Santiago, Chile 3Division of Physics, Faculty of Science, Kita 10 Nishi 8, Kita-ku, Hokkaido University, Sapporo 060-0810, Japan ∗E-mail: stakeru@ea.

Found 1 unique potential affiliations

Processing author: Takeru K. Suzuki (searching for: Suzuki)
Could not find author Takeru K. Suzuki 

Processing PDFs, for metrics, keywords and affiliations:  25%|██▌       | 14/55 [01:00<02:32,  3.72s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Shi Pi', 'Misao Sasaki', 'Volodymyr Takhistov', 'Jianing Wang']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Laboratory of Theoretical Physics, Institute of Theoretical Physics, Chinese Academy of Sciences, Beijing 100190, China bCenter for High Energy Physics, Peking University, Beijing 100871, China cKavli Institute for the Physics and Mathematics of the Universe (WPI), UTIAS, The Uni- versity of Tokyo, Kashiwa, Chiba 277-8583, Japan dCenter for Gravitational Physics and Quantum Information, Yukawa Institute for Theoret- ical Physics, Kyoto University, Kyoto 606-8502, Japan eLeung Center for Cosmology and Particle Astrophysics, National Taiwan University, Taipei 10617 fInternational Center for Quantum-field Measurement Systems for Studies of the Universe and Particles (QUP,WPI), High Energy Accelerator Research Organiza

Processing PDFs, for metrics, keywords and affiliations:  27%|██▋       | 15/55 [01:07<03:10,  4.76s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Miftahul Hilmi', 'Nicha Leethochawalit', 'Michele Trenti', 'Benjamin Metha']
Successfully read page 1
Successfully read page 2
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Miftahul Hilmi (searching for: Hilmi)
Could not find author Miftahul Hilmi in text

Processing author: Nicha Leethochawalit (searching for: Leethochawalit)
Could not find author Nicha Leethochawalit in text

Processing author: Michele Trenti (searching for: Trenti)
Could not find author Michele Trenti in text

Processing author: Benjamin Metha (searching for: Metha)
Could not find author Benjamin Metha in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None, None, None]


Processing PDFs, for metrics, keywords and affiliations:  29%|██▉       | 16/55 [01:16<03:48,  5.86s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Qiguo Tian', 'Lei Hao', 'Yipeng Zhou', 'Xiheng Shi', 'Tuo Ji', 'Peng Jiang', 'Lin Lin', 'Zhenya Zheng', 'Hongyan Zhou']
Successfully read page 1
Successfully read page 2
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Qiguo Tian (searching for: Tian)
Could not find author Qiguo Tian in text

Processing author: Lei Hao (searching for: Hao)
Could not find author Lei Hao in text

Processing author: Yipeng Zhou (searching for: Zhou)
Could not find author Yipeng Zhou in text

Processing author: Xiheng Shi (searching for: Shi)
Could not find author Xiheng Shi in text

Processing author: Tuo Ji (searching for: Ji)
Could not find author Tuo Ji in text

Processing author: Peng Jiang (searching for: Jiang)
Could not find author Peng Jiang in text

Processing author: Lin Lin (searching for: Lin)
Could not find author Lin Lin in text

Processing author: Zhenya Z

Processing PDFs, for metrics, keywords and affiliations:  31%|███       | 17/55 [01:19<03:10,  5.00s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['T. Şahin', 'F. Güney', 'S.A. Şentürk', 'N. Çınar', 'M. Marışmak']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: University, Faculty of Science, Department of Space Sciences and Technologies 07058, Antalya, Türkiye 2Institute of Graduate Studies in Science, Akdeniz University, Türkiye ABSTRACT ThisstudyintroducesalinelistfortheabundanceanalysisofF-andG-typestarsacrossthe4080–9675Åwavelength range.

Found 1 unique potential affiliations

Processing author: T. Şahin (searching for: Şahin)
No affiliations found near author T. Şahin

Processing author: F. Güney (searching for: Güney)
No affiliations found near author F. Güney

Processing author: S.A. Şentürk (searching for: Şentürk)
No affiliations found near author S.A. Şentürk

Processing author: N. Çınar (searching for: Çınar)
No affiliations found near author N. Çınar

Proc

Processing PDFs, for metrics, keywords and affiliations:  33%|███▎      | 18/55 [01:29<04:10,  6.77s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['V. Hocdé', 'A. Matter', 'N. Nardetto', 'A. Gallenne', 'P. Kervella', 'A. Mérand', 'G. Pietrzyński', 'W. Gieren', 'J. Leftley', 'S. Robbe-Dubois', 'B. Lopez', 'M. C. Bailleul', 'G. Bras', 'R. Smolec', 'P. Wielgórski', 'G. Hajdu', 'A. Afanasiev']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.
Truncated text at marker: Methods

Looking for affiliation blocks...
Found potential affiliation: Centre, Polish Academy of Sciences, Bartycka 18, 00-716 Warszawa, Poland email : vhocde@camk.
Found potential affiliation: Laboratory for Astronomy, IRL 3386, CNRS, Casilla 36-D, Santiago, Chile 5LESIA, Observatoire de Paris, Université PSL, CNRS, Sorbonne Université, Université Paris-Cité, 5 Place Jules Janssen,92195 Meudon, France, 6European Southern Observatory, Karl-Schwarzschild-Str.

Found 2 unique potential affiliations

Processing author: V. Hocdé (searching for: 

Processing PDFs, for metrics, keywords and affiliations:  35%|███▍      | 19/55 [01:35<03:54,  6.50s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Guanwen Fang', 'Yao Dai', 'Zesen Lin', 'Chichun Zhou', 'Jie Song', 'Yizhou Gu', 'Xiaotong Guo', 'Anqi Mao', 'Xu Kong']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Institute of Astronomy and Astrophysics, Anqing Normal University, Anqing 246133, People’s Republic of China, e-mail: wen@mail.
Found potential affiliation: Department of Physics, The Chinese University of Hong Kong, Shatin, N.
Found potential affiliation: School of Engineering, Dali University, Dali 671003, People’s Republic of China e-mail: zhouchichun@dali.
Found potential affiliation: Department of Astronomy, University of Science and Technology of China, Hefei 230026, China, e-mail: xkong@ustc.
Found potential affiliation: School of Astronomy and Space Science, University of Science and Technology of China, Hefei 230026, Pe

Processing PDFs, for metrics, keywords and affiliations:  36%|███▋      | 20/55 [01:44<04:14,  7.28s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Yanping Cong', 'Bin Yue', 'Yidong Xu', 'Furen Deng', 'Jiajun Zhang', 'Xuelei Chen']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Laboratory of Radio Astronomy and Technology, Chinese Academy of Sciences, 20A Datun Road, Chaoyang District, Beijing 100101, China 4School of Astronomy and Space Science, University of Chinese Academy of Sciences, Beijing 100049, China ABSTRACT Loop I/North Polar Spur (NPS) is the giant arc structure above the Galactic plane observed in the radio sky.
Found potential affiliation: Center (GC), associated with the Fermi Bubble and eROSITA X-ray bubble.

Found 2 unique potential affiliations

Processing author: Yanping Cong (searching for: Cong)
No affiliations found near author Yanping Cong

Processing author: Bin Yue (searching for: Yue)
No affiliations found near author Bin Yue

Processing auth

Processing PDFs, for metrics, keywords and affiliations:  38%|███▊      | 21/55 [01:58<05:12,  9.18s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Tanvi Sharma', 'Wen-Ping Chen', 'Beth Biller', 'Loic Albert', 'Belinda Damian', 'Jessy Jose', 'Bhavana Lalchand', 'Michael C. Liu', 'Yumiko Oasa']
Successfully read page 1
Successfully read page 2
Truncated text at marker: I.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Tanvi Sharma (searching for: Sharma)
Could not find author Tanvi Sharma in text

Processing author: Wen-Ping Chen (searching for: Chen)
Could not find author Wen-Ping Chen in text

Processing author: Beth Biller (searching for: Biller)
Could not find author Beth Biller in text

Processing author: Loic Albert (searching for: Albert)
Could not find author Loic Albert in text

Processing author: Belinda Damian (searching for: Damian)
Could not find author Belinda Damian in text

Processing author: Jessy Jose (searching for: Jose)
Could not find author Jessy Jose in text

Processing author: Bhavana Lalchand (searc

Processing PDFs, for metrics, keywords and affiliations:  40%|████      | 22/55 [02:04<04:28,  8.13s/it]

Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.
Truncated text at marker: Methods

Looking for affiliation blocks...
Found potential affiliation: Laboratory, Ahmedabad 380009, India e-mail: bhadrinaval@gmail.
Found potential affiliation: Institute of Technology, Gandhinagar 382355, India 3Satyendra Nath Bose National Centre for Basic Sciences, Block-JD, Sector-III, Salt Lake, Kolkata-700 106, India 4Institute of Applied Physics of the Russian Academy of Sciences, 46 Ul’yanov Str.
Found potential affiliation: Laboratory, California Institute of Technology, 4800 Oak Grove Drive, Pasadena, CA 91109, USA 6Aryabhatta Research Institute of Observational sciencES (ARIES), Manora Peak, Nainital 263001, India 7College of Humanities and Sciences, Ajman University, Ajman P.

Found 3 unique potential affiliations

Processing author: N. K. Bhadari (searching for: Bhadari)
No affiliations found near author N. K. Bhadari

Processing author: L. K. Dewangan 

Processing PDFs, for metrics, keywords and affiliations:  42%|████▏     | 23/55 [02:05<03:14,  6.06s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Tim Jenness', 'Stelios Voutsinas', 'Gregory P. Dubois-Felsmann', 'Andrei Salnikov']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Institute of Technology, MS 100-22, Pasadena, CA 91125-2200, USA 3SLAC National Accelerator Laboratory, 2575 Sand Hill Rd.

Found 1 unique potential affiliations

Processing author: Tim Jenness (searching for: Jenness)
No affiliations found near author Tim Jenness

Processing author: Stelios Voutsinas (searching for: Voutsinas)
Could not find author Stelios Voutsinas in text

Processing author: Gregory P. Dubois-Felsmann (searching for: Dubois-Felsmann)
No affiliations found near author Gregory P. Dubois-Felsmann

Processing author: Andrei Salnikov (searching for: Salnikov)
No affiliations found near author Andrei Salnikov

=== Affiliation Extraction Complete ===
Final affiliations: [None, None,

Processing PDFs, for metrics, keywords and affiliations:  44%|████▎     | 24/55 [02:06<02:19,  4.51s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Federica Giacchino', 'Giovanni La Mura', 'Stefano Ciprini', 'Dario Gasparrini', 'Marcello Giroletti', 'Marco Laurenti']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Center, via del Politecnico, 00133, Roma, Italy 3Istituto Nazionale di Astrofisica - Osservatorio Astronomico di Cagliari, Via della Scienza 5, 09047, Selargius (CA), Italy 4Laborat ´orio de Instrumentac ¸ ˜ao e F ´ısica Experimental de Part ´ıculas, Av.

Found 1 unique potential affiliations

Processing author: Federica Giacchino (searching for: Giacchino)
Could not find author Federica Giacchino in text

Processing author: Giovanni La Mura (searching for: Mura)
Could not find author Giovanni La Mura in text

Processing author: Stefano Ciprini (searching for: Ciprini)
Could not find author Stefano Ciprini in text

Processing author: Dario Gasparrini (searchin

Processing PDFs, for metrics, keywords and affiliations:  45%|████▌     | 25/55 [02:15<02:59,  5.97s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Andrew W. Mayo', 'Charles D. Fortenbach', 'Dana R. Louie', 'Courtney D. Dressing', 'Steven Giacalone', 'Caleb K. Harada', 'Emma V. Turtelboom']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Department of Physics and Astronomy, San Francisco State University, San Francisco, CA 94132, USA 2Catholic University of America, Department of Physics, Washington, DC, 20064, USA 3Exoplanets and Stellar Astrophysics Laboratory (Code 667), NASA Goddard Space Flight Center, Greenbelt, MD 20771, USA 4Center for Research and Exploration in Space Science and Technology II, NASA/GSFC, Greenbelt, MD 20771, USA 5Department of Astronomy, University of California, Berkeley, Berkeley, CA 94720, USA 6Department of Astronomy, California Institute of Technology, Pasadena, CA 91125, USA ABSTRACT We characterize the atmos

Processing PDFs, for metrics, keywords and affiliations:  47%|████▋     | 26/55 [02:17<02:12,  4.56s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Zac Bailey', 'Riddhi Bandyopadhyay', 'Shadia Habbal', 'Miloslav Druckmüller']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Institute for Astronomy, University of Hawai‘i; Honolulu, HI 96822, USA.
Found potential affiliation: Department of Astrophysical Sciences, Princeton University; Princeton, NJ 08544, USA.
Found potential affiliation: University of Technology; Brno, 616 69, Czech Republic.

Found 3 unique potential affiliations

Processing author: Zac Bailey (searching for: Bailey)
Found matching affiliation: Institute for Astronomy, University of Hawai‘i; Honolulu, HI 96822, USA.
Found matching affiliation: Department of Astrophysical Sciences, Princeton University; Princeton, NJ 08544, USA.

Processing author: Riddhi Bandyopadhyay (searching for: Bandyopadhyay)
Found matching affiliation: Institute for Astronomy, Uni

Processing PDFs, for metrics, keywords and affiliations:  49%|████▉     | 27/55 [02:26<02:53,  6.18s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['S.K.Maurya', 'A. Errehymy', 'Ksh. Newton Singh', 'G. Mustafa', 'Saibal Ray']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: DepartmentofMathematicalandPhysicalSciences, CollegeofArtsandSciences, University of Nizwa, P.
Found potential affiliation: Centre, School of Mathematics, Statistics and Computer Science, University of KwaZulu-Natal, Private Bag X54001, Durban 4000, South Africa eDepartment of Physics, National Defence Academy, Khadakwasla, Pune 411023, India dDepartment of Physics, Zhejiang Normal University, Jinhua 321004, China eCentre for Cosmology, Astrophysics and Space Science (CCASS), GLA University, Mathura 281406, Uttar Pradesh, India E-mail: sunil@unizwa.

Found 2 unique potential affiliations

Processing author: S.K.Maurya (searching for: S.K.Maurya)
Could not find author S.K.Maurya in text

Processing auth

Processing PDFs, for metrics, keywords and affiliations:  51%|█████     | 28/55 [02:28<02:08,  4.75s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['S. Qutub', 'Y.N. Kalugina', 'M. Derouich']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.
Truncated text at marker: Methods

Looking for affiliation blocks...
Found potential affiliation: Department, Faculty of Science, King Abdulaziz University, P.
Found potential affiliation: Department of Optics and Spectroscopy, Tomsk State University, 36 Lenin av.
Found potential affiliation: Institute of Spectroscopy, Russian Academy of Sciences, Fizicheskaya St.

Found 3 unique potential affiliations

Processing author: S. Qutub (searching for: Qutub)
Found matching affiliation: Department, Faculty of Science, King Abdulaziz University, P.
Found matching affiliation: Department of Optics and Spectroscopy, Tomsk State University, 36 Lenin av.

Processing author: Y.N. Kalugina (searching for: Kalugina)
Could not find author Y.N. Kalugina in text

Processing author: 

Processing PDFs, for metrics, keywords and affiliations:  53%|█████▎    | 29/55 [02:29<01:32,  3.55s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Yue Shao', 'Guo-Hong Du', 'Tian-Nuo Li', 'Xin Zhang']
Successfully read page 1
Successfully read page 2
Truncated text at marker: I.

Looking for affiliation blocks...
Found potential affiliation: Laboratory of Cosmology and Astrophysics, College of Sciences, Northeastern University, Shenyang 110819, China 2National Frontiers Science Center for Industrial Intelligence and Systems Optimization, Northeastern University, Shenyang 110819, China 3MOE Key Laboratory of Data Analytics and Optimization for Smart Industry, Northeastern University, Shenyang 110819, China Both particle physics experiments and cosmological observations have been used to explore neu- trino properties.

Found 1 unique potential affiliations

Processing author: Yue Shao (searching for: Shao)
No affiliations found near author Yue Shao

Processing author: Guo-Hong Du (searching for: Du)
No affiliations found near author Guo-Hong Du

Processing author: Tian-

Processing PDFs, for metrics, keywords and affiliations:  56%|█████▋    | 31/55 [02:40<01:43,  4.29s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Jayant Murthy', 'J. Michael Shull', 'Marc Postman', 'Joel Wm. Parker', 'Seth Redfield', 'Nathaniel Cunningham', 'G. Randall Gladstone', 'Jon P. Pineau', 'Pontus Brandt', 'Anne J. Verbiscer', 'Kelsi N. Singer', 'Harold A. Weaver', 'Richard C. Henry', 'S. Alan Stern']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Institute of Astrophysics, Bengaluru 560 034, India 2Department of Astrophysical & Planetary Sciences, CASA, University of Colorado, Boulder, CO 80309, USA 3Department of Physics & Astronomy, University of North Carolina, Chapel Hill, NC 27599, USA 4Space Telescope Science Institute, 3700 San Martin Drive, Baltimore, MD 21218, USA 5Department of Space Studies, Southwest Research Institute, 1301 Walnut Street, Suite 300, Boulder, CO 80302, USA 6Astronomy Department and Van Vleck Observatory, Wesleyan University, Midd

Processing PDFs, for metrics, keywords and affiliations:  58%|█████▊    | 32/55 [02:47<02:00,  5.26s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['S. R. Dhanush', 'A. Subramaniam', 'S. Subramanian']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Institute of Astrophysics, Bangalore, 560034, India 2Pondicherry University, R.

Found 1 unique potential affiliations

Processing author: S. R. Dhanush (searching for: Dhanush)
Found matching affiliation: Institute of Astrophysics, Bangalore, 560034, India 2Pondicherry University, R.

Processing author: A. Subramaniam (searching for: Subramaniam)
Found matching affiliation: Institute of Astrophysics, Bangalore, 560034, India 2Pondicherry University, R.

Processing author: S. Subramanian (searching for: Subramanian)
Found matching affiliation: Institute of Astrophysics, Bangalore, 560034, India 2Pondicherry University, R.

=== Affiliation Extraction Complete ===
Final affiliations: [['Institute of Astrophysics, Bangalore, 5600

Processing PDFs, for metrics, keywords and affiliations:  60%|██████    | 33/55 [02:49<01:33,  4.26s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Asim Ullah', 'Jameel-Un Nabi']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Asim Ullah (searching for: Ullah)
Could not find author Asim Ullah in text

Processing author: Jameel-Un Nabi (searching for: Nabi)
Could not find author Jameel-Un Nabi in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None]


Processing PDFs, for metrics, keywords and affiliations:  62%|██████▏   | 34/55 [02:51<01:14,  3.55s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Vikram K. Jaiswal', 'Bożena Czerny']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.
Truncated text at marker: Methods

Looking for affiliation blocks...
Found potential affiliation: Center for Theoretical Physics, Polish Academy of Sciences, Al.

Found 1 unique potential affiliations

Processing author: Vikram K. Jaiswal (searching for: Jaiswal)
Found matching affiliation: Center for Theoretical Physics, Polish Academy of Sciences, Al.

Processing author: Bożena Czerny (searching for: Czerny)
Could not find author Bożena Czerny in text

=== Affiliation Extraction Complete ===
Final affiliations: [['Center for Theoretical Physics, Polish Academy of Sciences, Al.'], None]


Processing PDFs, for metrics, keywords and affiliations:  64%|██████▎   | 35/55 [02:52<00:58,  2.95s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Istiak Akib', 'François Hammer', 'Yanbin Yang', 'Marcel S. Pawlowski', 'Jianling Wang']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Institute for Astrophysics, An der Sternwarte 16, 14482 Potsdam, Germany Received 5 September 2024 /Accepted 20 December 2024 ABSTRACT A significant part of the Milky Way (MW) dwarf galaxies orbit within a Vast POlar Structure (VPOS) that is perpendicular to the Galactic disk, whose origin has not yet been identified.

Found 1 unique potential affiliations

Processing author: Istiak Akib (searching for: Akib)
No affiliations found near author Istiak Akib

Processing author: François Hammer (searching for: Hammer)
No affiliations found near author François Hammer

Processing author: Yanbin Yang (searching for: Yang)
No affiliations found near author Yanbin Yan

Processing PDFs, for metrics, keywords and affiliations:  65%|██████▌   | 36/55 [02:54<00:50,  2.67s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Giorgio Savini', 'Peter Hargrave', 'Peter A.R. Ade', 'Alexey Shitvov', 'Rashmi Sudiwala', 'Giampaolo Pisano', 'Carole Tucker', 'Jin Zhang']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: University College London, Gower Street, London, WC1E 6BT, UK.
Found potential affiliation: School of Physics & Astronomy, Cardiff University, The Parade, Cardiff, CB24 3AA, Wales, UK.
Found potential affiliation: School of Computing and Information Science, Anglia Ruskin University, East Road, Cambridge, CB1 1PT, UK.

Found 3 unique potential affiliations

Processing author: Giorgio Savini (searching for: Savini)
Found matching affiliation: University College London, Gower Street, London, WC1E 6BT, UK.

Processing author: Peter Hargrave (searching for: Hargrave)
Found matching affiliation: University College Lo

Processing PDFs, for metrics, keywords and affiliations:  67%|██████▋   | 37/55 [02:56<00:43,  2.39s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Conor A. Nixon', 'Natalie Carrasco', 'Christophe Sotin']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Laboratory, NASA Goddard Space Flight Center, Greenbelt, 20771, MD, USA bUniversite Paris-Saclay, UVSQ, CNRS, LATMOS, Guyancourt, 78280, France cENS Paris-Saclay, 4 Av.

Found 1 unique potential affiliations

Processing author: Conor A. Nixon (searching for: Nixon)
Found matching affiliation: Laboratory, NASA Goddard Space Flight Center, Greenbelt, 20771, MD, USA bUniversite Paris-Saclay, UVSQ, CNRS, LATMOS, Guyancourt, 78280, France cENS Paris-Saclay, 4 Av.

Processing author: Natalie Carrasco (searching for: Carrasco)
Could not find author Natalie Carrasco in text

Processing author: Christophe Sotin (searching for: Sotin)
Found matching affiliation: Laboratory, NASA Goddard Space Flight

Processing PDFs, for metrics, keywords and affiliations:  69%|██████▉   | 38/55 [02:58<00:39,  2.35s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Sanya Gupta', 'Navin Sridhar', 'Lorenzo Sironi']
Successfully read page 1
Successfully read page 2
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: University, 3009 Broadway, New York, NY 10027, USA 2Department of Physics, Stanford University, 382 Via Pueblo Mall, Stanford, CA 94305, USA 3Kavli Institute for Particle Astrophysics & Cosmology, P.
Found potential affiliation: University, Stanford, CA 94305, USA 4Department of Astronomy and Columbia Astrophysics Laboratory, Columbia University, 550 W 120th St, New York, NY 10027, USA 5Center for Computational Astrophysics, Flatiron Institute, 162 5th Avenue, New York, NY 10010, USA Accepted XXX.

Found 2 unique potential affiliations

Processing author: Sanya Gupta (searching for: Gupta)
No affiliations found near author Sanya Gupta

Processing author: Navin Sridhar (searching for: Sridhar)
No affiliations found near author Navin Sri

Processing PDFs, for metrics, keywords and affiliations:  71%|███████   | 39/55 [03:03<00:49,  3.09s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Lily Whitler', 'Daniel P. Stark', 'Michael W. Topping', 'Brant Robertson', 'Marcia Rieke', 'Kevin N. Hainline', 'Ryan Endsley', 'Zuyi Chen', 'William M. Baker', 'Rachana Bhatawdekar', 'Andrew J. Bunker', 'Stefano Carniani', 'Stéphane Charlot', 'Jacopo Chevallard', 'Emma Curtis-Lake', 'Eiichi Egami', 'Daniel J. Eisenstein', 'Jakob M. Helton', 'Zhiyuan Ji', 'Benjamin D. Johnson', 'Pablo G. Pérez-González', 'Pierluigi Rinaldi', 'Sandro Tacchella', 'Christina C. Williams', 'Christopher N. A. Willmer', 'Chris Willott', 'Joris Witstok']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: University of Arizona, 933 N Cherry Ave, Tucson, AZ 85721, USA 2Department of Astronomy, University of California, Berkeley, Berkeley, CA 94720, USA 3Department of Astronomy and Astrophysics University of California, Santa

Processing PDFs, for metrics, keywords and affiliations:  73%|███████▎  | 40/55 [03:04<00:35,  2.35s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Zaza N. Osmanov']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Zaza N. Osmanov (searching for: Osmanov)
Could not find author Zaza N. Osmanov in text

=== Affiliation Extraction Complete ===
Final affiliations: [None]


Processing PDFs, for metrics, keywords and affiliations:  75%|███████▍  | 41/55 [03:07<00:35,  2.53s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Yun Zheng', 'Kun Xu', 'Donghai Zhao', 'Y.P. Jing', 'Hongyu Gao', 'Xiaolin Luo', 'Ming Li']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: I.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Yun Zheng (searching for: Zheng)
Could not find author Yun Zheng in text

Processing author: Kun Xu (searching for: Xu)
Could not find author Kun Xu in text

Processing author: Donghai Zhao (searching for: Zhao)
Could not find author Donghai Zhao in text

Processing author: Y.P. Jing (searching for: Jing)
Could not find author Y.P. Jing in text

Processing author: Hongyu Gao (searching for: Gao)
Could not find author Hongyu Gao in text

Processing author: Xiaolin Luo (searching for: Luo)
Could not find author Xiaolin Luo in text

Processing author: Ming Li (searching for: Li)
Could not find author Ming Li in text

=== Affiliation 

Processing PDFs, for metrics, keywords and affiliations:  76%|███████▋  | 42/55 [03:17<01:03,  4.89s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['M. E. Molnar', 'R. Casini', 'P. Bryans', 'B. Berkey', 'K. Tyson']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: M. E. Molnar (searching for: Molnar)
Could not find author M. E. Molnar in text

Processing author: R. Casini (searching for: Casini)
Could not find author R. Casini in text

Processing author: P. Bryans (searching for: Bryans)
Could not find author P. Bryans in text

Processing author: B. Berkey (searching for: Berkey)
Could not find author B. Berkey in text

Processing author: K. Tyson (searching for: Tyson)
Could not find author K. Tyson in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None, None, None, None]


Processing PDFs, for metrics, keywords and affiliations:  78%|███████▊  | 43/55 [03:24<01:04,  5.41s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['u-Fu Shen', 'Yan Xu', 'Yi-Bo Wang', 'Xiu-Lin Huang', 'Xing-Xing Hu', 'Qi Yuan']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: u-Fu Shen (searching for: Shen)
Could not find author u-Fu Shen in text

Processing author: Yan Xu (searching for: Xu)
Could not find author Yan Xu in text

Processing author: Yi-Bo Wang (searching for: Wang)
Could not find author Yi-Bo Wang in text

Processing author: Xiu-Lin Huang (searching for: Huang)
Could not find author Xiu-Lin Huang in text

Processing author: Xing-Xing Hu (searching for: Hu)
Could not find author Xing-Xing Hu in text

Processing author: Qi Yuan (searching for: Yuan)
Could not find author Qi Yuan in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None, None, None, None, None]


Processing PDFs, for metrics, keywords and affiliations:  80%|████████  | 44/55 [06:45<11:45, 64.10s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Rohan Pattnaik', 'Jeyhan S. Kartaltepe', 'Clive Binu']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: I.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Rohan Pattnaik (searching for: Pattnaik)
Could not find author Rohan Pattnaik in text

Processing author: Jeyhan S. Kartaltepe (searching for: Kartaltepe)
Could not find author Jeyhan S. Kartaltepe in text

Processing author: Clive Binu (searching for: Binu)
Could not find author Clive Binu in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None, None]


Processing PDFs, for metrics, keywords and affiliations:  82%|████████▏ | 45/55 [06:49<07:40, 46.09s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Mengting Liu', 'Di Li', 'J. R. Dawson', 'Joel M. Weisberg', 'George Hobbs', 'Ningyu Tang', 'Gan Luo', 'Duo Xu', 'Donghui Quan']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Center for Astronomical Computing, Zhejiang Laboratory, Hangzhou 311100, China; liumengting@nao.
Found potential affiliation: Department of Astronomy, Tsinghua University, Beijing 100084, China; dili@tsinghua.
Found potential affiliation: School of Mathematical and Physical Sciences and MQ Research Centre in Astronomy, Astrophysics, and Astrotechnology, Macquarie University, 2109, NSW, Australia 6Department of Physics and Astronomy, Carleton College, Northfield, MN 55057 7Department of Physics, Anhui Normal University, Wuhu, Anhui 241002, People’s Republic of China 8Institut de Radioastronomie Millimetrique, 300 rue de la Piscine, 38400, Saint-Martin d

Processing PDFs, for metrics, keywords and affiliations:  84%|████████▎ | 46/55 [06:51<04:56, 32.96s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['M.Rumenskikh', 'A.V.Taichenachev', 'I.F.Shaikhislamov', 'V.I.Yudin']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract
Truncated text at marker: I.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: M.Rumenskikh (searching for: M.Rumenskikh)
No affiliations found near author M.Rumenskikh

Processing author: A.V.Taichenachev (searching for: A.V.Taichenachev)
Could not find author A.V.Taichenachev in text

Processing author: I.F.Shaikhislamov (searching for: I.F.Shaikhislamov)
Could not find author I.F.Shaikhislamov in text

Processing author: V.I.Yudin (searching for: V.I.Yudin)
Could not find author V.I.Yudin in text

=== Affiliation Extraction Complete ===
Final affiliations: [None, None, None, None]


Processing PDFs, for metrics, keywords and affiliations:  85%|████████▌ | 47/55 [06:55<03:12, 24.07s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Yuji He', 'Hailong Yuan', 'Zhongrui Bai', 'Mingkuan Yang', 'Mengxin Wang', 'Yiqiao Dong', 'Xin Huang', 'Ming Zhou', 'Qian Liu', 'Xiaozhen Yang', 'Ganyu Li', 'Ziyue Jiang', 'Haotong Zhang']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Laboratory of Optical Astronomy, National Astronomical Observatories, Chinese Academy of Sciences, Beijing 100101, China 2School of Astronomy and Space Science, University of Chinese Academy of Sciences, Beijing 100049, China 3Observatoire de Paris, Paris Sciences et Lettres, CNRS, Place Jules Janssen, F-92195 Meudon, France ABSTRACT We report the analysis of the detached eclipsing spectroscopic binary system LAMOST J101356.

Found 1 unique potential affiliations

Processing author: Yuji He (searching for: He)
No affiliations found near author Yuji He

Processing 

Processing PDFs, for metrics, keywords and affiliations:  87%|████████▋ | 48/55 [07:02<02:13, 19.10s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Apashanka Das', 'Biswajit Pandey']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Abstract

Looking for affiliation blocks...
Found potential affiliation: Department of Physics, Visva-Bharati University, Santiniketan, 731235, India E-mail: a.

Found 1 unique potential affiliations

Processing author: Apashanka Das (searching for: Das)
Found matching affiliation: Department of Physics, Visva-Bharati University, Santiniketan, 731235, India E-mail: a.

Processing author: Biswajit Pandey (searching for: Pandey)
Found matching affiliation: Department of Physics, Visva-Bharati University, Santiniketan, 731235, India E-mail: a.

=== Affiliation Extraction Complete ===
Final affiliations: [['Department of Physics, Visva-Bharati University, Santiniketan, 731235, India E-mail: a.'], ['Department of Physics, Visva-Bharati University, Santiniketan, 731235, India E-mail: a.']]


Processing PDFs, for metrics, keywords and affiliations:  89%|████████▉ | 49/55 [07:04<01:24, 14.01s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Jing Wang', 'Dong Yang', 'Xuchen Lin', 'Qifeng Huang', 'Zhijie Qu', 'Hsiao-Wen Chen', 'Hong Guo', 'Luis C. Ho', 'Peng Jiang', 'Zezhong Liang', 'Céline Péroux', 'Lister Staveley-Smith', 'Simon Weng']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Institute for Astronomy and Astrophysics, Peking University, Beijing 100871, China 2Department of Astronomy and Astrophysics, The University of Chicago, 5640 S.
Found potential affiliation: Centre for Radio Astronomy Research, University of Western Australia, 35 Stirling Highway, Crawley, WA 6009, Australia 8ARC Centre of Excellence for All-Sky Astrophysics in 3 Dimensions (ASTRO 3D), Australia 9Sydney Institute for Astronomy, School of Physics A28, University of Sydney, NSW 2006, Australia 10ATNF , CSIRO Space and Astronomy, PO Box 76, Epping, NSW 1710,

Processing PDFs, for metrics, keywords and affiliations:  91%|█████████ | 50/55 [07:07<00:52, 10.59s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['P. Penil', 'A. Domínguez', 'S. Buson', 'M. Ajello', 'S. Adhikari', 'A. Rico']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Department of Physics and Astronomy, Clemson University, Kinard Lab of Physics, Clemson, SC 29634-0978, USA 2IPARCOS and Department of EMFTEL, Universidad Complutense de Madrid, E-28040 Madrid, Spain 3Julius-Maximilians-Universit¨ at, 97070, W¨ urzburg, Germany (Accepted January 3, 2025) ABSTRACT Jetted Active Galactic Nuclei (AGN) exhibit variability across a wide range of time scales.

Found 1 unique potential affiliations

Processing author: P. Penil (searching for: Penil)
Could not find author P. Penil in text

Processing author: A. Domínguez (searching for: Domínguez)
Could not find author A. Domínguez in text

Processing author: S. Buson (searching for: Buson)
No affiliations found near author S

Processing PDFs, for metrics, keywords and affiliations:  93%|█████████▎| 51/55 [07:10<00:33,  8.26s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Konstantin V. Getman', 'Oleg Kochukhov', 'Joe P. Ninan', 'Eric D. Feigelson', 'Vladimir S. Airapetian', 'Abygail R. Waggoner', 'L. Ilsedore Cleeves', 'Jan Forbrich', 'Sergio A. Dzib', 'Charles J. Law', 'Christian Rab', 'Daniel M. Krolikowski']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Department of Astronomy & Astrophysics Pennsylvania State University 525 Davey Laboratory University Park, PA 16802, USA 2Department of Physics and Astronomy, Uppsala University, Box 516, 75120 Uppsala, Sweden 3Department of Astronomy and Astrophysics, Tata Institute of Fundamental Research, Homi Bhabha Road, Colaba, Mumbai 400005, India 4American University, 4400 Massachusetts Avenue NW, Washington, DC 20016, USA USA 5NASA/GSFC/SEEC, Greenbelt, MD 20771, USA 6University of Virginia, Charlottesville, VA 22904,

Processing PDFs, for metrics, keywords and affiliations:  95%|█████████▍| 52/55 [07:12<00:19,  6.41s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Alfredo Micera', 'Daniel Verscharen', 'Jesse T. Coburn', 'Maria Elena Innocenti']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Keywords

Looking for affiliation blocks...
Found potential affiliation: Laboratory, University College London, Dorking, UK 3Space Science Institute, Boulder, CO 80301, USA ABSTRACT In-situ observations of the solar wind have shown that the electron velocity distribution function (VDF) consists of a quasi-Maxwellian core, comprising most of the electron population, and two sparser components: the halo, which are suprathermal and quasi-isotropic electrons, and an escaping beam population, the strahl.

Found 1 unique potential affiliations

Processing author: Alfredo Micera (searching for: Micera)
No affiliations found near author Alfredo Micera

Processing author: Daniel Verscharen (searching for: Verscharen)
No affiliations found near author Daniel Verscharen

Processi

Processing PDFs, for metrics, keywords and affiliations:  96%|█████████▋| 53/55 [07:20<00:13,  6.83s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Nicola Barbieri', 'Thejs Brinckmann', 'Stefano Gariazzo', 'Massimiliano Lattanzi', 'Sergio Pastor', 'Ofelia Pisanti']
Successfully read page 1
Successfully read page 2
Truncated text at marker: Introduction

Looking for affiliation blocks...
Found potential affiliation: Department of Physics, University of Turin, via P.

Found 1 unique potential affiliations

Processing author: Nicola Barbieri (searching for: Barbieri)
No affiliations found near author Nicola Barbieri

Processing author: Thejs Brinckmann (searching for: Brinckmann)
No affiliations found near author Thejs Brinckmann

Processing author: Stefano Gariazzo (searching for: Gariazzo)
No affiliations found near author Stefano Gariazzo

Processing author: Massimiliano Lattanzi (searching for: Lattanzi)
No affiliations found near author Massimiliano Lattanzi

Processing author: Sergio Pastor (searching for: Pastor)
No affiliations found near author Sergio Pastor

Pro

Processing PDFs, for metrics, keywords and affiliations:  98%|█████████▊| 54/55 [07:20<00:05,  5.05s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Chris Fox', 'Paul Wiegert']
Successfully read page 1
Successfully read page 2
Truncated text at marker: 1.

Looking for affiliation blocks...
Found potential affiliation: Department of Physics & Astronomy, The University of Western Ontario, London, Ontario, Canada 2Institute for Earth and Space Exploration (IESX), The University of Western Ontario, London, Ontario, Canada Submitted to the Monthly Notices of the Royal Astronomical Society, December 2024 ABSTRACT ThedebrisdiskaroundHD181327showsasignificantasymmetryinitssurfacebrightnessprofilewhenviewedinvisiblelight.

Found 1 unique potential affiliations

Processing author: Chris Fox (searching for: Fox)
No affiliations found near author Chris Fox

Processing author: Paul Wiegert (searching for: Wiegert)
No affiliations found near author Paul Wiegert

=== Affiliation Extraction Complete ===
Final affiliations: [None, None]


Processing PDFs, for metrics, keywords and affiliations: 100%|██████████| 55/55 [07:23<00:00,  8.07s/it]


=== Starting Affiliation Extraction ===
Processing authors: ['Amelia Carolina Sparavigna']
Successfully read page 1
Successfully read page 2
Truncated text at marker: 1.

Looking for affiliation blocks...

Found 0 unique potential affiliations

Processing author: Amelia Carolina Sparavigna (searching for: Sparavigna)
No affiliations found near author Amelia Carolina Sparavigna

=== Affiliation Extraction Complete ===
Final affiliations: [None]





In [5]:
# Display the DataFrame; as the cell's last expression it is rendered as a table.
df

Unnamed: 0,title,abstract,authors,figures,pages,tables,pdf_link,primary_subject,secondary_subjects,submitted_journal,published_journal,keywords,affiliations
0,Insights on Galaxy Evolution from Interpretabl...,Galaxy appearances reveal the physics of how t...,[John F. Wu],4.0,10.0,2.0,arxiv.org/pdf/2501.00089,Astrophysics of Galaxies,[Machine Learning],AAS Journals,,[Galaxies (573],[None]
1,Hunting for the candidates of Changing-Look Bl...,The changing-look blazars (CLBs) are the blaza...,"[Shi-Ju Kang, Shan-Shan Ren, Yong-Gang Zheng, ...",7.0,13.0,3.0,arxiv.org/pdf/2501.00094,High Energy Astrophysical Phenomena,,ApJ,,"[Active galactic nuclei (16) – Blazars (164, B...","[None, None, None, None]"
2,First unambiguous detection of ammonia in the ...,The newly accessible mid-infrared (MIR) window...,"[Mathilde Mâlin, Anthony Boccaletti, Clément P...",6.0,18.0,4.0,arxiv.org/pdf/2501.00104,Earth and Planetary Astrophysics,,A&A,,[Planetary systems],"[None, None, None, None, None, None, None, Non..."
3,Exchange of meteorites between the terrestrial...,The evolution of the orbits of bodies ejected ...,[S.I. Ipatov],0.0,6.0,0.0,arxiv.org/pdf/2501.00134,Earth and Planetary Astrophysics,,,Modern astronomy: from the Early Universe to e...,[planets and satellites: terrestrial planets],[None]
4,Autoencoder Reconstruction of Cosmological Mic...,Enhanced modeling of microlensing variations i...,"[Somayeh Khakpash, Federica Bianco, Georgios V...",11.0,18.0,1.0,arxiv.org/pdf/2501.00153,Instrumentation and Methods for Astrophysics,,The Astrophysical Journal,,[Interdisciplinary astronomy(804) 1],"[None, None, None, None, None]"
5,The Extreme Space Weather Event of 1872 Februa...,"We review observations of solar activity, geom...","[Hisashi Hayakawa, Edward W. Cliver, Frédéric ...",13.0,20.0,2.0,arxiv.org/pdf/2501.00176,Solar and Stellar Astrophysics,"[Earth and Planetary Astrophysics, Geophysics,...",,ApJ 959:23 (20pp) 2023,[],"[None, None, None, None, None, None, None, Non..."
6,Electromotive field in space and astrophysical...,The concept of electromotive field appears in ...,"[Philippe-A. Bourdin, Yasuhito Narita]",8.0,23.0,0.0,arxiv.org/pdf/2501.00181,Solar and Stellar Astrophysics,"[Earth and Planetary Astrophysics, High Energy...",,"Rev. Mod. Plasma Phys. 9, 1 (2025)","[Electromotive field, Dynamo mechanism, Turbul...","[None, None]"
7,Can tensor-scalar induced GWs dominate PTA obs...,Observational constraints on small-scale primo...,"[Di Wu, Jing-Zhi Zhou, Yu-Ting Kuang, Zhi-Chao...",7.0,19.0,1.0,arxiv.org/pdf/2501.00228,Cosmology and Nongalactic Astrophysics,"[High Energy Astrophysical Phenomena, General ...",,,[],"[None, None, None, None, None, None]"
8,Improving image quality of the Solar Disk Imag...,The in-flight calibration and performance of t...,"[Hui Liu, Hui Li, Sizhong Zou, Kaifan Ji, Zhen...",10.0,14.0,1.0,arxiv.org/pdf/2501.00231,Solar and Stellar Astrophysics,[Instrumentation and Methods for Astrophysics],,,"[techniques: image processing, sun: chromosphe...","[None, None, None, None, None, None, None, Non..."
9,On the Duration of Gamma-Ray Bursts,"Recently, a short-duration GRB with supernova ...",[Bing Zhang],1.0,10.0,0.0,arxiv.org/pdf/2501.00239,High Energy Astrophysical Phenomena,,,,[Gamma-ray bursts],[None]


In [None]:
def _extract_affiliations(self, pdf_reader, authors, max_pages=2):
        """
        Extract author affiliations from PDF by matching authors and their superscript markers
        Args:
            pdf_reader: PyPDF2.PdfReader object
            authors: List of author names
            max_pages: Maximum pages to search (usually in first 2 pages)
        Returns:
            list: List of affiliations corresponding to authors
        """
        affiliations = [None] * len(authors)
        try:
            # Only search first few pages where affiliations typically appear
            pages_to_search = min(max_pages, len(pdf_reader.pages))
            
            # Get text from first pages
            full_text = ""
            for page_num in range(pages_to_search):
                try:
                    page_text = pdf_reader.pages[page_num].extract_text()
                    full_text += page_text + "\n"
                except Exception as e:
                    print(f"Error reading page {page_num}: {str(e)}")
                    continue

            # Clean text
            full_text = re.sub(r'\s+', ' ', full_text)
            
            # Common patterns for affiliation sections
            section_markers = [
                'Abstract', 'Introduction', 'Keywords', 'I.', '1.', 
                'Methods', 'Background', 'Results'
            ]
            
            # Truncate text at first section marker
            for marker in section_markers:
                marker_pos = full_text.find(marker)
                if marker_pos != -1:
                    full_text = full_text[:marker_pos]
            
            # Extract affiliation block
            affiliation_patterns = [
                # Pattern for numbered affiliations
                r'(?:^|\n)(?:[1-9][0-9]?|[a-z])[).]\s*(.*?)(?=(?:[1-9][0-9]?|[a-z])[).]|$)',
                # Pattern for symbol-based affiliations
                r'(?:^|\n)(?:[†*§¶‡#])\s*(.*?)(?=(?:[†*§¶‡#])|$)',
                # Pattern for explicit affiliation markers
                r'(?i)(?:^|\n)(?:Affiliation|Address|Institution)[s:]?\s*(.*?)(?=\n|$)'
            ]
            
            # Find all affiliation blocks
            affiliation_blocks = []
            for pattern in affiliation_patterns:
                matches = re.finditer(pattern, full_text, re.MULTILINE | re.DOTALL)
                for match in matches:
                    affiliation_blocks.append(match.group(1).strip())
            
            # Process each author
            for i, author in enumerate(authors):
                try:
                    # Clean author name
                    clean_author = re.sub(r'[^\w\s]', '', author).strip()
                    
                    # Look for author pattern with superscript
                    author_patterns = [
                        # Number superscripts
                        rf"{clean_author}[\s{{}}]*?([1-9][0-9]?(?:,[1-9][0-9]?)*)",
                        # Symbol superscripts
                        rf"{clean_author}[\s{{}}]*?([†*§¶‡#](?:,[†*§¶‡#])*)",
                        # Letter superscripts
                        rf"{clean_author}[\s{{}}]*?([a-z](?:,[a-z])*)"
                    ]
                    
                    author_affiliations = []
                    for pattern in author_patterns:
                        matches = re.finditer(pattern, full_text, re.IGNORECASE)
                        for match in matches:
                            markers = match.group(1).split(',')
                            for marker in markers:
                                marker = marker.strip()
                                # Convert marker to index
                                if marker.isdigit():
                                    idx = int(marker) - 1
                                elif marker.isalpha():
                                    idx = ord(marker.lower()) - ord('a')
                                else:
                                    # For symbols, find matching affiliation block
                                    for block in affiliation_blocks:
                                        if block.startswith(marker):
                                            author_affiliations.append(block)
                                            continue
                                    continue
                                
                                if 0 <= idx < len(affiliation_blocks):
                                    author_affiliations.append(affiliation_blocks[idx])
                    
                    if author_affiliations:
                        affiliations[i] = list(set(author_affiliations))  # Remove duplicates
                    
                    # Fallback: If no superscript found, look for nearby affiliation
                    if not affiliations[i]:
                        author_pos = full_text.find(clean_author)
                        if author_pos != -1:
                            # Look for affiliation in next 200 characters
                            nearby_text = full_text[author_pos:author_pos + 200]
                            for block in affiliation_blocks:
                                if block in nearby_text:
                                    affiliations[i] = [block]
                                    break
                
                except Exception as e:
                    print(f"Error processing author {author}: {str(e)}")
                    continue
            
            return affiliations
            
        except Exception as e:
            print(f"Error in affiliation extraction: {str(e)}")
            return [None] * len(authors)