<a href="https://colab.research.google.com/github/lizaoh/smp_program_data/blob/main/smp2019_extract_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Top of Script

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; the PDF and CSV paths used below live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Suppresses output from pip installs
%%capture

!pip install pymupdf
!pip install pymupdf-layout
!pip install pymupdf4llm
!pip install wordfreq
!pip install rapidfuzz
import glob
import os
import pathlib
import pymupdf
import pymupdf.layout
import pymupdf4llm
import re
import regex
import pandas as pd
import unicodedata
import wordfreq
from rapidfuzz import process, fuzz

In [3]:
# Google Drive folder holding the conference program PDFs (needs the drive mount above).
pdfs_path = '/content/drive/MyDrive/math_psych_work/Conference Programs/'

# Functions
Created with help from GPT 5.2; some are my own code that I turned into functions.

In [4]:
def get_title(t: str) -> str:
  """Return the first bold (``**...**``) span of a markdown entry as a plain title.

  When the bold span contains ``') '``, everything up to and including its
  first occurrence is dropped, so only the text after it is kept.
  Raises AttributeError if the entry contains no bold span.
  """
  bold_span = re.search(r'\*\*(.*?)\*\*', t).group()  # whole match, asterisks included

  # Keep only what follows the first ") ", when there is one
  _, separator, after_paren = bold_span.partition(') ')
  if separator:
    bold_span = after_paren

  return bold_span.strip('**')

In [5]:
def fuzzy_match_title(title, choices, threshold=90):
    """Find the entry of `choices` that best matches `title`.

    Uses rapidfuzz's `token_set_ratio` scorer. Returns `(choice, score)` when
    the best score is at least `threshold`, otherwise `(None, None)`.
    """
    best = process.extractOne(title, choices, scorer=fuzz.token_set_ratio)

    # Nothing to compare against, or the best candidate is not close enough
    if not best or not (best[1] >= threshold):
        return None, None

    choice, score = best[0], best[1]
    return choice, score

In [6]:
# Splits up an entry into title, authors, affiliations, and abstract then returns
# dictionary for the entry
def split_entry(entry, year):
  """Parse one program entry into a dict.

  Args:
    entry: Text of one entry: an optional title, then "Author(s): ..." and
      optionally "Contact: ..." and "Abstract: ...". The labels may be wrapped
      in markdown underscores (e.g. "_Author(s):_").
    year: Stored unchanged under the 'year' key.

  Returns:
    Dict with keys 'year', 'author(s)', 'affiliation(s)', 'title', 'type'
    (always '') and 'abstract'.

  Raises:
    ValueError: If the entry contains no "Author(s):" label.
  """
  cleaned_entry = fix_ligatures(entry)

  # Split title from rest of entry
  if re.search(r'^Author\(s\):', cleaned_entry):
    title = ''
    # Remove the literal label only. str.lstrip('Author(s): ') would treat the
    # argument as a SET of characters and also eat leading letters of the
    # first author's name (e.g. "Austerweil" -> "erweil").
    rest_of_entry = re.sub(r'^Author\(s\):\s*', '', cleaned_entry)
  else:
    # maxsplit=1 keeps the unpacking safe if "Author(s):" shows up again later
    title_and_rest = re.split(r'\s*_?Author\(s\):_?\s', cleaned_entry, maxsplit=1)
    if len(title_and_rest) != 2:
      raise ValueError(f"No 'Author(s):' label found in entry: {cleaned_entry[:60]!r}")
    title, rest_of_entry = title_and_rest

  # Drop a leading two-digit "(NN) " number from the title, if there is one
  numbered_title = re.split(r'\(\s?\d{2}\s?\)\s', title)
  if len(numbered_title) > 1:
    title = numbered_title[1].strip('**')

  # Splits up authors, affiliations, and abstracts (if present)
  if 'Abstract:' in rest_of_entry:
    # maxsplit=1 so an abstract that itself contains "Abstract:" stays in one piece
    author, abstract = re.split(r'Abstract:_?', rest_of_entry, maxsplit=1)
    # NOTE: the leading "." is unescaped, so it consumes whatever single
    # character precedes " Contact:" (normally the period)
    auth_and_aff = re.split(r'. _?Contact:_?', author)[0]
    # Strip leading whitespace and a leftover label, if any, without touching the name
    auth_and_aff = re.sub(r'^\s*(?:_?Author\(s\):_?\s*)?', '', auth_and_aff)
  else:
    auth_and_aff = rest_of_entry
    abstract = ''

  # Split authors and affiliations: affiliations start at the first " (" that is
  # followed by "1" or a capital letter
  aff_start_patt = r'\s\((?=1|[A-Z])'
  if re.search(aff_start_patt, auth_and_aff):
    authors, affiliations = re.split(aff_start_patt, auth_and_aff, maxsplit=1)
  # Or just gives authors if no affiliations listed (i.e., the ICCM posters)
  else:
    authors = auth_and_aff
    affiliations = ''

  # Authors are first name last name for the ICCM posters, which have no abstracts
  authors = parse_authors(authors)
  final_authors = ', '.join(authors).strip('.')

  final_affiliations = remove_locations(affiliations.rstrip(')'))

  entry_dict = {
      'year': year,
      'author(s)': final_authors.strip(),
      'affiliation(s)': final_affiliations,
      'title': title,
      'type': '',
      'abstract': clean_text(abstract.strip())
  }

  return entry_dict

In [7]:
# Place names that follow a comma in affiliation strings. The regex built below
# tries them in list order, so longer names come before their shorter prefixes
# (e.g. 'Berlin, Germany' before 'Berlin').
LOCATIONS = [
    'United States of America', 'United States', 'Switzerland', 'Japan',
    'Bremen', 'Berlin, Germany', 'Heidelberg, Germany', 'Germany', 'Berlin',
    'Norway', 'Turkey', 'Belgium', 'Italy', 'Israel',
    'New Brunswick, New Jersey', 'Australia', 'The Netherlands', 'USA',
    'Netherlands, The', 'Netherlands', 'United Kingdom', 'Singapore',
    'France', 'Dayton OH', 'Dayton', 'India', 'Taiwan, Republic of China',
    'Austria', 'Canada', 'Denmark', 'Spain', 'Edmonton',
    'Bloomington, Indiana', 'Indiana', 'Russian Federation',
    'University Park, Pennsylvania', 'California',
    'San Francisco, California', 'Taipei, Taiwan',
    'Charlottesville, Virginia', 'New York, New York', 'Toronto, Ontario',
    'New Haven, Connecticut', 'Ann Arbor, Michigan', 'Ohio',
    'Ottawa, Ontario', 'Houston, Texas', 'UK',
    'New Brunswick, Piscataway, NJ', 'Finland', 'Iceland', 'Mexico',
    'South Korea',
]

# Built once at import time: ", <location>" followed by a word boundary,
# matched case-insensitively
_LOCATION_ALTERNATIVES = '|'.join(re.escape(place) for place in LOCATIONS)
LOCATION_RE = re.compile(rf',\s*(?:{_LOCATION_ALTERNATIVES})\b', flags=re.I)


def remove_locations(entry: str) -> str:
    """Delete every ", <location>" occurrence from `entry` and trim the result."""
    without_locations = LOCATION_RE.sub('', entry)
    return without_locations.strip()

In [8]:
def parse_authors(line: str) -> list[str]:
    """Turn an author string into a list of "First Last" names.

    Authors are expected to be separated by ';' and written "Last, First".
    A string containing ' and ' is treated as a comma/"and"-separated list of
    names already in "First Last" order (only the ICCM posters use "and").
    Empty pieces are skipped.
    """
    def _first_then_last(name: str) -> str:
        # Convert "Last, First Middle" -> "First Middle Last"; anything
        # without a comma is passed through untouched
        if ',' not in name:
            return name
        last, first = name.split(',', 1)
        return f"{first.strip()} {last.strip()}"

    text = line.strip()

    # "A, B and C" -> "A; B; C" so the same ';' split works for both formats
    if ' and ' in text:
        text = text.replace(' and ', ', ').replace(',', ';')

    pieces = (piece.strip() for piece in text.split(';'))
    return [_first_then_last(piece) for piece in pieces if piece]

In [9]:
def parse_labeled_authors(text: str):
  """Split a labelled author string into `(name, [affiliation numbers])` tuples.

  Authors are separated by commas; commas inside a bracketed label such as
  "[(1,2)]" do not split. Every number found in an author's chunk is collected
  as an int, and the bracketed labels are removed from the name.
  """
  def _name_and_labels(chunk):
    labels = [int(number) for number in re.findall(r'\d+', chunk)]

    # Drop brackets/parens containing digits (e.g., [1], (1), [(1)]) ...
    cleaned = re.sub(r'[\[\(][\d,\s\(\)\[\]]*\d+[\d,\s\(\)\[\]]*[\]\)]', '', chunk)
    # ... then empty/symbol-only brackets (e.g., [], (), [,]) ...
    cleaned = re.sub(r'[\[\(][^A-Za-z0-9]*[\]\)]', '', cleaned)
    # ... and collapse the whitespace left behind
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    return cleaned, labels

  # Split on commas that are not inside a [...] label
  chunks = (piece.strip() for piece in re.split(r',\s*(?![^\[]*\])', text))
  return [_name_and_labels(chunk) for chunk in chunks if chunk]

In [10]:
def parse_affiliation_dict(aff_text: str) -> dict[int, str]:
    """Parse "1: Name; 2: Other name" into `{1: 'Name', 2: 'Other name'}`.

    Affiliations are separated by ';' followed by whitespace. Each one starts
    with its number label (any number of digits), and the name is everything
    after the first ': '. Empty fragments (e.g. from empty input) are skipped.

    Raises:
        ValueError: If a non-empty fragment is not of the form "N: name".
    """
    out = {}

    for aff in re.split(r';\s', aff_text):
        if not aff.strip():
            continue

        # Read the whole leading number; taking only the first character
        # would turn label "10" into 1 and overwrite affiliation 1
        label = re.match(r'\s*(\d+)', aff)
        if label is None or ': ' not in aff:
            raise ValueError(f"Affiliation is not of the form 'N: name': {aff!r}")

        out[int(label.group(1))] = aff.split(': ', 1)[1]

    return out

In [11]:
def make_aff_list(authors, aff_dict):
    """Build the final author string and the matching affiliation string.

    Args:
        authors: Iterable of `(name, [affiliation numbers])` tuples.
        aff_dict: Mapping from affiliation number to affiliation name.

    Returns:
        `(new_authors, new_affiliations)`: names joined with ", ", and one
        affiliation slot per author joined with "; ". Several affiliations of
        the SAME author are joined with " / "; numbers missing from `aff_dict`
        are ignored, so an author may get an empty slot.
    """
    resolved = [
        (name, " / ".join(aff_dict[label] for label in labels if label in aff_dict))
        for name, labels in authors
    ]

    new_authors = ", ".join(name for name, _ in resolved)
    new_affiliations = "; ".join(affs for _, affs in resolved)

    return new_authors, new_affiliations

In [13]:
def remove_page_break_text(text: str) -> str:
    """Delete page-break residue (page numbers, date lines, session headers) from text.

    Falsy input is returned unchanged. Otherwise every pattern below is removed
    wherever it occurs, and the result is stripped of surrounding whitespace.
    The date and session-name patterns are written for the July 2019 program.
    """
    if not text:
        return text

    # Remove page number
    # NOTE(review): the "." after "p" is unescaped, so it matches any single
    # character, not just a period -- confirm whether that leniency is wanted.
    text = re.sub(r'\n*p. \d{1,2} \n*', '', text)

    # Remove date if present
    text = re.sub(
        r'''
        (?:Fri|Satur|Sun|Mon)day,\sJuly\s\d{2},\s2019,\s
        (?:morning|afternoon)\s(\n*)?
        ''',
        '',
        text,
        flags=re.VERBOSE
    )

    # Remove poster session text if present
    text = re.sub(r'\sPoster\ssession', '', text)

    text = re.sub(
        r'''
        (?:Society\sfor\sMathematical\sPsychology|ICCM)
        \sposters\s(\n*)?
        ''',
        '',
        text,
        flags=re.VERBOSE
    )

    # Remove session names that are followed by whitespace and a blank line.
    # One title has "Joint Modeling" in it so don't want to remove "Modeling" there
    # (hence the negative lookbehind).
    # NOTE(review): `ICCMs\track` reads as "ICCMs" + TAB + "rack"; `ICCM\strack`
    # was probably intended. Left untouched because a later cell relies on
    # entries that still start with "ICCM track" -- confirm before changing.
    text = re.sub(
        r'''
        (?<!Joint\s)
        (?:Symposium\sin\sHonor\sof\sBill\sBatchelder|Decision\smaking\s(?:1|2|3)|
        Perception\sand\sPsychophysics|Memory\s(?:1|2)|ICCMs\track\s(?:1|3|4|5|6|7|9)|
        Process\smodels|Modeling\sstrategy\suse\sin\ssearching\sand\sdeciding|
        Accumulator\smodels\s(?:1|2|3)|Cognition|Language|Vision|Neurocognitive\smodeling|
        Numerical\scognition|Methods\s(?:1|2)|Symposium\son\sOrganizational\s
        Principles\sof\sVision|Modeling)
        \s\n\n
        ''',
        '',
        text,
        flags=re.VERBOSE
    )

    return text.strip()

In [14]:
def clean_text(text, fix_whitespace=False):
    """Clean extracted text (used for abstracts).

    Fixes ligatures, removes page-break residue, turns '|' back into 'I',
    removes leftovers of badly extracted "(1)"/"(2)" markers and removes a
    backslash placed in front of an apostrophe. Falsy input is returned
    unchanged.

    Args:
        text: Text to clean.
        fix_whitespace: If True, also normalise whitespace (see note below).

    Returns:
        The cleaned text, stripped of surrounding whitespace.
    """
    if not text:
        return text

    text = fix_ligatures(text)
    text = remove_page_break_text(text)

    # All "I's" are pipes so replacing those
    text = text.replace('|', 'I')

    # Removes any "(1)" or "(2)" within the abstract (not for affiliations)
    # that got extracted weirdly
    text = re.sub(r"(?:‘\)|\\'2\)|‘2\)|‘\?\)|'\?\)|\?\)|!\)|\"\"\)|\(\))", '', text)

    # Gets rid of '\' before '. The literal has to be "\\'": the previous "\'"
    # is just "'", which made the replacement a no-op. Done after the regex
    # above so its backslash-quote alternative still sees the original text.
    text = text.replace("\\'", "'")

    if fix_whitespace:
      # NOTE(review): the first substitution already turns every newline into
      # a space, so the second one can never match -- confirm which of the
      # two behaviours is intended before relying on fix_whitespace=True.
      text = re.sub(r'\s+', ' ', text)
      text = re.sub(r'\s*\n\s*', '\n\n', text)

    text = text.strip()

    return text

In [15]:
# Replacements for ligatures, detached accents, typographic quotes and a few
# garbled characters seen in the extracted PDF text
LIGATURE_MAP = {
    "ﬁ": "fi", "ﬂ": "fl", "ﬃ": "ffi", "ﬄ": "ffl", "ﬀ": "ff", "ﬅ": "ft", "ﬆ": "st",
    "Æ": "ffi", "¨u": "ü", "¨a": "ä", "´e": "é", "`e": "è", "`a": "à", "¨o": "ö",
    "˚a": "å", "c¸": "ç", '“': '"', '”': '"', "’": "'", '˜n': 'ñ', 'ˇs': 'š',
    "âĂŸ": "'", "``": '"', "↵": "ff", "✏": "ffl"
}

def fix_ligatures(text):
    """Replace ligatures and other known extraction artefacts with plain characters.

    First applies every `LIGATURE_MAP` replacement, then expands any remaining
    Latin ligature character to the lowercase letters named in its Unicode
    name (e.g. "LATIN SMALL LIGATURE OE" -> "oe"). All other characters,
    including non-Latin ligatures and characters without a Unicode name, are
    kept as they are.
    """
    # Replace known ligatures
    for bad, good in LIGATURE_MAP.items():
        text = text.replace(bad, good)

    # Expand any Latin ligature that is not in the map
    cleaned_chars = []
    for ch in text:
        name = unicodedata.name(ch, "")
        # Only Latin ligatures spell out their letters after the word
        # "LIGATURE"; names such as "ARABIC LIGATURE ALLAH ISOLATED FORM" or
        # "COMBINING LIGATURE LEFT HALF" would otherwise be turned into junk
        if name.startswith("LATIN") and " LIGATURE " in name:
            # Letters after "LIGATURE", without spaces, lowercased
            base = name.split("LIGATURE")[-1]
            base = base.replace(" ", "").lower()
            cleaned_chars.append(base)
        else:
            cleaned_chars.append(ch)

    return "".join(cleaned_chars)

# Program

204 entries total (3 plenary talks, 22 symposium talks, 116 talks, and 63 posters)

The program lists SMP and ICCM contributions separately, and the ICCM entries don't have abstracts or affiliations.




In [17]:
# Program year; it is part of the input PDF name here and of the output CSV name at the end.
year = '2019'
file_path = pdfs_path + f'smp{year}_program.pdf'

## OCR (Optical Character Recognition)

I'm using OCR because ligatures like "ﬅ" and the one for "ti" show up as just their first letter when the text is extracted from the original PDF. OCR is typically used to convert scanned, printed documents (PDFs that are images, where you can't select or search for text) into searchable PDFs (one page with info on OCR [here](https://www.adobe.com/acrobat/guides/what-is-ocr.html#ocr-meaning)).

From OCRmyPDF's [documentation](https://ocrmypdf.readthedocs.io/en/latest/index.html) and help line below:

> "OCRmyPDF adds an optical character recognition (OCR) text layer to scanned PDF files, allowing them to be searched"

> "Generates a searchable PDF or PDF/A from a regular PDF. OCRmyPDF rasterizes each page of the input PDF, optionally corrects page
rotation and performs image processing, runs the Tesseract OCR engine on the
image, and then creates a PDF from the OCR information."

I already OCRed the PDF, so the code for making it is commented out.

Tutorial on using OCRmyPDF in colab from https://colab.research.google.com/github/louispaulet/OCRmyPDF_google_colab/blob/main/PDF_OCR.ipynb

In [24]:
# !apt install tesseract-ocr
# !apt install libtesseract-dev
# !pip install pytesseract
# !apt install ghostscript
# !pip install ocrmypdf

In [25]:
# # Uncomment if want info on how to use ocrmypdf
# !ocrmypdf --help

In [18]:
# Path of the OCRed copy of the program PDF (the target of the commented-out
# ocrmypdf call below). Built from `year` like the other paths instead of
# hard-coding "2019".
output_pdf_path = pdfs_path + f"smp{year}_program_ocr.pdf"

In [27]:
# !ocrmypdf --force-ocr "{file_path}" "{output_pdf_path}"

## Grab text from the pdf

Also getting text from original PDF because entries that label the authors with numbers for the affiliations weren't extracted properly when using the OCRed PDF.

In [19]:
# Open the OCRed PDF
ocr_program = pymupdf.open(output_pdf_path)

In [20]:
# Extract the OCRed document as markdown text (sliced and split as a string below)
ocr_program_text = pymupdf4llm.to_markdown(ocr_program)

In [30]:
# Preview a slice of the OCR text around the start of the plenary section
ocr_program_text[17_800:20_000]

'\nPlenary presentations \n\n## Plenary presentations \n\n## All plenary presentations will be held in Ballroom West from 14:00 to 15:00. \n\n## Early Career Award lecture by David Kellen (July 20) \n\n## Testing Representations in Recognition Memory: From Model Fits to Critical Tests \n\nAuthor(s): Kellen, David (Syracuse University, United States of America). Contact: davekellen@gmail.com. Abstract: The topic of this talk concerns a long-standing topic in recognition memory, the comparison between discrete, continuous, and “hybrid” modeling accounts of recognition memory. Specifically, | will discuss how this work has traditionally focused on model fits predicated on strong parametric assumptions, and the importance of a shift towards more general, non-parametric approaches. Here, | will show how some classic results in mathematical psychology, such as Falmagne’s proof on the Block-Marschak inequalities provide a testable foundation for the general notion that memory judgments are ba

In [22]:
# Same extraction for the original (non-OCR) PDF; its text keeps the numbered
# author labels but loses ligatures (see the preview below)
program = pymupdf.open(file_path)   # original PDF
program_text = pymupdf4llm.to_markdown(program)

In [37]:
# Preview the same region of the original-PDF text for comparison
program_text[17_100:19_000]

'**Ballroom West** from 14:00 to 15:00. \n\n## **Early Career Award lecture by David Kellen (July 20) Testng Representatons in Recogniton Memory: From Model Fits to Critcal Tests** \n\n_Author(s):_ Kellen, David (Syracuse University, United States of America). _Contact:_ `davekellen@gmail.com` . _Abstract:_ The topic of this talk concerns a long-standing topic in recogniton memory, the comparison between discrete, contnuous, and “hybrid” modeling accounts of recogniton memory. Specifically, I will discuss how this work has traditonally focused on model fits predicated on strong parametric assumptons, and the importance of a shif towards more general, non-parametric approaches. Here, I will show how some classic results in mathematcal psychology, such as Falmagne’s proof on the Block-Marschak inequalites provide a testable foundaton for the general noton that memory judgments are based on a latent-strength representaton. I will report empirical results supportng the Block-Marschak inequ

## Split up into talk entries

### OCR

In [38]:
# Splits up abstracts
# Everything after the plenary-room sentence is the abstracts section; every
# "## " heading starts a new chunk and only chunks that list authors are kept
abstracts_start = ocr_program_text.split('West from 14:00 to 15:00. \n\n## ')[1]
initial_entry_split = abstracts_start.split('\n\n## ')
abstract_entries = list(filter(lambda chunk: 'Author(s)' in chunk, initial_entry_split))

# Splits up ICCM posters in the last element of `abstract_entries`
# (each poster starts with "(" after a blank line; the text before the first
# poster is dropped) and swaps that last element for the individual posters
iccm_posters = re.split(r'\n\n(?=\()', abstract_entries[-1])[1:]
abstract_entries = abstract_entries[:-1] + iccm_posters

In [39]:
# Removes the text at page breaks and finishes breaking up the entries
no_page_breaks = list(map(remove_page_break_text, abstract_entries))

final_abstract_entries = []

for entry in no_page_breaks:
    # Chunks that start with an ICCM track header hold several entries
    # separated by " \n\n"; every other chunk is a single entry
    pieces = entry.split(' \n\n') if re.search(r'^ICCM track', entry) else [entry]

    # Only pieces that list authors are real entries
    final_abstract_entries.extend(piece for piece in pieces if 'Author(s)' in piece)

In [40]:
# Spot-check the first two cleaned entries
final_abstract_entries[:2]

['Testing Representations in Recognition Memory: From Model Fits to Critical Tests \n\nAuthor(s): Kellen, David (Syracuse University, United States of America). Contact: davekellen@gmail.com. Abstract: The topic of this talk concerns a long-standing topic in recognition memory, the comparison between discrete, continuous, and “hybrid” modeling accounts of recognition memory. Specifically, | will discuss how this work has traditionally focused on model fits predicated on strong parametric assumptions, and the importance of a shift towards more general, non-parametric approaches. Here, | will show how some classic results in mathematical psychology, such as Falmagne’s proof on the Block-Marschak inequalities provide a testable foundation for the general notion that memory judgments are based on a latent-strength representation. | will report empirical results supporting the Block-Marschak inequalities, but also show the close relationship between different types of memory judgments. Fina

### Original pdf

In [41]:
# Same split as for the OCR text: keep what follows the plenary-room sentence,
# then start a new chunk at every "## " heading
program_abstracts_start = program_text.split('15:00. \n\n## ')[1]
initial_program_split = program_abstracts_start.split('\n\n## ')

In [42]:
# Keep only the entries whose affiliations are numbered, i.e. contain "(1:"
labeled_affs_entries = [entry for entry in initial_program_split if '(1:' in entry]

In [43]:
# Spot-check the first two entries with numbered affiliations
labeled_affs_entries[:2]

['**Joint modeling of cultural consensus and everyday life experiences** \n\n_Author(s):_ Oravecz, Zita[(1)] ; Vandekerckhove, Joachim[(2)] (1: Penn State University; 2: University of California, Irvine). _Contact:_ `zita@psu.edu` . _Abstract:_ Cultural Consensus Theory (CCT) models allow us to explore shared knowledge or beliefs in a culture. Within this framework, with the Extended Consensus Model (ECM) we can describe individual differences among people in terms of consensus knowledge, willingness to guess, and guessing bias. These cognitve characteristcs might have a direct influence on everyday life experiences. To study this, we developed a latent variable model in which process model parameters from intensive longitudinal daily life data and parameters of the ECM can be estmated simultaneously and joined via linear link functons. We apply this model to study whether beliefs on what makes people feel loved are linked to daily life experiences of love. ',
 '**New Results from the 

In [44]:
# Parsed original-PDF entries keyed by their title (a repeated title would
# overwrite the earlier entry)
labeled_affs_entries_dicts = {
    get_title(raw_entry): split_entry(raw_entry, year)
    for raw_entry in labeled_affs_entries
}

In [47]:
# Spot-check the first two (title, parsed entry) pairs
list(labeled_affs_entries_dicts.items())[:2]

[('Joint modeling of cultural consensus and everyday life experiences',
  {'year': '2019',
   'author(s)': 'Zita[(1)] Oravecz, Joachim[(2)] Vandekerckhove',
   'affiliation(s)': '1: Penn State University; 2: University of California, Irvine',
   'title': '**Joint modeling of cultural consensus and everyday life experiences**',
   'type': '',
   'abstract': 'Cultural Consensus Theory (CCT) models allow us to explore shared knowledge or beliefs in a culture. Within this framework, with the Extended Consensus Model (ECM) we can describe individual differences among people in terms of consensus knowledge, willingness to guess, and guessing bias. These cognitve characteristcs might have a direct influence on everyday life experiences. To study this, we developed a latent variable model in which process model parameters from intensive longitudinal daily life data and parameters of the ECM can be estmated simultaneously and joined via linear link functons. We apply this model to study whether

## Sort authors, affiliations, title, and abstract

In [58]:
# Parse every OCR entry into a dict (title, authors, affiliations, abstract)
parsed_entries = []

for entry in final_abstract_entries:
  # Sometimes a second entry is attached to the end of one
  # but without its title
  if len(re.findall(r'Author\(s\)', entry)) == 2:
    author_splits = re.split(r'\n\n(?=Author\(s\):)', entry)
  else:
    author_splits = [entry]

  if len(author_splits) > 1:
    # Everything before the last "Author(s):" block is the first entry
    # (title + authors/abstract); the last block is the title-less second entry
    first_entry = '\n\n'.join(author_splits[:-1])
    second_entry = author_splits[-1]

    # Parse BOTH entries. Previously only the second one was added, so the
    # entry it was glued to was silently dropped.
    parsed_entries.append(split_entry(first_entry, year))
    parsed_entries.append(split_entry(second_entry, year))

  else:
    # Splits entry into title, authors, affiliations, abstract
    parsed_entries.append(split_entry(entry, year))

In [59]:
# Spot-check entries 3 and 4 (their affiliations are still numbered at this point)
parsed_entries[3:5]

[{'year': '2019',
  'author(s)': "Zita‘) Oravecz, Joachim') Vandekerckhove",
  'affiliation(s)': '1: Penn State University; 2: University of California, Irvine',
  'title': 'Joint modeling of cultural consensus and everyday life experiences',
  'type': '',
  'abstract': 'Cultural Consensus Theory (CCT) models allow us to explore shared knowledge or beliefs in a culture. Within this framework, with the Extended Consensus Model (ECM) we can describe individual differences among people in terms of consensus knowledge, willingness to guess, and guessing bias. These cognitive characteristics might have a direct influence on everyday life experiences. To study this, we developed a latent variable model in which process model parameters from intensive longitudinal daily life data and parameters of the ECM can be estimated simultaneously and joined via linear link functions. We apply this model to study whether beliefs on what makes people feel loved are linked to daily life experiences of lov

## Adds the authors with the number labels for the affiliations

Using fuzzy matching to match the titles because extracting the text from the original PDF (not the OCRed one) reduces each ligature to just its first letter.

In [60]:
# Titles from the original-PDF extraction; these are the fuzzy-matching candidates
original_titles = list(labeled_affs_entries_dicts.keys())

In [61]:
# First ten candidate titles (note the missing ligature letters)
original_titles[:10]

['Joint modeling of cultural consensus and everyday life experiences',
 'New Results from the Bayesian and Frequentst MPT Multverse',
 'Representng Probabilistc Models of Knowledge Space Theory by Multnomial Processing Tree Models',
 'Bayesian Model Selecton and Model Averaging for Multnomial Processing Trees',
 'Would you bet on it? How life’s gambles impact people’s beliefs',
 'Testng the Separable Representaton of Utlity Theories: An Experiment Evaluatng Monotonicity, Transitvity, and Double Cancellaton',
 'A unified account of repetton blindness and the atentonal blink',
 'A model-based explanaton of performance-related changes in abstract stmulus-response learning',
 'Prevalence induced Biases in Medical Image Decision-making',
 'Temporal control in modelling eye fixatons']

In [62]:
# Modified with help from Gemini 3
# For every OCR entry whose affiliations are still numbered ("1: ...; 2: ..."),
# find the matching entry from the original PDF (which kept the per-author
# number labels) and rewrite the authors/affiliations in place so that each
# author's affiliations are spelled out in author order.
for ocr_entry in parsed_entries:
  if re.search(r'^1:', ocr_entry['affiliation(s)']):
    ocr_title = ocr_entry["title"]

    match_key, score = fuzzy_match_title(
        ocr_title,
        original_titles,
        threshold=88
    )

    # Fallback: Match by abstract if title match fails
    if not match_key and ocr_entry.get('abstract'):
        best_score = 0
        for title_key, data in labeled_affs_entries_dicts.items():
            # Compare abstracts
            current_score = fuzz.token_set_ratio(ocr_entry['abstract'], data['abstract'])
            if current_score > 90 and current_score > best_score:
                best_score = current_score
                match_key = title_key

    if match_key:
        ref = labeled_affs_entries_dicts[match_key]

        # Use the title from the reference if the OCR one was empty/missing.
        # The reference title still carries the markdown bold markers
        # ("**...**"), so strip them instead of copying them into the data.
        if not ocr_title:
            ocr_entry['title'] = ref['title'].strip('*').strip()

        # Fix authors and associations: the author labels come from the
        # reference, the affiliation names from the OCR text
        author_label_tuples = parse_labeled_authors(ref["author(s)"])
        aff_dict = parse_affiliation_dict(ocr_entry["affiliation(s)"])

        # The authors returned from this have the broken ligatures too
        new_authors_ref, new_affiliations = make_aff_list(
            author_label_tuples,
            aff_dict
        )

        # Heuristic: If OCR misses commas compared to the number of authors in ref,
        # use the ref authors (accepting broken ligatures is better than concatenated names).
        # We expect N-1 commas for N authors.
        expected_commas = len(author_label_tuples) - 1
        ocr_commas = ocr_entry['author(s)'].count(',')

        if len(author_label_tuples) > 1 and ocr_commas < expected_commas:
            candidate_authors = new_authors_ref
        else:
            candidate_authors = ocr_entry['author(s)']

        # Allow periods and hyphens, but ONLY keep hyphens between letters (e.g. Jean-Paul)
        # Removes:
        # 1. Any char that is NOT a letter, space, comma, dot, or hyphen
        # 2. Any hyphen NOT preceded by a letter
        # 3. Any hyphen NOT followed by a letter
        new_authors = regex.sub(r'[^\p{L}\s,.-]|(?<!\p{L})-|-(?!\p{L})', '', candidate_authors)

        # Fix split names with particles (e.g. "Laura E., de Ruiter")
        # Removes comma if followed by a lowercase letter (assuming particles are lowercase)
        new_authors = regex.sub(r',\s+(?=[a-z])', ' ', new_authors)

        # Add periods to initials if missing (e.g. "David E Huber" -> "David E. Huber")
        # Matches single uppercase letter not preceded by a letter, followed by space, comma or end of string
        new_authors = regex.sub(r'(?<!\p{L})(\p{Lu})(?=\s|,|$)', r'\1.', new_authors)

        new_authors = regex.sub(r'\s+', ' ', new_authors).strip()

        ocr_entry['author(s)'] = new_authors
        ocr_entry['affiliation(s)'] = new_affiliations
    else:
        # There's one without a title I'll fix manually
        print("No match:", ocr_title)

In [63]:
# Same two entries after the matching step: labels resolved, affiliations per author
parsed_entries[3:5]

[{'year': '2019',
  'author(s)': 'Zita Oravecz, Joachim Vandekerckhove',
  'affiliation(s)': 'Penn State University; University of California, Irvine',
  'title': 'Joint modeling of cultural consensus and everyday life experiences',
  'type': '',
  'abstract': 'Cultural Consensus Theory (CCT) models allow us to explore shared knowledge or beliefs in a culture. Within this framework, with the Extended Consensus Model (ECM) we can describe individual differences among people in terms of consensus knowledge, willingness to guess, and guessing bias. These cognitive characteristics might have a direct influence on everyday life experiences. To study this, we developed a latent variable model in which process model parameters from intensive longitudinal daily life data and parameters of the ECM can be estimated simultaneously and joined via linear link functions. We apply this model to study whether beliefs on what makes people feel loved are linked to daily life experiences of love.'},
 {'y

# Create df and convert to csv

In [64]:
# One row per parsed entry, with the columns in a fixed order
df = pd.DataFrame(parsed_entries, columns=["year", "author(s)", "affiliation(s)", "title", "type", "abstract"])

In [65]:
# Preview the first rows
df.head()

Unnamed: 0,year,author(s),affiliation(s),title,type,abstract
0,2019,David Kellen,Syracuse University,Testing Representations in Recognition Memory:...,,The topic of this talk concerns a long-standin...
1,2019,Maithilee Kunda,Vanderbilt University,ICCM keynote lecture by Maithilee Kunda (July ...,,Despite evidence for the importance of visual ...
2,2019,Jake Hofman,Microsoft Research,Society for Mathematical Psychology keynote le...,,How does information spread in online social n...
3,2019,"Zita Oravecz, Joachim Vandekerckhove",Penn State University; University of Californi...,Joint modeling of cultural consensus and every...,,Cultural Consensus Theory (CCT) models allow u...
4,2019,"Henrik Singmann, Daniel W. Heck, Marius Barth,...",University of Warwick; University of Mannheim;...,New Results from the Bayesian and Frequentist ...,,Even with a clear hypothesis or cognitive mode...


In [66]:
# Write the table to Google Drive as smp<year>_program.csv, without the index column
df.to_csv(f"/content/drive/MyDrive/math_psych_work/csv/smp{year}_program.csv", index=False)