In [1]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
import csv

# Fetch the WordNet corpus that `wn` and WordNetLemmatizer rely on.
# When the package is already installed, NLTK only reports it as up-to-date.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/luigi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
def find_synset_id(word, lex_num):
    """Return the name of the first noun synset of ``word`` matching ``lex_num``.

    WordNet sense keys have the layout
    ``lemma%ss_type:lex_filenum:lex_id:head_word:head_id``. A numeric
    ``lex_num`` is compared against the ``lex_filenum`` field only, so it can
    no longer match the ``ss_type``/``lex_id`` fields or digits elsewhere in
    the key, and zero padding does not matter ("6" and "06" are equivalent).

    Args:
        word: Lemma to look up among the WordNet noun synsets.
        lex_num: Lexicographer file number as a string or int. A non-numeric
            value is treated as a sense-key fragment and matched as a
            substring of each key (the previous behaviour).

    Returns:
        The synset name of the first match, or ``""`` when ``lex_num`` is
        blank/None or no noun synset of ``word`` matches.
    """
    if lex_num is None:
        return ""
    wanted = str(lex_num).strip()
    if not wanted:
        # A blank string is a substring of every key, which would silently
        # select whichever synset happens to come first.
        return ""

    if wanted.isdecimal():
        target = int(wanted)

        def key_matches(key):
            # Everything after '%' is "ss_type:lex_filenum:lex_id:head_word:head_id".
            fields = key.rpartition('%')[2].split(':')
            return (
                len(fields) > 1
                and fields[1].isdecimal()
                and int(fields[1]) == target
            )
    else:
        def key_matches(key):
            # Non-numeric input: keep the original substring semantics.
            return wanted in key

    # Iterate through all noun synsets of the word, in WordNet's order
    for synset in wn.synsets(word, pos=wn.NOUN):
        if any(key_matches(lemma.key()) for lemma in synset.lemmas()):
            return synset.name()
    return ""

In [3]:
def process_file(input_file, output_file):
    """Copy a CSV file, filling column 3 of each row with a WordNet synset name.

    For every row, column 0 is lower-cased and lemmatized as a noun, and the
    result is looked up together with column 1 via ``find_synset_id``. The
    returned name (or ``""`` when nothing matches) is stored in column 3.

    Rows with fewer than two columns (including blank lines, which the csv
    reader yields as empty lists) cannot be looked up and are written through
    unchanged. Rows with two or three columns are padded with empty strings so
    that column 3 exists.

    Args:
        input_file: Path of the UTF-8 CSV file to read.
        output_file: Path of the UTF-8 CSV file to create or overwrite.
    """
    # Initialize the lemmatizer once for the whole file
    lemmatizer = WordNetLemmatizer()

    # newline='' lets the csv module handle line endings itself
    with open(input_file, 'r', newline='', encoding='utf-8') as infile, \
         open(output_file, 'w', newline='', encoding='utf-8') as outfile:

        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        for row in reader:
            if len(row) >= 2:
                # Lemmatize the word so plural forms resolve to their base noun
                lemma = lemmatizer.lemmatize(row[0].lower(), wn.NOUN)
                # Pad short rows; assigning to row[3] would otherwise raise IndexError
                if len(row) < 4:
                    row.extend([''] * (4 - len(row)))
                row[3] = find_synset_id(lemma, row[1])
            # Write every row, annotated or not, to keep the output aligned with the input
            writer.writerow(row)

In [4]:
# Example usage: annotate every row of the input CSV and save the result as a new file.
input_file_path = 'my_data.csv'  # Replace with your file path
output_file_path = 'final_data.csv'  # Opened in 'w' mode by process_file, so an existing file is overwritten
process_file(input_file_path, output_file_path)