### Dependencies

In [17]:
import emoji
import os
import cv2
import grapheme
import numpy as np
import shutil
import pandas as pd

np.random.seed(0)

# Generate CSVs

### Testing utility functions
* `process_emoji_list_to_str`: `list[emoji]` -> `str`
* `unprocess_emoji_list_from_str`: `str` -> `list[emoji]`

In [None]:
import emoji
import ast

def emoji_to_unicode(emoji_str):
    """Return the code points of ``emoji_str`` as space-separated ``U+XXXX`` tokens.

    Each character of the input is converted independently with ``ord`` and
    formatted as uppercase hexadecimal, so a multi-code-point emoji yields
    several tokens.
    """
    code_points = []
    for symbol in emoji_str:
        code_points.append("U+" + format(ord(symbol), "X"))
    return " ".join(code_points)

def process_emoji_list_to_str(emoji_list):
    """Serialize a list of emoji short-codes into one description string.

    Leading and trailing colons are stripped from every entry, the bare names
    are joined with ' [EM] ', and a closing period is appended.
    """
    bare_names = [shortcode.strip(':') for shortcode in emoji_list]
    return ' [EM] '.join(bare_names) + '.'

def unprocess_emoji_list_from_str(emoji_str):
    """Inverse of ``process_emoji_list_to_str``: rebuild the list of short-codes.

    Args:
        emoji_str: Bare emoji names separated by ' [EM] ', normally terminated
            by a single period.

    Returns:
        A list of ``:name:`` short-codes. An input with no names (``''`` or
        ``'.'``) yields an empty list, so the empty list round-trips.
    """
    # Drop the final character only when it really is the terminating period;
    # slicing unconditionally would eat the last letter of an unterminated input.
    body = emoji_str[:-1] if emoji_str.endswith('.') else emoji_str
    if not body:
        return []
    return [f":{desc}:" for desc in body.split(' [EM] ')]

def emoji_str_from_description(desc_str: str) -> str:
    """
    Turn a description sentence into the emoji characters it names.

    The first 8 characters (the length of "This is ") and the final character
    (the closing ".") are dropped unconditionally, without checking what they
    are. The remainder is split on ' [EM] ' and every piece is wrapped in
    colons and passed to ``emoji.emojize``.

    Args:
        desc_str (str): Sentence of the form "This is <name> [EM] <name>.".

    Returns:
        str: The concatenation of the converted pieces, in input order.

    Example:
        >>> emoji_str_from_description('This is face_savoring_food [EM] bread.')
        '😋🍞'
    """
    names = desc_str[8:-1].split(' [EM] ')
    # "pouting_face" is hard-coded rather than looked up -- presumably the
    # installed emoji package does not resolve that short-code; TODO confirm.
    return ''.join(
        '😡' if name == "pouting_face" else emoji.emojize(f":{name}:")
        for name in names
    )


# Smoke checks: print one sample result from each helper above so the output
# can be inspected by eye. The last line feeds process_emoji_list_to_str's
# result back through unprocess_emoji_list_from_str (round trip).
# TODO: replace these prints with assertions.
print(emoji_str_from_description('This is face_savoring_food [EM] bread.'))
print(emoji_to_unicode('🎷'))
print(process_emoji_list_to_str([':necktie:', ':chart_increasing:']))
print(unprocess_emoji_list_from_str(process_emoji_list_to_str([':necktie:', ':chart_increasing:'])))


face_savoring_foodbread
U+1F3B7
necktie [EM] chart_increasing.
[':necktie:', ':chart_increasing:']


In [19]:
import csv
import ast
import os

def generate_csv(input_file_path, output_file_path):
    """Convert a description-based CSV into an emoji-based CSV.

    Reads ``input_file_path`` (UTF-8, with a header row containing at least
    'sent1', 'sent2', 'label' and 'strategy') and writes the same columns to
    ``output_file_path``:

    * 'sent1' is replaced by ``emoji_str_from_description(row['sent1'])``.
    * 'sent2' loses its first two whitespace-separated words and its final
      character; runs of whitespace collapse to single spaces.
      (Presumably this removes a leading "This is" and a trailing period --
      TODO confirm against the data.)
    * 'label' and 'strategy' are copied unchanged.

    The parent directory of ``output_file_path`` is created if it is missing.

    Args:
        input_file_path: Path of the source CSV.
        output_file_path: Path of the CSV to write (overwritten if present).

    Raises:
        FileNotFoundError: If ``input_file_path`` does not exist.
        KeyError: If a required column is absent from the input.
    """
    # The output CSV must carry exactly these columns, in this order.
    fieldnames = ['sent1', 'sent2', 'label', 'strategy']

    with open(input_file_path, newline='', encoding='utf-8') as fin:
        # Create the destination folder only once the input opened successfully,
        # so a bad input path leaves nothing behind. dirname is '' for a bare
        # filename, and os.makedirs('') would raise.
        output_dir = os.path.dirname(output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(output_file_path, 'w', newline='', encoding='utf-8') as fout:
            reader = csv.DictReader(fin)
            writer = csv.DictWriter(fout, fieldnames=fieldnames)
            writer.writeheader()

            for row in reader:
                em = emoji_str_from_description(row['sent1'])
                # Drop the first two words, then the last character.
                en = ' '.join(row['sent2'].split()[2:])[:-1]
                writer.writerow({
                    'sent1': em,
                    'sent2': en,
                    'label': row['label'],
                    'strategy': row['strategy'],
                })

    print(f"Conversion complete! Output saved to {output_file_path}")

In [20]:
# One (split name, source CSV, destination CSV) entry per dataset split.
csvs_to_generate = [
    (
        split_name,
        os.path.join('.', 'originals', f'{split_name}.csv'),
        os.path.join('.', 'converted', f'{split_name}.csv'),
    )
    for split_name in ('train', 'test', 'val')
]

# The loop names avoid `input`, which would shadow the builtin for the rest
# of the kernel session.
for split_name, src_path, dst_path in csvs_to_generate:
    generate_csv(src_path, dst_path)

Conversion complete! Output saved to ./converted/train.csv
Conversion complete! Output saved to ./converted/test.csv
Conversion complete! Output saved to ./converted/val.csv
