In [1]:
# This code demonstrates how to get attributes for emojis in a list using a reference dictionary compiled for this purpose


# ABOUT THE EMOJI ATTRIBUTE REFERENCE FILE
# The attributes are compiled from Unicode emoji data files emoji-test.txt and also manual review
# Unicode data files: https://unicode.org/Public/emoji/13.0/
# there may be some emojis that render but are not fully supported by Unicode (e.g. family with skintone)
# and thus may not be in the master reference of emoji attributes


# values for emoji group, subgroup, name, status, version are from Unicode
# attributes for shape, color, direction, gender, skin_tone are based on the Unicode name
# desc, person_animal_other, anthro_type were assigned by the author but could be changed
# sentiment is assigned as positive (1), negative (-1), or neutral (0) based on smileys; this could be modified
# additional fields could be added for other groupings
# additional resources are: https://emojipedia.org/ and https://unicode.org/emoji/charts/full-emoji-list.html



In [2]:
import getEmojiAttributes

In [3]:
# Look up every attribute recorded in the master attribute reference for a single emoji.
# Alternative (left disabled): index the reference dictionary directly:
#   getEmojiAttributes.emoji_w_attributes_dict['😀']

emoji_of_interest = '😀'

# Get a dictionary of attributes for the single emoji; as the last expression of the
# cell, the result is displayed as the cell output.
# If the emoji is not in the reference list and is a family with skin tones,
# the attributes are created for it instead of being looked up.
getEmojiAttributes.getAttributesForSingleEmoji(emoji_of_interest)

{'rownum': '1',
 'emoji': '😀',
 'grp_subgrp': 'Smileys & Emotion_face-smiling',
 'desc': 'anthropomorphic',
 'person_animal_other': 'smiley',
 'anthro_type': 'face-gesture',
 'gender': 'neutral',
 'skin_tone': 'neutral',
 'shape_type': '',
 'shape_color': '',
 'direction': '',
 'sentiment_smileys_binary': '1',
 'cldr short name': 'grinning face',
 'codepoint': '1F600',
 'status': 'fully-qualified',
 'group': 'Smileys & Emotion',
 'subgroup': 'face-smiling',
 'char_len': '1',
 'version': 'E1.0'}

In [4]:
# A family emoji with skin tones: four person emojis, each followed by a skin-tone
# modifier, joined with zero-width joiners (the \u200d escapes).
# This test_emoji is not in the reference because most families with skin tones are
# not fully supported by Unicode; however, this code analyzes the emoji and builds
# the attributes for family-with-skin-tone emojis on the fly.

test_emoji = '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾'

getEmojiAttributes.getAttributesForSingleEmoji(test_emoji)

{'rownum': '',
 'emoji': '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾',
 'grp_subgrp': 'People & Body_family',
 'desc': 'anthropomorphic',
 'person_animal_other': 'person-group',
 'anthro_type': 'group',
 'gender': 'mixed',
 'skin_tone': 'Medium-Dark',
 'shape_type': '',
 'shape_color': '',
 'direction': '',
 'sentiment_smileys_binary': '',
 'cldr short name': 'Family: Man: Medium-Dark Skin Tone, Woman: Medium-Dark Skin Tone, Girl: Medium-Dark Skin Tone, Boy: Medium-Dark Skin Tone',
 'codepoint': '1F468 1F3FE 1F469 1F3FE 1F467 1F3FE 1F466 1F3FE',
 'status': 'not-qualified',
 'group': 'People & Body',
 'subgroup': 'family',
 'char_len': 11,
 'version': ''}

In [5]:
# Collect the values of one attribute for the emojis in a list that have that attribute.
# Inputs: a list of emojis and the attribute name as a string
# (if a list of names is passed instead, only its first value is used).

# Attribute names available in the reference, listed here for convenience.
attributes_in_ref_to_choose_from = [
    'group', 'subgroup', 'grp_subgrp',
    'desc', 'person_animal_other', 'anthro_type',
    'gender', 'skin_tone',
    'shape_type', 'shape_color', 'direction',
    'emoji', 'cldr short name', 'sentiment_smileys_binary',
    'status', 'char_len', 'version', 'rownum', 'codepoint',
]

attribute_to_get = 'group'

test_list_of_emojis = ['4️⃣', '❤️', '🇦', '🇦🇺', '🍎', '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', '👩🏿\u200d💻', '👪🏿', '🗳️','🗳', '😃', '🟠']

# Echo the attribute name first, then the list of values returned for the emojis.
print(attribute_to_get)
print(getEmojiAttributes.getListOfSingleAttributeValuesForEmojiList(test_list_of_emojis, attribute_to_get))


group
['Symbols', 'Smileys & Emotion', 'Symbols', 'Flags', 'Food & Drink', 'People & Body', 'People & Body', 'People & Body', 'Objects', 'Objects', 'Smileys & Emotion', 'Symbols']


In [6]:
# Collect the sorted, unique values of one attribute for the emojis in a list that have that attribute.
# Inputs: a list of emojis and the attribute name as a string
# (if a list of names is passed instead, only its first value is used).

# Attribute names available in the reference, listed here for convenience.
attributes_in_ref_to_choose_from = [
    'group', 'subgroup', 'grp_subgrp',
    'desc', 'person_animal_other', 'anthro_type',
    'gender', 'skin_tone',
    'shape_type', 'shape_color', 'direction',
    'emoji', 'cldr short name', 'sentiment_smileys_binary',
    'status', 'char_len', 'version', 'rownum', 'codepoint',
]

attribute_to_get = 'group'

test_list_of_emojis = ['4️⃣', '❤️', '🇦', '🇦🇺', '🍎', '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', '👩🏿\u200d💻', '👪🏿', '🗳️','🗳', '😃', '🟠']

# Echo the attribute name first, then the de-duplicated list of values.
print(attribute_to_get)
print(getEmojiAttributes.getUniqueListOfSingleAttributeValuesForEmojiList(test_list_of_emojis, attribute_to_get))


group
['Flags', 'Food & Drink', 'Objects', 'People & Body', 'Smileys & Emotion', 'Symbols']


In [7]:
# Build a dict of the values of selected attributes for the emojis in a list.
# Inputs: a list of emojis and a list of attribute names.

# Attribute names available in the reference, listed here for convenience.
attributes_in_ref_to_choose_from = [
    'group', 'subgroup', 'grp_subgrp',
    'desc', 'person_animal_other', 'anthro_type',
    'gender', 'skin_tone',
    'shape_type', 'shape_color', 'direction',
    'emoji', 'cldr short name', 'sentiment_smileys_binary',
    'status', 'char_len', 'version', 'rownum', 'codepoint',
]

test_list_of_emojis = ['4️⃣', '❤️', '🇦', '🇦🇺', '🍎', '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', '👩🏿\u200d💻', '👪🏿', '🗳️','🗳', '😃', '🟠']
attributes_to_get = ['group', 'shape_color']

dict_of_values_for_test_emojis = getEmojiAttributes.getDictOfValuesForEmojiAttributesOfInterest(
    test_list_of_emojis,
    attributes_to_get,
)
# Bare last expression so the notebook displays the resulting dict.
dict_of_values_for_test_emojis


{'group': ['Symbols',
  'Smileys & Emotion',
  'Symbols',
  'Flags',
  'Food & Drink',
  'People & Body',
  'People & Body',
  'People & Body',
  'Objects',
  'Objects',
  'Smileys & Emotion',
  'Symbols'],
 'shape_color': ['red', 'orange']}

In [8]:
# Build a dict of selected attributes and their sorted, unique values for the emojis in a list.
# Inputs: a list of emojis and a list of attribute names.

# Attribute names available in the reference, listed here for convenience.
attributes_in_ref_to_choose_from = [
    'group', 'subgroup', 'grp_subgrp',
    'desc', 'person_animal_other', 'anthro_type',
    'gender', 'skin_tone',
    'shape_type', 'shape_color', 'direction',
    'emoji', 'cldr short name', 'sentiment_smileys_binary',
    'status', 'char_len', 'version', 'rownum', 'codepoint',
]

test_list_of_emojis = ['4️⃣', '❤️', '🇦', '🇦🇺', '🍎', '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', '👩🏿\u200d💻', '👪🏿', '🗳️','🗳', '😃', '🟠']
attributes_to_get = ['shape_color', 'group']

dict_of_values_for_test_emojis = getEmojiAttributes.getDictOfUniqueValuesForEmojiAttributesOfInterest(
    test_list_of_emojis,
    attributes_to_get,
)
# Bare last expression so the notebook displays the resulting dict.
dict_of_values_for_test_emojis

{'shape_color': ['orange', 'red'],
 'group': ['Flags',
  'Food & Drink',
  'Objects',
  'People & Body',
  'Smileys & Emotion',
  'Symbols']}

In [9]:
# Create a dictionary of all attributes for all emojis in a list.

test_list_of_emojis = ['4️⃣', '❤️', '🇦', '🇦🇺', '🍎', '👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', '👩🏿\u200d💻', '👪🏿', '🗳️','🗳', '😃', '🟠']

# The displayed result is keyed by emoji, each value being that emoji's attribute dict.
subset_emoji_attributes_dict = getEmojiAttributes.getDictOfEmojiAttributes(test_list_of_emojis)

# Compare the size of the input list with the number of entries returned.
# NOTE(review): the second label says "found in master attribute ref", but the number
# printed is simply len() of the returned dict; family-with-skin-tone emojis are
# described elsewhere in this notebook as built on the fly rather than looked up,
# so the wording may overstate what is counted. Confirm against getEmojiAttributes.
print('number of emojis in list:', len(test_list_of_emojis))
print('number of emojis in list found in master attribute ref:', len(subset_emoji_attributes_dict))
print()
print('Show dictionary of emojis with attributes for emoji list')
# Bare last expression so the notebook displays the full dict.
subset_emoji_attributes_dict
        


number of emojis in list: 12
number of emojis in list found in master attribute ref: 12

Show dictionary of emojis with attributes for emoji list


{'4️⃣': {'rownum': '3794',
  'emoji': '4️⃣',
  'grp_subgrp': 'Symbols_keycap',
  'desc': '',
  'person_animal_other': '',
  'anthro_type': '',
  'gender': '',
  'skin_tone': '',
  'shape_type': '',
  'shape_color': '',
  'direction': '',
  'sentiment_smileys_binary': '',
  'cldr short name': 'keycap: 4',
  'codepoint': '0034 FE0F 20E3',
  'status': 'fully-qualified',
  'group': 'Symbols',
  'subgroup': 'keycap',
  'char_len': '3',
  'version': 'E0.6'},
 '❤️': {'rownum': '133',
  'emoji': '❤️',
  'grp_subgrp': 'Smileys & Emotion_emotion',
  'desc': 'shape',
  'person_animal_other': '',
  'anthro_type': '',
  'gender': '',
  'skin_tone': '',
  'shape_type': 'heart',
  'shape_color': 'red',
  'direction': '',
  'sentiment_smileys_binary': '',
  'cldr short name': 'red heart',
  'codepoint': '2764 FE0F',
  'status': 'fully-qualified',
  'group': 'Smileys & Emotion',
  'subgroup': 'emotion',
  'char_len': '2',
  'version': 'E0.6'},
 '🇦': {'rownum': '4169',
  'emoji': '🇦',
  'grp_subgrp': 'S