# Gmail Cleaning: Encodings & Emojis

This notebook develops functions for identifying text encodings, rendering emojis, and engineering features related to encoding and emojis, which could impact email engagement. New features include:
* **encoding:** The specific type of encoding
* **emoji_in:** True or False to indicate if emoji is present in subject line
* **emoji_num:** The number of emojis in the subject line
* **emoji_name:** The name of the emoji character

### Resources

I created a table saved locally using this **[Emoji Unicode Table by Tim Whitlock](https://apps.timwhitlock.info/emoji/tables/unicode)**.  This is the only source I've seen for the emoji utf-8 'bytes' codepoints. 

# Packages

In [576]:
import pandas as pd
from email.header import decode_header
import math

## Completed EE Cleaning Functions

In [1034]:
def decode_sub_name(df):
    """
    Decodes standard encodings like utf-8 into plain text for all observations in the 'subject' and 'name' fields of a DataFrame. Also creates a new field in the DataFrame, 'encoding', that stores the encoding type.
    
    Input: DataFrame with 'subject' and 'name' features.
    
    Output: New DataFrame with decoded 'subject' and 'name'
    
    
    """
    
    # change subject line and name observations to string type so we can decode
    df['subject'] = df['subject'].astype(str)
    df['name'] = df['name'].astype(str)
    
    # 1. iterate over each observation to decode UTF-8 encoded subject lines
    # 2. grab the encoding type from the tuple and populate the 'encoding' feature
    # 3. if an observsation in the encoding feature contains 'nan', convert it to 'None'
    # 4. ensure each observation only contains the string subject line
    for i in df.index:
        df.at[i, 'subject'] = decode_header(df.at[i, 'subject'])
        df.at[i, 'name'] = decode_header(df.at[i, 'name'])
        df.at[i, 'encoding'] = str(df.subject[i][0][1])
        if df.encoding[i] == 'nan':
            df.at[i, 'encoding'] = 'None'
        else:
            pass
        df.at[i, 'subject'] = df.subject[i][0][0]
        df.at[i, 'name'] = df.name[i][0][0]
    
    return df


def clean_column(df, column):
    """
    Removes extraneous characters ['\\','"',"'", "b'"] from all observations in a column.
    
    """
    for i in df.index:
        # convert column observations to string
        df.at[i, column] = str(df[column][i])
        # remove backslahses from subject line
        df.at[i, column] = df[column][i].replace('\\','')
        # remove quote marks from subject line
        df.at[i, column] = df[column][i].replace('"','')
        # remove the bytes tag "b'" from subject line
        df.at[i, column] = df[column][i].replace("b'",'')
        # remove single quote marks from subject line
        df.at[i, column] = df[column][i].replace("'",'')
    
    return df
    
def clean_sub_name(df):
    """
    Removes extraneous characters ['\\','"',"'", "b'"] from all observations in the subject line and name columns.
    
    Input: DataFrame with 'subject' and 'name' features.
    
    Output: New DataFrame
    
    """
    df = clean_column(df, 'subject')
    df = clean_column(df, 'name')
    return df

def emoji_in_num_name(df, emoji_lib):
    """
    Identifies emojis in utf-8 codepoints, unicode codepoints or native format and creates new emoji features in your df.
    
    Input: DataFrame with 'subject' feature
    
    Output: New DataFrame with three new features.
        - 'emoji_in': True is an emoji is present in the subject, else False
        - 'emoji_num': the number of emojis in the subject, 0 is no emoji is present
        - 'emoji_name': the names of the emojis in the subject, 'None' if no emoji is present
    
    """
    
    # check each subject line if it contains an emoji listed in the library.
    # add all emojis to a list called emojis
    for i in df.index:
        utf_8 = [ele for ele in emoji_lib['utf-8'] if(ele in df.subject[i])]
        unicode = [ele for ele in emoji_lib['unicode'] if(ele in df.subject[i])]
        native = [ele for ele in emoji_lib['native'] if(ele in df.subject[i])]
        emojis = pd.DataFrame(set(utf_8 + unicode + native), columns = ['emo'])
        
        # check the length of emoji list to create a new feature called 'emoji-in' that contains:
        # False if no emoji is in a subject line
        # True if an emoji is in a subject line
        if len(emojis['emo']) == 0:
            df.at[i, 'emoji_in'] = 'False'
        else:
            df.at[i, 'emoji_in'] = 'True'
        
        # add the number of emojis in the subject line into a new feature called 'emoji_num'
        df.at[i, 'emoji_num'] = len(emojis['emo'])
        
        #create an empty dataframe that mirrors the emoji library dataset
        emoji_ind = pd.DataFrame(columns = ['type','native','android','symbol','unicode','utf-8','description'])
        
        # append the rows associated with the subject line emojis to the empty dataframe
        for x in emojis.index:
            emoji_ind = pd.concat([emoji_ind, emoji_lib[emoji_lib['utf-8'] == emojis.emo[x]]])

        # grab the names of the emojis and insert as a string into a new feature called 'emoji_name'
        df.at[i, 'emoji_name'] = str(list(emoji_ind['description']))
        
        # if there is no emoji name, replace the empty list with 'None'
        # else clean the string emoji names by removing the list brackets.
        if df['emoji_name'][i] == '[]':
            df.at[i, 'emoji_name'] = "None"
        else:
            df.at[i, 'emoji_name'] = df.emoji_name[i].replace('[','')
            df.at[i, 'emoji_name'] = df.emoji_name[i].replace(']','')
            
    return df

## Data Import

In [1035]:
df = pd.read_pickle('mfflavell_emails_NED.pkl')
emoji_lib = pd.read_csv('emoji_library_cleaned.csv')

In [1036]:
df.head()

Unnamed: 0,date,subject,name,email,domain
0,2019-11-05 12:52:10-08:00,Finish setting up your new Google Account,Google Community Team,googlecommunityteam-noreply@google.com,google.com
1,2019-11-06 15:51:29+00:00,Job Search - Invitation to collaborate,Frank Flavell (via Google Drive),drive-shares-noreply@google.com,google.com
2,2019-11-08 18:09:46-08:00,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com
3,2019-11-08 18:12:30-08:00,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com
4,2019-11-12 22:25:02+00:00,Notification from Akins HR Team,Akins Team,info@akinsc.com,akinsc.com


In [1037]:
df.tail()

Unnamed: 0,date,subject,name,email,domain
17008,2022-09-23 21:33:42+00:00,Up to $25 off tastes so sweet. Can we just say...,Uber Eats,uber@uber.com,uber.com
17009,2022-09-23 22:25:40+00:00,"Matthew Flavell, will you rate your transactio...",Amazon Marketplace,marketplace-messages@amazon.com,amazon.com
17010,2022-09-23 23:04:11+00:00,Re: Flatiron DS Full Curriculum,rajeev panwar,panwar_rajeev@hotmail.com,hotmail.com
17011,2022-09-23 23:11:38+00:00,Your Weekend Watch Guide Is Here,HBO Max,HBOMax@mail.hbomax.com,mail.hbomax.com
17012,2022-09-24 00:00:20+00:00,=?utf-8?B?8J+OuSBIb3cgbXVjaCBwaWFubyBoYXZlIHlv...,Levi from Simply Piano,play@piano.hellosimply.com,piano.hellosimply.com


In [1038]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17012 entries, 0 to 17012
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   date     17012 non-null  object
 1   subject  17012 non-null  object
 2   name     17012 non-null  object
 3   email    17012 non-null  object
 4   domain   17012 non-null  object
dtypes: object(5)
memory usage: 797.4+ KB


In [1039]:
emoji_lib.head()

Unnamed: 0,type,native,android,symbol,unicode,utf-8,description
0,emoticon,😁,😁,😁,U+1F601,xf0x9fx98x81,grinning face with smiling eyes
1,emoticon,😂,😂,😂,U+1F602,xf0x9fx98x82,face with tears of joy
2,emoticon,😃,😃,😃,U+1F603,xf0x9fx98x83,smiling face with open mouth
3,emoticon,😄,😄,😄,U+1F604,xf0x9fx98x84,smiling face with open mouth and smiling eyes
4,emoticon,😅,😅,😅,U+1F605,xf0x9fx98x85,smiling face with open mouth and cold sweat


In [1040]:
emoji_lib.tail()

Unnamed: 0,type,native,android,symbol,unicode,utf-8,description
837,misc,🕣,🕣,🕣,U+1F563,xf0x9fx95xa3,clock face eight-thirty
838,misc,🕤,🕤,🕤,U+1F564,xf0x9fx95xa4,clock face nine-thirty
839,misc,🕥,🕥,🕥,U+1F565,xf0x9fx95xa5,clock face ten-thirty
840,misc,🕦,🕦,🕦,U+1F566,xf0x9fx95xa6,clock face eleven-thirty
841,misc,🕧,🕧,🕧,U+1F567,xf0x9fx95xa7,clock face twelve-thirty


In [1041]:
emoji_lib.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 842 entries, 0 to 841
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   type         842 non-null    object
 1   native       842 non-null    object
 2   android      842 non-null    object
 3   symbol       842 non-null    object
 4   unicode      842 non-null    object
 5   utf-8        842 non-null    object
 6   description  842 non-null    object
dtypes: object(7)
memory usage: 46.2+ KB


## Cleaning Pipeline

In [1042]:
df = decode_sub_name(df)

In [1043]:
df = clean_sub_name(df)

In [1046]:
df = emoji_in_num_name(df, emoji_lib)

In [1047]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17012 entries, 0 to 17012
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        17012 non-null  object 
 1   subject     17012 non-null  object 
 2   name        17012 non-null  object 
 3   email       17012 non-null  object 
 4   domain      17012 non-null  object 
 5   encoding    17012 non-null  object 
 6   emoji_in    17012 non-null  object 
 7   emoji_num   17012 non-null  float64
 8   emoji_name  17012 non-null  object 
dtypes: float64(1), object(8)
memory usage: 1.8+ MB


In [1048]:
df.head(20)

Unnamed: 0,date,subject,name,email,domain,encoding,emoji_in,emoji_num,emoji_name
0,2019-11-05 12:52:10-08:00,Finish setting up your new Google Account,Google Community Team,googlecommunityteam-noreply@google.com,google.com,,False,0.0,
1,2019-11-06 15:51:29+00:00,Job Search - Invitation to collaborate,Frank Flavell (via Google Drive),drive-shares-noreply@google.com,google.com,,False,0.0,
2,2019-11-08 18:09:46-08:00,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com,,False,0.0,
3,2019-11-08 18:12:30-08:00,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com,,False,0.0,
4,2019-11-12 22:25:02+00:00,Notification from Akins HR Team,Akins Team,info@akinsc.com,akinsc.com,,False,0.0,
5,2019-11-14 01:08:17+00:00,SR Submitted # 311-01139022,SRNotice,SRNotice@customercare.nyc.gov,customercare.nyc.gov,,False,0.0,
6,2020-02-21 11:20:21-08:00,Learn more about our updated Terms of Service,Google,noreply-utos@google.com,google.com,,False,0.0,
7,2020-04-24 10:02:50-04:00,AdvantageCare Physicians would love your feedb...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com,,False,0.0,
8,2020-04-29 00:28:21-04:00,AdvantageCare Physicians would love your feedb...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com,,False,0.0,
9,2020-08-24 16:02:59+00:00,"Hatch Rest: Tap on, tap off",Hatch,hatch@hatchbaby.com,hatchbaby.com,utf-8,False,0.0,


In [1049]:
df.tail(20)

Unnamed: 0,date,subject,name,email,domain,encoding,emoji_in,emoji_num,emoji_name
16993,2022-09-23 16:30:42+00:00,Critics are buzzing for xe2x80x98The Lord of t...,Prime Video,no-reply@primevideo.com,primevideo.com,utf-8,False,0.0,
16994,2022-09-23 16:44:37+00:00,Security alert,Google,no-reply@accounts.google.com,accounts.google.com,,False,0.0,
16995,2022-09-23 16:51:18+00:00,Make Venmo your go-to,Venmo,venmo@email.venmo.com,email.venmo.com,,False,0.0,
16996,2022-09-23 13:09:05-04:00,Re: Berkeley Carroll renovations,Jennifer Jacobs,jjacobsnyc@gmail.com,gmail.com,,False,0.0,
16997,2022-09-23 17:16:29+00:00,[GitHub] An email address was added to your ac...,GitHub,noreply@github.com,github.com,,False,0.0,
16998,2022-09-23 17:30:11+00:00,LIVE,Info @ Nucamp,info@nucamp.co,nucamp.co,,False,0.0,
16999,2022-09-23 12:36:45-05:00,xf0x9fx91x89 Do this before starting a business,Tom Wang,support@fbamasterclass.ca,fbamasterclass.ca,utf-8,True,1.0,'white right pointing backhand index'
17000,2022-09-23 11:47:55-06:00,Earn a certificate from a top university,Coursera,Coursera@email.coursera.org,email.coursera.org,,False,0.0,
17001,2022-09-23 17:50:24+00:00,New jobs similar to Analytic Consultant - Fede...,LinkedIn,jobs-noreply@linkedin.com,linkedin.com,,False,0.0,
17002,2022-09-23 17:56:18+00:00,"Google is hiring: Business Analyst, YouTube Tr...",LinkedIn,jobs-listings@linkedin.com,linkedin.com,,False,0.0,


In [1050]:
df.to_csv("mfflavell_emails_NED_EE.csv")

## Function Development

This is a record of my process for analyzing the data and developing the functions.

In [649]:
df = pd.read_pickle('mfflavell_emails_NED.pkl')

## Cleaning Checklist

* [X] Decode utf-8 encoded subject lines
* [X] Create a new encoding feature based on the tuple result of the decoding
* [X] Remove the "\\" between emoji encodings so we can match codes with names using the Emoji Library
* [X] Create the features emoji_in, emoji_num and emoji_name

### Decode UTF-8 Subject Lines

In [1067]:
'🏊🏻‍♀'.encode('utf-8')

b'\xf0\x9f\x8f\x8a\xf0\x9f\x8f\xbb\xe2\x80\x8d\xe2\x99\x80'

In [1066]:
'🫨'.encode('utf-8')

b'\xf0\x9f\xab\xa8'

In [1064]:
'🚀'.encode('utf-8')

b'\xf0\x9f\x9a\x80'

In [1063]:
'🚀'.encode('utf-8')

b'\xf0\x9f\x9a\x80'

In [1065]:
b'\xf0\x9f\x9a\x80'.decode('utf-8')

'🚀'

In [1059]:

b"xe2x9cxa8".decode()

'xe2x9cxa8'

In [650]:
type(df.subject[0])

str

Convert every observation into string datatype so we can decode.

In [651]:
df.subject = df.subject.astype(str)

Iterate over each observation to decode UTF-8 encoded subject lines

In [548]:
for i in df.index:
    df.at[i, 'subject'] = decode_header(df.at[i, 'subject'])

In [549]:
type(df.subject[0])

list

In [550]:
type(df.subject[0][0])

tuple

In [560]:
print(df.subject[0][0][1] == None)

True


In [578]:
type(df.subject[0][0][1])

NoneType

Iterate over each subject to replace the tuple with the string subject line.

In [561]:
for i in df.index:
    df.at[i, 'encoding'] = df.subject[i][0][1]

In [563]:
df.head(50)

Unnamed: 0,date,subject,name,email,domain,encoding
0,2019-11-05 12:52:10-08:00,"[(Finish setting up your new Google Account, N...",Google Community Team,googlecommunityteam-noreply@google.com,google.com,
1,2019-11-06 15:51:29+00:00,"[(Job Search - Invitation to collaborate, None)]",Frank Flavell (via Google Drive),drive-shares-noreply@google.com,google.com,
2,2019-11-08 18:09:46-08:00,[(U.S. Census Bureau Prospective Candidate Con...,noreply@csod.com,noreply@csod.com,csod.com,
3,2019-11-08 18:12:30-08:00,[(U.S. Census Bureau Prospective Candidate Con...,noreply@csod.com,noreply@csod.com,csod.com,
4,2019-11-12 22:25:02+00:00,"[(Notification from Akins HR Team, None)]",Akins Team,info@akinsc.com,akinsc.com,
5,2019-11-14 01:08:17+00:00,"[(SR Submitted # 311-01139022, None)]",SRNotice,SRNotice@customercare.nyc.gov,customercare.nyc.gov,
6,2020-02-21 11:20:21-08:00,[(Learn more about our updated Terms of Servic...,Google,noreply-utos@google.com,google.com,
7,2020-04-24 10:02:50-04:00,[(AdvantageCare Physicians would love your fee...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com,
8,2020-04-29 00:28:21-04:00,[(AdvantageCare Physicians would love your fee...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com,
9,2020-08-24 16:02:59+00:00,"[(b'Hatch Rest: Tap on, tap off', utf-8)]",=?utf-8?Q?Hatch?=,hatch@hatchbaby.com,hatchbaby.com,utf-8


In [580]:
df['encoding'] = df['encoding'].astype(str)

In [588]:
df['encoding'][0] == 'nan'

True

In [205]:
for i in df.index:
    df.at[i, 'subject'] = df.subject[i][0][0]

In [208]:
type(df.subject[1245])

str

In [227]:
df.head(50)

Unnamed: 0,date,from,subject,name,email,domain
0,2019-11-05 12:52:10-08:00,Google Community Team <googlecommunityteam-nor...,Finish setting up your new Google Account,Google Community Team,googlecommunityteam-noreply@google.com,google.com
1,2019-11-06 15:51:29+00:00,"""Frank Flavell (via Google Drive)"" <drive-shar...",Job Search - Invitation to collaborate,Frank Flavell (via Google Drive),drive-shares-noreply@google.com,google.com
2,2019-11-08 18:09:46-08:00,noreply@csod.com,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com
3,2019-11-08 18:12:30-08:00,noreply@csod.com,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com
4,2019-11-12 22:25:02+00:00,"""Akins Team"" <info@akinsc.com>",Notification from Akins HR Team,Akins Team,info@akinsc.com,akinsc.com
5,2019-11-14 01:08:17+00:00,SRNotice <SRNotice@customercare.nyc.gov>,SR Submitted # 311-01139022,SRNotice,SRNotice@customercare.nyc.gov,customercare.nyc.gov
6,2020-02-21 11:20:21-08:00,Google <noreply-utos@google.com>,Learn more about our updated Terms of Service,Google,noreply-utos@google.com,google.com
7,2020-04-24 10:02:50-04:00,"""AdvantageCare Physicians"" <noreply@patients.p...",AdvantageCare Physicians would love your feedb...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com
8,2020-04-29 00:28:21-04:00,"""AdvantageCare Physicians"" <noreply@patients.p...",AdvantageCare Physicians would love your feedb...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com
9,2020-08-24 16:02:59+00:00,=?utf-8?Q?Hatch?= <hatch@hatchbaby.com>,"b'Hatch Rest: Tap on, tap off'",=?utf-8?Q?Hatch?=,hatch@hatchbaby.com,hatchbaby.com


Convert all 

In [632]:
df.subject = df.subject.astype(str)

In [629]:
df['subject'] = df['subject'].astype(bytes)

In [630]:
type(df['subject'][0])

numpy.bytes_

In [633]:
df['subject'] = df['subject'].apply(lambda x: x.encode('utf-8'))

In [635]:
type(df['subject'][0])

bytes

In [645]:
df['subject'] = df['subject'].apply(lambda x: x.replace("b'", ''))

TypeError: a bytes-like object is required, not 'str'

In [646]:
for i in df.index:
    df.at[i, 'subject'] = df.subject[i].replace("b'", '')

TypeError: a bytes-like object is required, not 'str'

In [232]:
df['subject'] = df['subject'].apply(lambda x: x.rstrip("'"))

In [239]:
df.head(50)

Unnamed: 0,date,from,subject,name,email,domain
0,2019-11-05 12:52:10-08:00,Google Community Team <googlecommunityteam-nor...,Finish setting up your new Google Account,Google Community Team,googlecommunityteam-noreply@google.com,google.com
1,2019-11-06 15:51:29+00:00,"""Frank Flavell (via Google Drive)"" <drive-shar...",Job Search - Invitation to collaborate,Frank Flavell (via Google Drive),drive-shares-noreply@google.com,google.com
2,2019-11-08 18:09:46-08:00,noreply@csod.com,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com
3,2019-11-08 18:12:30-08:00,noreply@csod.com,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com
4,2019-11-12 22:25:02+00:00,"""Akins Team"" <info@akinsc.com>",Notification from Akins HR Team,Akins Team,info@akinsc.com,akinsc.com
5,2019-11-14 01:08:17+00:00,SRNotice <SRNotice@customercare.nyc.gov>,SR Submitted # 311-01139022,SRNotice,SRNotice@customercare.nyc.gov,customercare.nyc.gov
6,2020-02-21 11:20:21-08:00,Google <noreply-utos@google.com>,Learn more about our updated Terms of Service,Google,noreply-utos@google.com,google.com
7,2020-04-24 10:02:50-04:00,"""AdvantageCare Physicians"" <noreply@patients.p...",AdvantageCare Physicians would love your feedb...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com
8,2020-04-29 00:28:21-04:00,"""AdvantageCare Physicians"" <noreply@patients.p...",AdvantageCare Physicians would love your feedb...,AdvantageCare Physicians,noreply@patients.pgsurveying.com,patients.pgsurveying.com
9,2020-08-24 16:02:59+00:00,=?utf-8?Q?Hatch?= <hatch@hatchbaby.com>,"Hatch Rest: Tap on, tap off",=?utf-8?Q?Hatch?=,hatch@hatchbaby.com,hatchbaby.com


In [234]:
def column_decode(df, column):
    
    # satisfy dependencies
    from email.header import decode_header
    
    # change column observations to string type so we can decode
    df[column] = df[column].astype(str)
    
    # iterate over each observation to decode UTF-8 encoded subject lines
    # ensure each observation only contains the string subject line
    for i in df.index:
        df.at[i, 'subject'] = decode_header(df.at[i, 'subject'])
        df.at[i, 'subject'] = df.subject[i][0][0]
    
    # remove extraneous quote marks from each observation in the column
    df[column] = df[column].apply(lambda x: x.replace('"', ''))
    
    # remove "b'" at the beginning of observations
    df[column] = df[column].apply(lambda x: x.replace("b'", ''))
    
    # remove extraneous single quote mark at the end of observations
    df[column] = df[column].apply(lambda x: x.rstrip("'"))

In [852]:
😁

SyntaxError: invalid character '😁' (U+1F601) (4001062033.py, line 1)

In [856]:
b'\U+1F601'.decode('utf-8')

'\\U+1F601'

In [851]:
b'\xe2\x80\x9c'.decode('utf-8')

'“'

### Render Emojis & Create Feature of Emoji Name

In [712]:
emoji_lib = pd.read_csv('emoji_library.csv')

In [713]:
emoji_lib.head()

Unnamed: 0,type,native,android,symbol,unicode,utf-8,description
0,emoticon,😁,😁,😁,U+1F601,\xF0\x9F\x98\x81,grinning face with smiling eyes
1,emoticon,😂,😂,😂,U+1F602,\xF0\x9F\x98\x82,face with tears of joy
2,emoticon,😃,😃,😃,U+1F603,\xF0\x9F\x98\x83,smiling face with open mouth
3,emoticon,😄,😄,😄,U+1F604,\xF0\x9F\x98\x84,smiling face with open mouth and smiling eyes
4,emoticon,😅,😅,😅,U+1F605,\xF0\x9F\x98\x85,smiling face with open mouth and cold sweat


In [714]:
emoji_lib['utf-8'] = emoji_lib['utf-8'].apply(lambda x: x.replace("\\", ""))

In [715]:
emoji_lib

Unnamed: 0,type,native,android,symbol,unicode,utf-8,description
0,emoticon,😁,😁,😁,U+1F601,xF0x9Fx98x81,grinning face with smiling eyes
1,emoticon,😂,😂,😂,U+1F602,xF0x9Fx98x82,face with tears of joy
2,emoticon,😃,😃,😃,U+1F603,xF0x9Fx98x83,smiling face with open mouth
3,emoticon,😄,😄,😄,U+1F604,xF0x9Fx98x84,smiling face with open mouth and smiling eyes
4,emoticon,😅,😅,😅,U+1F605,xF0x9Fx98x85,smiling face with open mouth and cold sweat
...,...,...,...,...,...,...,...
837,misc,🕣,🕣,🕣,U+1F563,xF0x9Fx95xA3,clock face eight-thirty
838,misc,🕤,🕤,🕤,U+1F564,xF0x9Fx95xA4,clock face nine-thirty
839,misc,🕥,🕥,🕥,U+1F565,xF0x9Fx95xA5,clock face ten-thirty
840,misc,🕦,🕦,🕦,U+1F566,xF0x9Fx95xA6,clock face eleven-thirty


In [716]:
type(emoji_lib['utf-8'][0])

str

In [823]:
for i in emoji_lib.index:
    emoji_lib.at[i,'utf-8'] = emoji_lib['utf-8'][i].lower()

In [824]:
emoji_lib.head()

Unnamed: 0,type,native,android,symbol,unicode,utf-8,description
0,emoticon,😁,😁,😁,U+1F601,xf0x9fx98x81,grinning face with smiling eyes
1,emoticon,😂,😂,😂,U+1F602,xf0x9fx98x82,face with tears of joy
2,emoticon,😃,😃,😃,U+1F603,xf0x9fx98x83,smiling face with open mouth
3,emoticon,😄,😄,😄,U+1F604,xf0x9fx98x84,smiling face with open mouth and smiling eyes
4,emoticon,😅,😅,😅,U+1F605,xf0x9fx98x85,smiling face with open mouth and cold sweat


In [825]:
emoji_lib.to_csv('emoji_library_cleaned.csv', index=False)

In [701]:
test_emo = df.subject[13]

In [702]:
test_emo

'xf0x9fx91x8bWelcome to Kano World'

In [703]:
test_emo = str(test_emo)

In [704]:
type(test_emo)

str

In [669]:
test_emo = test_emo.replace('\\','')
print(test_emo)

"b'xf0x9fx91x8bWelcome to Kano World'"

In [695]:
test_emo = test_emo.replace('"','')
print(test_emo)

\xf0\x9f\x91\x8bWelcome to Kano World'


In [694]:
test_emo = test_emo.replace("b'",'')
print(test_emo)

\xf0\x9f\x91\x8bWelcome to Kano World'


In [696]:
test_emo = test_emo.replace("'",'')
print(test_emo)

\xf0\x9f\x91\x8bWelcome to Kano World


In [705]:
if 'xf0x9fx91x8b' in test_emo:
    print(True)
else:
    print(False)

True


In [706]:
test_emo.find('xf0x9fx91x8b')

0

In [707]:
if test_emo.find('xf0x9fx91x8b') == -1:
    print("No 'is' here!")
else:
    print("Found 'is' in the string.")

Found 'is' in the string.


In [752]:
emoji_lib['native'][0]

'😁'

In [731]:
"\UF09F9885"

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-9: illegal Unicode character (230861224.py, line 1)

In [727]:
test_emo.replace('xf0x9fx91x8b', "\U+1F44B ")

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-1: truncated \UXXXXXXXX escape (4162051383.py, line 1)

In [721]:
myres = [ele for ele in emoji_lib['utf-8'] if(ele in test_emo)]

In [722]:
print(myres)

['xf0x9fx91x8b']


In [746]:
for i in df.index:
    myres = [ele for ele in emoji_lib['utf-8'] if(ele in df.subject[i])]
    if len(myres) == 0:
        print('nope')
    else:
        print('YES!')

nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
YES!
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
YES!
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!


nope
nope
nope
nope
nope
YES!
nope
YES!
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
YES!
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope


nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope


nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
YES!


nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
YES!
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
YES!
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
YES!
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
YES!
nope
nope
nope
nope
YES!
nope
nope
nope
YES!
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


In [747]:
for i in df.index:
    myres = [ele for ele in emoji_lib['utf-8'] if(ele in df.subject[i])]
    if len(myres) == 0:
        df.at[i, 'emoji_in'] = 'False'
    else:
        df.at[i, 'emoji_in'] = 'True'

In [749]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17012 entries, 0 to 17012
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   date          17012 non-null  object 
 1   subject       17012 non-null  object 
 2   name          17012 non-null  object 
 3   email         17012 non-null  object 
 4   domain        17012 non-null  object 
 5   encoding      17012 non-null  object 
 6   emoji_in      17012 non-null  object 
 7   emoji_number  13 non-null     float64
 8   emoji_name    13 non-null     object 
dtypes: float64(1), object(8)
memory usage: 1.8+ MB


emoji_in: is an emoji present?
emoji_list: output a list of emojis in an input
emoji_num: count the number of emojis in a string
emoji_name: create a list of emoji names in a string
emoji_replace: replace a unicode

In [None]:
def emoji_list(s, lib):
    if type(s) == str:
        utf_8 = [ele for ele in lib['utf-8'] if(ele in s)]
        unicode = [ele for ele in lib['unicode'] if(ele in s)]
        native = [ele for ele in lib['native'] if(ele in s)]
        emojis = utf_8 + unicode + native
        emojis = list(set(emojis))
    elif type(s) == list
        for i in s:
            myres = [ele for ele in lib['utf-8'] if(ele in s)]
    elif type(s) == pandas.core.series.Series
    
    elif type(s) == pandas.core.frame.DataFrame
    
    for i in df.index:
        myres = [ele for ele in lib['utf-8'] if(ele in df.subject[i])]
    
    return emojis

In [772]:
type(emoji_lib)

pandas.core.frame.DataFrame

In [768]:
for i in df.index:
    myres = [ele for ele in emoji_lib['utf-8'] if(ele in df.subject[i])]
    if len(myres) == 0:
        df.at[i, 'emoji_number'] = 0
        df.at[i, 'emoji_name'] = 'None'
    elif len(myres) == 1:
        ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
        df.at[i, 'emoji_number'] = 1
        df.at[i, 'emoji_name'] = emoji_lib['description'][ind]
        df.at[i, 'subject'] = df.subject[i].replace(myres[0], '<emoji> ')
    elif len(myres) == 2:
        ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
        ind2 = emoji_lib[emoji_lib['utf-8'] == myres[1]].index[0]
        df.at[i, 'emoji_number'] = 2
        df.at[i, 'emoji_name'] = emoji_lib['description'][ind]
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind2])
        df.at[i, 'subject'] = df.subject[i].replace(myres[0], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[1], '<emoji> ')
    elif len(myres) == 3:
        ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
        ind2 = emoji_lib[emoji_lib['utf-8'] == myres[1]].index[0]
        ind3 = emoji_lib[emoji_lib['utf-8'] == myres[2]].index[0]
        df.at[i, 'emoji_number'] = 3
        df.at[i, 'emoji_name'] = emoji_lib['description'][ind]
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind2])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind3])
        df.at[i, 'subject'] = df.subject[i].replace(myres[0], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[1], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[2], '<emoji> ')
    elif len(myres) == 4:
        ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
        ind2 = emoji_lib[emoji_lib['utf-8'] == myres[1]].index[0]
        ind3 = emoji_lib[emoji_lib['utf-8'] == myres[2]].index[0]
        ind4 = emoji_lib[emoji_lib['utf-8'] == myres[3]].index[0]
        df.at[i, 'emoji_number'] = 4
        df.at[i, 'emoji_name'] = emoji_lib['description'][ind]
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind2])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind3])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind4])
        df.at[i, 'subject'] = df.subject[i].replace(myres[0], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[1], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[2], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[3], '<emoji> ')
    elif len(myres) == 5:
        ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
        ind2 = emoji_lib[emoji_lib['utf-8'] == myres[1]].index[0]
        ind3 = emoji_lib[emoji_lib['utf-8'] == myres[2]].index[0]
        ind4 = emoji_lib[emoji_lib['utf-8'] == myres[3]].index[0]
        ind5 = emoji_lib[emoji_lib['utf-8'] == myres[4]].index[0]
        df.at[i, 'emoji_number'] = 5
        df.at[i, 'emoji_name'] = emoji_lib['description'][ind]
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind2])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind3])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind4])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind5])
        df.at[i, 'subject'] = df.subject[i].replace(myres[0], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[1], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[2], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[3], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[4], '<emoji> ')
    else:
        ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
        ind2 = emoji_lib[emoji_lib['utf-8'] == myres[1]].index[0]
        ind3 = emoji_lib[emoji_lib['utf-8'] == myres[2]].index[0]
        ind4 = emoji_lib[emoji_lib['utf-8'] == myres[3]].index[0]
        ind5 = emoji_lib[emoji_lib['utf-8'] == myres[4]].index[0]
        ind6 = emoji_lib[emoji_lib['utf-8'] == myres[5]].index[0]
        df.at[i, 'emoji_number'] = 6
        df.at[i, 'emoji_name'] = emoji_lib['description'][ind]
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind2])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind3])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind4])
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind5])  
        df.at[i, 'emoji_name'] = df.emoji_name[i] + ", " + (emoji_lib['description'][ind6])
        df.at[i, 'subject'] = df.subject[i].replace(myres[0], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[1], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[2], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[3], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[4], '<emoji> ')
        df.at[i, 'subject'] = df.subject[i].replace(myres[5], '<emoji> ')

In [None]:
ind = emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]
df.at[i, 'emoji_name'] = emoji_lib['description'][ind]

In [770]:
df.head(100)

Unnamed: 0,date,subject,name,email,domain,encoding,emoji_in,emoji_number,emoji_name
0,2019-11-05 12:52:10-08:00,Finish setting up your new Google Account,Google Community Team,googlecommunityteam-noreply@google.com,google.com,,False,0.0,
1,2019-11-06 15:51:29+00:00,Job Search - Invitation to collaborate,Frank Flavell (via Google Drive),drive-shares-noreply@google.com,google.com,,False,0.0,
2,2019-11-08 18:09:46-08:00,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com,,False,0.0,
3,2019-11-08 18:12:30-08:00,U.S. Census Bureau Prospective Candidate Confi...,noreply@csod.com,noreply@csod.com,csod.com,,False,0.0,
4,2019-11-12 22:25:02+00:00,Notification from Akins HR Team,Akins Team,info@akinsc.com,akinsc.com,,False,0.0,
...,...,...,...,...,...,...,...,...,...
95,2020-10-15 17:30:18-04:00,Sorry! Your visit has been canceled,NoReply@CVSCaremark.com,NoReply@CVSCaremark.com,CVSCaremark.com,,False,0.0,
96,2020-10-15 17:34:25-04:00,Todays CVS MinuteClinic visit,minutecliniccare@cvs.com,minutecliniccare@cvs.com,cvs.com,,False,0.0,
97,2020-10-15 17:36:17-04:00,Sorry! Your visit has been canceled,NoReply@CVSCaremark.com,NoReply@CVSCaremark.com,CVSCaremark.com,,False,0.0,
98,2020-10-15 15:03:41-07:00,"Heres 40% Off to Save More This Month, Frank!",CVS ExtraCare,extracare@pharmacy.cvs.com,pharmacy.cvs.com,,False,0.0,


In [737]:
df.subject[0]

'Finish setting up your new Google Account'

In [754]:
myres = [ele for ele in emoji_lib['utf-8'] if(ele in 'Kano World Consent Confirmed  xe2x9cx85')]

In [755]:
len(myres)

1

In [753]:
df.subject[14]

'Kano World Consent Confirmed  xe2x9cx85'

In [764]:
myres[0]

'xe2x9cx85'

In [758]:
emoji_lib['utf-8'].index('xe2x9cx85')

TypeError: 'RangeIndex' object is not callable

In [766]:
emoji_lib[emoji_lib['utf-8'] == myres[0]].index[0]

77

In [724]:
for i in emoji_lib['utf-8']:
    for j in test_emo:
        if i==j:
            print('YES')
        else:
            print('nope')

nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope
nope


In [734]:
test_emo.replace('xf0x9fx91x8b','<emoji> ')

'<emoji> Welcome to Kano World'

In [241]:
import emoji

In [250]:
emoji.distinct_emoji_list(df.subject[13])

[]

In [243]:
emoji.emoji_list(test_emo)

[]

In [244]:
import regex

In [245]:
regex.search('\p{Emoji=Yes}', test_e.decode('utf8'))

AttributeError: 'str' object has no attribute 'decode'

In [254]:
def extract_emojis(s):
  return ''.join(c for c in s if c in emoji_lib['utf-8'])

In [287]:
print([c for c in test_emo])

['\\', 'x', 'f', '0', '\\', 'x', '9', 'f', '\\', 'x', '9', '1', '\\', 'x', '8', 'b', 'W', 'e', 'l', 'c', 'o', 'm', 'e', ' ', 't', 'o', ' ', 'K', 'a', 'n', 'o', ' ', 'W', 'o', 'r', 'l', 'd']


In [285]:
test_print(test_emo)

<generator object test_print.<locals>.<genexpr> at 0x7feee4d67c80>


In [255]:
extract_emojis(test_emo)

''

In [257]:
from django.utils.encoding import smart_str,smart_unicode

cleaned_up_text=smart_str(test_emo)

ImportError: cannot import name 'smart_unicode' from 'django.utils.encoding' (/opt/anaconda3/lib/python3.9/site-packages/django/utils/encoding.py)

In [258]:
test_emo.decode("unicode_escape")

AttributeError: 'str' object has no attribute 'decode'

In [298]:
def emojified(s):
    data = pd.DataFrame(columns=['symbol', 'utf-8', 'description'])
    for word in s:
        i
        row = emoji_lib[emoji_lib['utf-8'] == word]
        data = pd.concat([data, row])
    return data

In [305]:
from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer(r'[a-zA-Z0-9]+')

In [306]:
test_tokenized = tokenizer.tokenize(test_emo)

In [307]:
test_tokenized

['xf0', 'x9f', 'x91', 'x8bWelcome', 'to', 'Kano', 'World']

In [308]:
for word in test_tokenized:
    if word in emoji_lib['utf-8']:
        print('YES')
    else:
        print('nope')

nope
nope
nope
nope
nope
nope
nope


In [299]:
emojified(test_emo)

Unnamed: 0,symbol,utf-8,description,type,native,android,unicode


In [309]:
import demoji

ModuleNotFoundError: No module named 'demoji'

In [311]:

import demoji

In [312]:
demoji.findall(test_emo)

{}

In [317]:
type(test_emo.encode('unicode_escape'))

bytes