In [2]:
import datetime as dt
import re

import bs4
import numpy as np
import pandas as pd
import requests
import snscrape.modules.twitter as sntwitter
from emoji import UNICODE_EMOJI # Read documentation

**Defining the ticker**

In [3]:
# Symbol passed as the search query to the Twitter scraper below.
ticker = "AAPL"

**Twitter scraping example**

In [4]:
max_tweets = 100
all_tweets = []
all_emojis = []

# Collect the text of at most `max_tweets` search results for the ticker.
for i, tweet in enumerate(sntwitter.TwitterSearchScraper(ticker).get_items()):
    # `i` is zero-based, so `>=` stops after exactly `max_tweets` tweets
    # (a strict `>` would let one extra tweet through).
    if i >= max_tweets:
        break
    all_tweets.append(tweet.content)

# Look the emoji table up once instead of on every character.
# NOTE(review): UNICODE_EMOJI appears to have been removed in emoji>=2.0 --
# confirm the installed version still provides it.
known_emojis = UNICODE_EMOJI['en']

# Scan every tweet character by character and keep the characters that are
# listed as emojis. Emojis built from several code points are only matched
# through whichever of their individual characters appear in the table.
for tweet_text in all_tweets:
    for character in tweet_text:
        if character in known_emojis:
            all_emojis.append(character)

all_emojis[0:4]

['📉', '⚠', '📌', '🔍']

**Firstly, scraping for emoji category data from the web:**

In [5]:
url = "https://character.construction/emoji-categories"
# A timeout keeps the cell from hanging forever if the site does not respond.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html_parsed = bs4.BeautifulSoup(response.text, 'html.parser')


**Classifying into headers (categories) and tables (individual emojis in these categories)**

In [6]:
# Every <table> element of the parsed page; the cells further down read the
# emoji code points out of their <td> cells.
tables = html_parsed.find_all("table")
# Every <h3> element of the parsed page; their text is used as category names.
headers = html_parsed.find_all("h3")

**Cleaning header strings:**

In [7]:
# Take the visible text of each <h3> element. Compared with stripping the tag
# by string replacement, this decodes HTML entities ("&amp;" becomes "&") and
# does not depend on the exact attributes of the tag.
clean_headers = [header.get_text().strip() for header in headers]

**Cleaning tables:**

In [8]:
# Function from StackExchange to find the nth occurrence of a substring:

def find_nth(haystack, needle, n):
    """Return the index of the n-th non-overlapping occurrence of ``needle``.

    Occurrences are counted from 1 and searched left to right; each search
    resumes right after the end of the previous match.

    Args:
        haystack: String to search in.
        needle: Substring to look for.
        n: Which occurrence to locate. Values below 1 behave like 1.

    Returns:
        The start index of the requested occurrence, or -1 if ``haystack``
        contains fewer than ``n`` occurrences.
    """
    needle_length = len(needle)
    position = haystack.find(needle)
    # The first match is already found; advance once per remaining occurrence.
    for _ in range(n - 1):
        if position < 0:
            break
        position = haystack.find(needle, position + needle_length)
    return position

In [9]:
# A code point label: "U+" followed by 4 to 6 hexadecimal digits. Accepting a
# variable number of digits keeps 4-digit code points (e.g. U+26A0), which a
# fixed-width slice sized for 5-digit labels fails to parse.
codepoint_pattern = re.compile(r"U\+([0-9A-Fa-f]{4,6})")

clean_tables = []

for table in tables:
    # Text form of all <td> cells of this table; the labels are searched in it.
    cells_text = str(table.find_all("td"))
    unicode_list = []
    # findall yields each label exactly once, in document order, so the first
    # symbol of a table is no longer added twice.
    for hex_digits in codepoint_pattern.findall(cells_text):
        # Converting into a symbol:
        try:
            unicode_symbol = chr(int(hex_digits, 16))
        except ValueError:
            # chr() rejects values above 0x10FFFF; skip such labels.
            continue

        unicode_list.append(unicode_symbol)
    clean_tables.append(unicode_list)

**Creating a dictionary:**

In [None]:
# Pair every category name with the emoji list at the same position.
# zip() stops at the shorter of the two lists, so this no longer depends on a
# hard-coded category count and cannot raise IndexError if the page changes.
# NOTE(review): this assumes the i-th <h3> describes the i-th <table> --
# confirm against the page layout.
classification = dict(zip(clean_headers, clean_tables))

classification

**Assigning scores to different emoji classes:**

In [13]:
# (substring of the category name, score given to that category).
# The rules are applied in order and a later match overwrites an earlier one.
sentiment_rules = (
    ("Smil", 0.5),
    ("Affection", 1.0),
    ("Neg", -1.0),
    ("skeptic", -0.5),
)

sentiment = {}
for category_name in classification:
    for marker, score in sentiment_rules:
        if marker in category_name:
            sentiment[category_name] = score

**Looking up the emojis found in the tweets in the classification dictionary:**

In [None]:
# Show the category -> emoji list mapping as the cell output.
classification

In [23]:
total_score = 0

# Add the score of every scored category that contains the tweet emoji.
# The inner loop uses its own variable name so the tweet emoji is not
# overwritten, and the membership test ensures an emoji only contributes the
# score of categories it actually belongs to.
for tweet_emoji in all_emojis:
    for category, category_emojis in classification.items():
        if category in sentiment and tweet_emoji in category_emojis:
            total_score = total_score + sentiment[category]
        

In [25]:
# Debug output: print the full list of category names once per emoji found in
# the tweets.
for tweet_emoji in all_emojis:
    for category in classification.keys():
        print(category)

Face, Smiling
Face, Affection
Face, Tongue
Face, Hand
Face, Neutral-skeptical
Face, Sleepy
Face, Unwell
Face, Hat
Face, Glasses
Face, Concerned
Face, Negative
Face, Costume
Cat, Face
Monkey, Face
Emotion
Hand, Fingers-open
Hand, Fingers-partial
Hand, Single-finger
Hand, Fingers-closed
Hands
Hand, Prop
Body, Parts
Person
Person, Gesture
Person, Role
Person, Fantasy
Person, Activity
Person, Sport
Person, Resting
Family
Person, Symbol
Skin, Tone
Hair, Style
Animal, Mammal
Animal, Bird
Animal, Amphibian
Animal, Reptile
Animal, Marine
Animal, Bug
Plant, Flower
Plant, Other
Food, Fruit
Food, Vegetable
Food, Prepared
Food, Asian
Food, Marine
Food, Sweet
Drink
Dishware
Place, Map
Place, Geographic
Place, Building
Place, Religious
Place, Other
Transport, Ground
Transport, Water
Transport, Air
Hotel
Time
Sky &amp; weather
Event
Award, Medal
Sport
Game
Arts &amp; crafts
Clothing
Sound
Music
Musical, Instrument
Phone
Computer
Light &amp; video
Book, Paper
Money
Mail
Writing
Office
Lock
Tool
Scienc

In [None]:
# Show only the emoji lists of the mapping, without the category names.
classification.values()