In [96]:
import pandas as pd 
from tweepy import Client
import re
import json

def authenticate(config_path="twitter.json"):
    """Read the bearer token from a JSON file and build a Twitter API client.

    Args:
        config_path: Path of the JSON config file. The file must hold an
            object with a "bearer_token" key. Defaults to "twitter.json",
            the location this function has always read from.

    Returns:
        A ``Client`` constructed with the bearer token found in the file.

    Raises:
        OSError: If the config file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the "bearer_token" key is missing.
    """
    # Explicit encoding so reading the file does not depend on the platform locale.
    with open(config_path, "r", encoding="utf-8") as fp:
        twitter_config = json.load(fp)
    return Client(bearer_token=twitter_config["bearer_token"])

In [97]:
def get_query_response(client, search_word="wordle", max_results=100):
    """Query the Twitter API for recent tweets matching a search term.

    Args:
        client: Object exposing ``search_recent_tweets(query=..., max_results=...)``
            whose return value has a ``data`` attribute.
        search_word: Query string sent to the search endpoint. Defaults to
            "wordle", the value that was previously hard-coded.
        max_results: Maximum number of tweets requested. Defaults to 100.

    Returns:
        The tweets from the response's ``data`` attribute, or an empty list
        when that attribute is ``None`` or empty, so callers can always
        iterate over the result.
    """
    tweets = client.search_recent_tweets(query=search_word, max_results=max_results)
    # A response without matches carries no payload; normalise that to [].
    return tweets.data or []


In [98]:
def is_wordle_block(char):
    """Tell whether ``char`` is one of the square characters kept for a board.

    The accepted code points are 129001 (U+1F7E9), 129000 (U+1F7E8),
    11036 (U+2B1C) and 11035 (U+2B1B).

    Args:
        char: A single-character string; it is passed straight to ``ord``.

    Returns:
        True if the character's code point is one of the four above,
        otherwise False.
    """
    return ord(char) in (129001, 129000, 11036, 11035)

def print_data(data_list):
    """Print every record of ``data_list`` as ``key  :  value`` lines.

    Args:
        data_list: Iterable of dict-like records.

    Each record is followed by a printed newline character, which shows up
    as two blank lines between records.
    """
    for record in data_list:
        for field in record:
            print(field, ' : ', record[field])
        print("\n")

def post_processing(tweet_list):
    """Extract Wordle results from a list of tweets.

    Args:
        tweet_list: Iterable of tweet objects whose ``data`` attribute is a
            mapping with a 'text' entry. ``None`` or an empty iterable is
            treated as "no tweets".

    Returns:
        A list with one dict per tweet whose text contains a Wordle result
        line. Each dict has the keys:
            'board_number': the puzzle number, as a string.
            'score': the score text such as '4/6', or 'X/6' for a failed game.
            'board': a string starting with a newline, followed by every
                board-square character found in the tweet text; a newline
                is kept only when it directly follows a square.
        Tweets without a recognisable result line are skipped.
    """
    data_list = []
    if not tweet_list:
        return data_list

    # Raw strings keep the regex escapes away from Python's own string-escape
    # processing. [\dX] also accepts the 'X/6' score shared after a failed game.
    result_patterns = (
        r'Wordle ([0-9]+) ([\dX]/\d)',
        r'Wordle \(ES\)  [#]*([0-9]+) ([\dX]/\d)',
    )

    for list_item in tweet_list:
        text_data = list_item.data['text']

        # Try the default format first, then the "(ES)" variant.
        results = None
        for pattern in result_patterns:
            results = re.search(pattern, text_data)
            if results:
                break
        if not results:
            continue

        wordle_board = '\n'
        prev_result = False
        for char in text_data:
            wordle_block = is_wordle_block(char)
            if wordle_block:
                wordle_board += char
            elif char == '\n' and prev_result:
                # Keep a line break only when it ends a row of squares.
                wordle_board += char
            prev_result = wordle_block

        data_list.append({
            'board_number': results.group(1),
            'score': results.group(2),
            'board': wordle_board,
        })
    return data_list




In [99]:
# Main script to extract Wordle data from tweets.

# Build the API client, then fetch the tweets to parse.
client = authenticate()
tweet_list = get_query_response(client)

# Turn the tweets into result dicts and print each one.
data_list = post_processing(tweet_list)
print_data(data_list)


board_number  :  300
score  :  2/6
board  :  
⬛⬛🟩⬛⬛
🟩🟩🟩🟩🟩



board_number  :  300
score  :  6/6
board  :  
⬛🟨🟩🟨⬛
⬛🟩🟩⬛🟩
🟩🟩🟩⬛🟩
🟩🟩🟩⬛🟩
🟩🟩🟩⬛🟩
🟩🟩🟩🟩🟩


board_number  :  77
score  :  5/6
board  :  
🟨⬜⬜🟨⬜
⬜⬜🟨🟨⬜
⬜⬜⬜⬜⬜
🟨⬜🟨⬜⬜
🟩🟩🟩🟩🟩


board_number  :  300
score  :  3/6
board  :  
🟩⬛⬛⬛🟩
🟩🟨⬛⬛🟩
🟩🟩🟩🟩🟩


board_number  :  300
score  :  4/6
board  :  
🟩⬛🟩⬛🟩
⬛⬛⬛⬛⬛
🟨⬛⬛🟨⬛
🟩🟩🟩🟩🟩


board_number  :  300
score  :  6/6
board  :  
⬜🟨⬜⬜🟩
🟩🟩🟩⬜🟩
🟩🟩🟩⬜🟩
🟩🟩🟩⬜🟩
🟩🟩🟩⬜🟩
🟩🟩🟩🟩🟩


board_number  :  299
score  :  5/6
board  :  
⬜🟨⬜⬜🟨
🟨🟨⬜⬜⬜
⬜⬜⬜🟨🟩
⬜🟩🟩🟩🟩
🟩🟩🟩🟩🟩


board_number  :  299
score  :  3/6
board  :  
⬜🟩🟩🟨⬜
⬜🟩🟩🟩🟩
🟩🟩🟩🟩🟩



board_number  :  299
score  :  4/6
board  :  
⬜⬜⬜⬜🟩
⬜⬜⬜⬜🟩
⬜🟩🟩🟩🟩
🟩🟩🟩🟩🟩


board_number  :  300
score  :  4/6
board  :  
⬛⬛🟩⬛🟩
🟩⬛🟩⬛🟩
🟩⬛🟩⬛🟩
🟩🟩🟩🟩🟩


board_number  :  299
score  :  4/6
board  :  
⬛⬛⬛⬛⬛
🟨⬛🟨⬛⬛
⬛⬛🟩🟩🟩
🟩🟩🟩🟩🟩


board_number  :  299
score  :  5/6
board  :  
⬜⬜⬜⬜⬜
⬜🟩🟨🟨⬜
🟩🟩🟩🟨⬜
🟩🟩🟩⬜🟩
🟩🟩🟩🟩🟩


board_number  :  300
score  :  5/6
board  :  
🟩🟩🟩⬛⬛
🟩🟩🟩⬛🟩
🟩🟩🟩⬛🟩
🟩🟩🟩⬛🟩
🟩🟩🟩🟩🟩



board_number  :  300
score  :  5/6
boa

In [100]:
# Build a DataFrame from the processed results held in data_list.
df = pd.DataFrame(data_list)
# The bare expression on the last line makes the notebook display the frame.
df


Unnamed: 0,board_number,score,board
0,300,2/6,\n⬛⬛🟩⬛⬛\n🟩🟩🟩🟩🟩\n
1,300,6/6,\n⬛🟨🟩🟨⬛\n⬛🟩🟩⬛🟩\n🟩🟩🟩⬛🟩\n🟩🟩🟩⬛🟩\n🟩🟩🟩⬛🟩\n🟩🟩🟩🟩🟩
2,77,5/6,\n🟨⬜⬜🟨⬜\n⬜⬜🟨🟨⬜\n⬜⬜⬜⬜⬜\n🟨⬜🟨⬜⬜\n🟩🟩🟩🟩🟩
3,300,3/6,\n🟩⬛⬛⬛🟩\n🟩🟨⬛⬛🟩\n🟩🟩🟩🟩🟩
4,300,4/6,\n🟩⬛🟩⬛🟩\n⬛⬛⬛⬛⬛\n🟨⬛⬛🟨⬛\n🟩🟩🟩🟩🟩
...,...,...,...
72,98,2/6,\n🟩🟨🟩⬜🟨\n🟩🟩🟩🟩🟩\n⬜🟩⬜⬜🟨⬜🟩\n🟩🟩🟩🟩🟩🟩🟩\n
73,299,4/6,\n⬜⬜🟨⬜🟩\n⬜🟩⬜🟨🟩\n⬜🟩🟩🟩🟩\n🟩🟩🟩🟩🟩\n
74,300,6/6,\n⬛⬛⬛⬛⬛\n🟩⬛🟩⬛🟩\n🟩⬛🟩⬛🟩\n🟩⬛🟩⬛🟩\n🟩🟩🟩⬛🟩\n🟩🟩🟩🟩🟩
75,300,5/6,\n🟩⬛⬛⬛⬛\n🟩⬛⬛⬛🟩\n🟩🟩⬛⬛🟩\n🟩🟩⬛⬛🟩\n🟩🟩🟩🟩🟩
