# Demo of how to identify the structure of a tweet

In [1]:
# This notebook demonstrates how to perform structural content analysis on text data, e.g. tweets.
# Structural analysis compares the structure of content (such as the order of content types) rather than the specific text.

In [2]:
import identifyStructure

In [3]:
# TEST: a single example input used to show how to get the document structure and content structure

# The sample mixes a retweet marker, at-mentions, hashtags, words, punctuation, urls, and emojis
test = 'RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com'

print('Original input:')
print(test, end='\n\n')


# Step 1: analyze the contents of the input text and label each segment with its content type.
# The result is a list of (segment, content type) tuples in the order they appear.
print('Step 1: Processed structure tokens')
list_of_token_tuples = identifyStructure.tokenizeStructure(test)
print(list_of_token_tuples, end='\n\n')


# Step 2: merge sequential tokens of the same content type into a single structure span.
# For each span, in order, the output holds the content type, the number of tokens,
# and a list of the content in that span.
print('Step 2: Full document structure with count tokens and contents')
full_document_structure_w_content = identifyStructure.getFullDocumentStructureWithContent(list_of_token_tuples)
print(full_document_structure_w_content, end='\n\n')


# Document structure: the order of content types plus the number of tokens in each span,
# e.g. three emojis in a row would be [('emoji',3)]
print('Document structure (list of content type and count of tokens)')
document_structure = identifyStructure.getDocumentStructure(full_document_structure_w_content)
print(document_structure, end='\n\n')


# Content structure: only the order of the content types, without token counts
print('Content structure is a list of the content types in order')
content_structure = identifyStructure.getContentStructure(full_document_structure_w_content)
print(content_structure)



Original input:
RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com

Step 1: Processed structure tokens
[('RT', 'RT'), (' ', 'space'), ('@here', 'at_mention'), (' ', 'space'), ('@there', 'at_mention'), (' ', 'space'), ('@everywhere', 'at_mention'), (' ', 'space'), ('#hashtag', 'hashtag'), (' ', 'space'), ('#emojitest', 'hashtag'), (' ', 'space'), ('is', 'word'), (' ', 'space'), ('all', 'word'), (' ', 'space'), ('4️⃣', 'emoji'), (' ', 'space'), ('❤️', 'emoji'), (' ', 'space'), ('more', 'word'), (' ', 'space'), ('🇦🇺', 'emoji'), (' ', 'space'), ('👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', 'emoji'), ('txt', 'word'), (' ', 'space'), ('and', 'word'), (' ', 'space'), ('more', 'word'), ('!', 'punctuation'), ('!', 'punctuation'), ('!', 'punctuation'), (' ', 'space'), ('https://www.url.com', 'url'), (' ', 'space'), ('🧵', 'emoji'), ('👨🏾\u200d👩🏾\u200d👧🏾\u200d👦🏾', 'emoji'), ('👩🏾\u200d💻', 'emoji'), ('👪🏿'

In [4]:
# Apply document structure to your data

import pandas as pd

# Raise the pandas display limits so long rows and wide text columns are shown in full
display_options = {
    'display.max_rows': 500,
    'display.max_columns': 500,
    'display.width': 1000,
    'display.max_colwidth': 1000,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)



In [5]:
# Read the sample CSV into a dataframe and preview its first five rows
sample_csv_path = './sample_data/sample_csv_data_w_emojis_utf8.csv'
df = pd.read_csv(sample_csv_path)
df.head()

Unnamed: 0,rownum,text,userid
0,1,Show your hands panther fans 🤚,1
1,2,@user92 @user14 so many people,2
2,3,RT @user19 @user42 @user11 no foolin! ⬇️ He’s not playing.,3
3,4,4,0
4,5,RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com,0


In [6]:
# NOTE: with a lot of data this step can take a while (e.g. an hour for over 30 million rows)
def _full_structure_for(text):
    """Tokenize one text value, then combine its tokens into structure spans with their content."""
    token_tuples = identifyStructure.tokenizeStructure(text)
    return identifyStructure.getFullDocumentStructureWithContent(token_tuples)


df['full_document_structure'] = df['text'].apply(_full_structure_for)
df.head(7)

Unnamed: 0,rownum,text,userid,full_document_structure
0,1,Show your hands panther fans 🤚,1,"[(text, 10, [Show, , your, , hands, , panther, , fans, ]), (emoji, 1, [🤚])]"
1,2,@user92 @user14 so many people,2,"[(at_mention, 2, [@user92, @user14]), (text, 5, [so, , many, , people])]"
2,3,RT @user19 @user42 @user11 no foolin! ⬇️ He’s not playing.,3,"[(RT, 1, [RT]), (at_mention, 3, [@user19, @user42, @user11]), (text, 3, [no, , foolin]), (punctuation, 1, [!]), (emoji, 1, [⬇️]), (text, 5, [He’s, , not, , playing]), (punctuation, 1, [. ])]"
3,4,4,0,"[(text, 1, [4])]"
4,5,RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com,0,"[(RT, 1, [RT]), (at_mention, 3, [@here, @there, @everywhere]), (hashtag, 2, [#hashtag, #emojitest]), (text, 4, [is, , all, ]), (emoji, 2, [4️⃣, ❤️]), (text, 2, [more, ]), (emoji, 2, [🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾]), (text, 5, [txt, , and, , more]), (punctuation, 3, [!, !, !]), (url, 1, [https://www.url.com]), (emoji, 8, [🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]), (url, 1, [https://www.url.com])]"
5,6,,0,[]
6,7,Time for happy hour! Going to Rossi's @user103 https://www.url.com,4,"[(text, 7, [Time, , for, , happy, , hour]), (punctuation, 1, [!]), (text, 6, [Going, , to, , Rossi's, ]), (at_mention, 1, [@user103]), (url, 1, [https://www.url.com])]"


In [7]:
# Derive the two summary columns from the full document structure:
# the (content type, token count) list and the plain ordered list of content types
structure_extractors = {
    'document_structure': identifyStructure.getDocumentStructure,
    'content_structure': identifyStructure.getContentStructure,
}
for column_name, extractor in structure_extractors.items():
    df[column_name] = df['full_document_structure'].apply(extractor)

df.head(7)

Unnamed: 0,rownum,text,userid,full_document_structure,document_structure,content_structure
0,1,Show your hands panther fans 🤚,1,"[(text, 10, [Show, , your, , hands, , panther, , fans, ]), (emoji, 1, [🤚])]","[(text, 10), (emoji, 1)]","[text, emoji]"
1,2,@user92 @user14 so many people,2,"[(at_mention, 2, [@user92, @user14]), (text, 5, [so, , many, , people])]","[(at_mention, 2), (text, 5)]","[at_mention, text]"
2,3,RT @user19 @user42 @user11 no foolin! ⬇️ He’s not playing.,3,"[(RT, 1, [RT]), (at_mention, 3, [@user19, @user42, @user11]), (text, 3, [no, , foolin]), (punctuation, 1, [!]), (emoji, 1, [⬇️]), (text, 5, [He’s, , not, , playing]), (punctuation, 1, [. ])]","[(RT, 1), (at_mention, 3), (text, 3), (punctuation, 1), (emoji, 1), (text, 5), (punctuation, 1)]","[RT, at_mention, text, punctuation, emoji, text, punctuation]"
3,4,4,0,"[(text, 1, [4])]","[(text, 1)]",[text]
4,5,RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com,0,"[(RT, 1, [RT]), (at_mention, 3, [@here, @there, @everywhere]), (hashtag, 2, [#hashtag, #emojitest]), (text, 4, [is, , all, ]), (emoji, 2, [4️⃣, ❤️]), (text, 2, [more, ]), (emoji, 2, [🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾]), (text, 5, [txt, , and, , more]), (punctuation, 3, [!, !, !]), (url, 1, [https://www.url.com]), (emoji, 8, [🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]), (url, 1, [https://www.url.com])]","[(RT, 1), (at_mention, 3), (hashtag, 2), (text, 4), (emoji, 2), (text, 2), (emoji, 2), (text, 5), (punctuation, 3), (url, 1), (emoji, 8), (url, 1)]","[RT, at_mention, hashtag, text, emoji, text, emoji, text, punctuation, url, emoji, url]"
5,6,,0,[],[],[]
6,7,Time for happy hour! Going to Rossi's @user103 https://www.url.com,4,"[(text, 7, [Time, , for, , happy, , hour]), (punctuation, 1, [!]), (text, 6, [Going, , to, , Rossi's, ]), (at_mention, 1, [@user103]), (url, 1, [https://www.url.com])]","[(text, 7), (punctuation, 1), (text, 6), (at_mention, 1), (url, 1)]","[text, punctuation, text, at_mention, url]"


In [8]:
# Show the 7 most common document structures across the sample.
# A document structure is the list of content types with the count of tokens of the same type.

# The column holds lists, so convert each value with .astype(str) before counting occurrences
document_structure_counts = df['document_structure'].astype(str).value_counts()
document_structure_counts.head(7)

[('text', 6), ('emoji', 1)]                                                       3
[('at_mention', 5), ('text', 1), ('punctuation', 1), ('emoji', 3)]                3
[('text', 4), ('emoji', 1)]                                                       2
[('text', 1), ('punctuation', 1), ('emoji', 1)]                                   2
[('text', 3), ('punctuation', 1), ('emoji', 6)]                                   2
[('text', 8), ('emoji', 1)]                                                       2
[('text', 5), ('punctuation', 1), ('emoji', 1), ('at_mention', 1), ('url', 1)]    1
Name: document_structure, dtype: int64

In [9]:
# Show the 10 most common content structures (ordered content types only) across the sample
content_structure_counts = df['content_structure'].astype(str).value_counts()
content_structure_counts.head(10)

['text', 'emoji']                                 15
['text', 'punctuation', 'emoji']                   8
['text']                                           7
['RT', 'at_mention', 'text', 'emoji', 'text']      4
['text', 'emoji', 'url']                           3
['at_mention', 'text', 'punctuation', 'emoji']     3
['emoji', 'text']                                  3
['text', 'punctuation', 'text']                    3
['text', 'emoji', 'text', 'emoji']                 2
['text', 'punctuation', 'emoji', 'at_mention']     2
Name: content_structure, dtype: int64

In [10]:
# Build a dataframe of the 10 most common content structures and how many rows use each one
top10_counts = df['content_structure'].astype(str).value_counts().head(10)
top10_content_structures_df = top10_counts.reset_index()
# Name the columns explicitly: the structure (as a string) and its row count
top10_content_structures_df.columns = ['content_structure', 'count_rows']
top10_content_structures_df

Unnamed: 0,content_structure,count_rows
0,"['text', 'emoji']",15
1,"['text', 'punctuation', 'emoji']",8
2,['text'],7
3,"['RT', 'at_mention', 'text', 'emoji', 'text']",4
4,"['text', 'emoji', 'url']",3
5,"['at_mention', 'text', 'punctuation', 'emoji']",3
6,"['emoji', 'text']",3
7,"['text', 'punctuation', 'text']",3
8,"['text', 'emoji', 'text', 'emoji']",2
9,"['text', 'punctuation', 'emoji', 'at_mention']",2


In [11]:
# Count the distinct users (authors) behind each of the top 10 content structures
list_of_top_10_content_structures = top10_content_structures_df['content_structure'].tolist()

# Convert the list-valued column to strings once, outside the loop: repeating the
# conversion for every structure re-scans the whole dataframe each time
content_structure_as_str = df['content_structure'].astype(str)

# For each top structure, select its rows and count the unique userid values
content_structure_count_of_users = [
    df.loc[content_structure_as_str == content_struct, 'userid'].nunique()
    for content_struct in list_of_top_10_content_structures
]
top10_content_structures_df['author_count'] = content_structure_count_of_users
top10_content_structures_df

Unnamed: 0,content_structure,count_rows,author_count
0,"['text', 'emoji']",15,14
1,"['text', 'punctuation', 'emoji']",8,3
2,['text'],7,7
3,"['RT', 'at_mention', 'text', 'emoji', 'text']",4,1
4,"['text', 'emoji', 'url']",3,3
5,"['at_mention', 'text', 'punctuation', 'emoji']",3,3
6,"['emoji', 'text']",3,3
7,"['text', 'punctuation', 'text']",3,3
8,"['text', 'emoji', 'text', 'emoji']",2,2
9,"['text', 'punctuation', 'emoji', 'at_mention']",2,2


In [12]:
# Pull out the content spans of a specific type, e.g. all at_mention spans of a document
def _spans_of_type(full_document_structure, content_type, as_tuples=True):
    """Return the contents of every span whose content type equals `content_type`.

    Each entry of `full_document_structure` is indexed as (content type, token count, contents).
    With `as_tuples=True` each span's contents is converted to a tuple; otherwise the
    original contents list is returned as-is.
    """
    matching_contents = [span[2] for span in full_document_structure if span[0] == content_type]
    if as_tuples:
        return [tuple(contents) for contents in matching_contents]
    return matching_contents


for span_type in ('at_mention', 'emoji', 'url'):
    df[span_type + '_spans'] = df['full_document_structure'].apply(_spans_of_type, args=(span_type,))
# Same emoji spans again, but keeping each span as a list instead of a tuple
df['emoji_spans_as_lists'] = df['full_document_structure'].apply(_spans_of_type, args=('emoji', False))

df.head(10)



Unnamed: 0,rownum,text,userid,full_document_structure,document_structure,content_structure,at_mention_spans,emoji_spans,url_spans,emoji_spans_as_lists
0,1,Show your hands panther fans 🤚,1,"[(text, 10, [Show, , your, , hands, , panther, , fans, ]), (emoji, 1, [🤚])]","[(text, 10), (emoji, 1)]","[text, emoji]",[],"[(🤚,)]",[],[[🤚]]
1,2,@user92 @user14 so many people,2,"[(at_mention, 2, [@user92, @user14]), (text, 5, [so, , many, , people])]","[(at_mention, 2), (text, 5)]","[at_mention, text]","[(@user92, @user14)]",[],[],[]
2,3,RT @user19 @user42 @user11 no foolin! ⬇️ He’s not playing.,3,"[(RT, 1, [RT]), (at_mention, 3, [@user19, @user42, @user11]), (text, 3, [no, , foolin]), (punctuation, 1, [!]), (emoji, 1, [⬇️]), (text, 5, [He’s, , not, , playing]), (punctuation, 1, [. ])]","[(RT, 1), (at_mention, 3), (text, 3), (punctuation, 1), (emoji, 1), (text, 5), (punctuation, 1)]","[RT, at_mention, text, punctuation, emoji, text, punctuation]","[(@user19, @user42, @user11)]","[(⬇️,)]",[],[[⬇️]]
3,4,4,0,"[(text, 1, [4])]","[(text, 1)]",[text],[],[],[],[]
4,5,RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com,0,"[(RT, 1, [RT]), (at_mention, 3, [@here, @there, @everywhere]), (hashtag, 2, [#hashtag, #emojitest]), (text, 4, [is, , all, ]), (emoji, 2, [4️⃣, ❤️]), (text, 2, [more, ]), (emoji, 2, [🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾]), (text, 5, [txt, , and, , more]), (punctuation, 3, [!, !, !]), (url, 1, [https://www.url.com]), (emoji, 8, [🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]), (url, 1, [https://www.url.com])]","[(RT, 1), (at_mention, 3), (hashtag, 2), (text, 4), (emoji, 2), (text, 2), (emoji, 2), (text, 5), (punctuation, 3), (url, 1), (emoji, 8), (url, 1)]","[RT, at_mention, hashtag, text, emoji, text, emoji, text, punctuation, url, emoji, url]","[(@here, @there, @everywhere)]","[(4️⃣, ❤️), (🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾), (🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠)]","[(https://www.url.com,), (https://www.url.com,)]","[[4️⃣, ❤️], [🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾], [🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]]"
5,6,,0,[],[],[],[],[],[],[]
6,7,Time for happy hour! Going to Rossi's @user103 https://www.url.com,4,"[(text, 7, [Time, , for, , happy, , hour]), (punctuation, 1, [!]), (text, 6, [Going, , to, , Rossi's, ]), (at_mention, 1, [@user103]), (url, 1, [https://www.url.com])]","[(text, 7), (punctuation, 1), (text, 6), (at_mention, 1), (url, 1)]","[text, punctuation, text, at_mention, url]","[(@user103,)]",[],"[(https://www.url.com,)]",[]
7,8,snowtastrophe out there be carefulz 🌨❄️,5,"[(text, 10, [snowtastrophe, , out, , there, , be, , carefulz, ]), (emoji, 2, [🌨️, ❄️])]","[(text, 10), (emoji, 2)]","[text, emoji]",[],"[(🌨️, ❄️)]",[],"[[🌨️, ❄️]]"
8,9,RT @user9 No Duh🤷🏻‍♂️ Who cares,89,"[(RT, 1, [RT]), (at_mention, 1, [@user9]), (text, 3, [No, , Duh]), (emoji, 1, [🤷🏻‍♂️]), (text, 3, [Who, , cares])]","[(RT, 1), (at_mention, 1), (text, 3), (emoji, 1), (text, 3)]","[RT, at_mention, text, emoji, text]","[(@user9,)]","[(🤷🏻‍♂️,)]",[],[[🤷🏻‍♂️]]
9,10,This is my fav place 🍜🍲😋 ❤️,24,"[(text, 10, [This, , is, , my, , fav, , place, ]), (emoji, 4, [🍜, 🍲, 😋, ❤️])]","[(text, 10), (emoji, 4)]","[text, emoji]",[],"[(🍜, 🍲, 😋, ❤️)]",[],"[[🍜, 🍲, 😋, ❤️]]"


In [13]:
# Top 5 most common span values, with the count of rows, for each span column in turn:
# at_mention spans, then emoji spans, then url spans.
# The span columns hold lists, so each value is converted with .astype(str) before counting.
for spans_column in ('at_mention_spans', 'emoji_spans', 'url_spans'):
    print(df[spans_column].astype(str).value_counts()[:5])


[]                 64
[('@user85',)]      7
[('@user9',)]       5
[('@user103',)]     3
[('@user2',)]       2
Name: at_mention_spans, dtype: int64
[]                   24
[('👉',)]              3
[('🤣',)]              3
[('👇', '👇', '👇')]     3
[('😎',)]              2
Name: emoji_spans, dtype: int64
[]                                                                         97
[('http://www.url.com',)]                                                  12
[('https://www.url.com',)]                                                  7
[('http://www.url.com', 'http://www.url.com'), ('http://www.url.com',)]     1
[('https://www.url.com',), ('https://www.url.com',)]                        1
Name: url_spans, dtype: int64


In [14]:
# Use the extractEmojis helpers to get the list of emojis in each text,
# then the unique emojis from that list
emoji_tools = identifyStructure.extractEmojis
df['emoji_list'] = df['text'].apply(emoji_tools.getEmojisFromText)
df['emoji_unique_list'] = df['emoji_list'].apply(emoji_tools.getUniqueEmojisFromEmojiList)
df.head(10)

Unnamed: 0,rownum,text,userid,full_document_structure,document_structure,content_structure,at_mention_spans,emoji_spans,url_spans,emoji_spans_as_lists,emoji_list,emoji_unique_list
0,1,Show your hands panther fans 🤚,1,"[(text, 10, [Show, , your, , hands, , panther, , fans, ]), (emoji, 1, [🤚])]","[(text, 10), (emoji, 1)]","[text, emoji]",[],"[(🤚,)]",[],[[🤚]],[🤚],[🤚]
1,2,@user92 @user14 so many people,2,"[(at_mention, 2, [@user92, @user14]), (text, 5, [so, , many, , people])]","[(at_mention, 2), (text, 5)]","[at_mention, text]","[(@user92, @user14)]",[],[],[],[],[]
2,3,RT @user19 @user42 @user11 no foolin! ⬇️ He’s not playing.,3,"[(RT, 1, [RT]), (at_mention, 3, [@user19, @user42, @user11]), (text, 3, [no, , foolin]), (punctuation, 1, [!]), (emoji, 1, [⬇️]), (text, 5, [He’s, , not, , playing]), (punctuation, 1, [. ])]","[(RT, 1), (at_mention, 3), (text, 3), (punctuation, 1), (emoji, 1), (text, 5), (punctuation, 1)]","[RT, at_mention, text, punctuation, emoji, text, punctuation]","[(@user19, @user42, @user11)]","[(⬇️,)]",[],[[⬇️]],[⬇️],[⬇️]
3,4,4,0,"[(text, 1, [4])]","[(text, 1)]",[text],[],[],[],[],[],[]
4,5,RT @here @there @everywhere #hashtag #emojitest is all 4️⃣ ❤️ more 🇦🇺 👨🏾‍👩🏾‍👧🏾‍👦🏾txt and more!!! https://www.url.com 🧵👨🏾‍👩🏾‍👧🏾‍👦🏾👩🏾‍💻👪🏿 🗳️🗳 😃 🟠https://www.url.com,0,"[(RT, 1, [RT]), (at_mention, 3, [@here, @there, @everywhere]), (hashtag, 2, [#hashtag, #emojitest]), (text, 4, [is, , all, ]), (emoji, 2, [4️⃣, ❤️]), (text, 2, [more, ]), (emoji, 2, [🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾]), (text, 5, [txt, , and, , more]), (punctuation, 3, [!, !, !]), (url, 1, [https://www.url.com]), (emoji, 8, [🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]), (url, 1, [https://www.url.com])]","[(RT, 1), (at_mention, 3), (hashtag, 2), (text, 4), (emoji, 2), (text, 2), (emoji, 2), (text, 5), (punctuation, 3), (url, 1), (emoji, 8), (url, 1)]","[RT, at_mention, hashtag, text, emoji, text, emoji, text, punctuation, url, emoji, url]","[(@here, @there, @everywhere)]","[(4️⃣, ❤️), (🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾), (🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠)]","[(https://www.url.com,), (https://www.url.com,)]","[[4️⃣, ❤️], [🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾], [🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]]","[4️⃣, ❤️, 🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 🧵, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 🗳️, 😃, 🟠]","[4️⃣, ❤️, 🇦🇺, 👨🏾‍👩🏾‍👧🏾‍👦🏾, 👩🏾‍💻, 👪🏿, 🗳️, 😃, 🟠, 🧵]"
5,6,,0,[],[],[],[],[],[],[],[],[]
6,7,Time for happy hour! Going to Rossi's @user103 https://www.url.com,4,"[(text, 7, [Time, , for, , happy, , hour]), (punctuation, 1, [!]), (text, 6, [Going, , to, , Rossi's, ]), (at_mention, 1, [@user103]), (url, 1, [https://www.url.com])]","[(text, 7), (punctuation, 1), (text, 6), (at_mention, 1), (url, 1)]","[text, punctuation, text, at_mention, url]","[(@user103,)]",[],"[(https://www.url.com,)]",[],[],[]
7,8,snowtastrophe out there be carefulz 🌨❄️,5,"[(text, 10, [snowtastrophe, , out, , there, , be, , carefulz, ]), (emoji, 2, [🌨️, ❄️])]","[(text, 10), (emoji, 2)]","[text, emoji]",[],"[(🌨️, ❄️)]",[],"[[🌨️, ❄️]]","[🌨️, ❄️]","[❄️, 🌨️]"
8,9,RT @user9 No Duh🤷🏻‍♂️ Who cares,89,"[(RT, 1, [RT]), (at_mention, 1, [@user9]), (text, 3, [No, , Duh]), (emoji, 1, [🤷🏻‍♂️]), (text, 3, [Who, , cares])]","[(RT, 1), (at_mention, 1), (text, 3), (emoji, 1), (text, 3)]","[RT, at_mention, text, emoji, text]","[(@user9,)]","[(🤷🏻‍♂️,)]",[],[[🤷🏻‍♂️]],[🤷🏻‍♂️],[🤷🏻‍♂️]
9,10,This is my fav place 🍜🍲😋 ❤️,24,"[(text, 10, [This, , is, , my, , fav, , place, ]), (emoji, 4, [🍜, 🍲, 😋, ❤️])]","[(text, 10), (emoji, 4)]","[text, emoji]",[],"[(🍜, 🍲, 😋, ❤️)]",[],"[[🍜, 🍲, 😋, ❤️]]","[🍜, 🍲, 😋, ❤️]","[❤️, 🍜, 🍲, 😋]"


In [15]:
# Save the processed dataframe as both CSV (UTF-8 encoded) and Excel, without the index column
df.to_csv('processed_output_of_structural_content_of_sample_data.csv', index=False, encoding='utf8')
# DataFrame.to_excel no longer accepts `encoding` (deprecated in pandas 1.5, removed in 2.0,
# where passing it raises TypeError), so the argument is omitted here
df.to_excel('processed_output_of_structural_content_of_sample_data.xlsx', index=False)
