In [25]:
import re
import pandas as pd
import os
import matplotlib.pyplot as plt
from collections import Counter

# Define the emoji pattern
emoji_pattern = re.compile(
    r'[\U0001F1E6-\U0001F1FF]|'  # Flags
    r'[\U0001F300-\U0001F5FF]|'  # Symbols & Pictographs
    r'[\U0001F600-\U0001F64F]|'  # Emoticons
    r'[\U0001F680-\U0001F6FF]|'  # Transport & Map Symbols
    r'[\U0001F700-\U0001F77F]|'  # Alchemical Symbols
    r'[\U0001F780-\U0001F7FF]|'  # Geometric Shapes Extended
    r'[\U0001F800-\U0001F8FF]|'  # Supplemental Arrows-C
    r'[\U0001F900-\U0001F9FF]|'  # Supplemental Symbols and Pictographs
    r'[\U0001FA70-\U0001FAFF]|'  # Symbols and Pictographs Extended-A
    r'[\U00002700-\U000027BF]|'  # Dingbats
    r'[\U0001F000-\U0001FFFF]|'  # Miscellaneous Symbols
    r'\uFE0F|'                   # Variation Selector
    r'\u200D'                    # Zero Width Joiner
, flags=re.UNICODE)
heart_emoji = '❤'
cryingface_emoji = '😂'
fire_emoji = '🔥'
smiling_emoji = '😊'
pray_emoji='🙏'
# Folder containing CSV files
folder_path = './comments'

# Combine all CSV files into a single DataFrame
emoji_counts_list = []
heart_counts_list = []
for file in os.listdir(folder_path):
    if file.endswith('.csv'):
        video_data = pd.read_csv(os.path.join(folder_path, file), on_bad_lines='skip', encoding='utf-8', engine='python')
        if 'comment' in video_data.columns:
            video_data['emojis'] =video_data['comment'].apply(lambda x: ''.join([e for e in emoji_pattern.findall(str(x)) if e.strip() != '']))
            video_data['heart_count'] = video_data['comment'].apply(lambda x: str(x).count(heart_emoji))
            video_data['cryingface_count'] = video_data['comment'].apply(lambda x: str(x).count(cryingface_emoji))
            video_data['fire_count'] = video_data['comment'].apply(lambda x: str(x).count(fire_emoji))
            video_data['smiling_count'] = video_data['comment'].apply(lambda x: str(x).count(smiling_emoji))
            video_data['pray_count'] = video_data['comment'].apply(lambda x: str(x).count(pray_emoji))
            video_data['emoji_count'] = video_data['emojis'].apply(lambda x: len(x))
            
            # Sum the emoji counts for the entire CSV
            total_emojis = video_data['emoji_count'].sum()
            video_heart_counts = video_data['heart_count'].sum()
            video_cryingface_counts = video_data['cryingface_count'].sum()
            video_fire_counts = video_data['fire_count'].sum()
            video_smiling_counts = video_data['smiling_count'].sum()
            video_pray_counts = video_data['pray_count'].sum()
            # Store the result
            emoji_counts_list.append({'filename': file, 'emoji_count': total_emojis,'heart_count':video_heart_counts,'cryingface_count':video_cryingface_counts,'fire_count':video_fire_counts,'smiling_count':video_smiling_counts,'pray_count':video_pray_counts})
emoji_counts_df = pd.DataFrame(emoji_counts_list)
emoji_counts_df['filename'] = emoji_counts_df['filename'].str.replace('.csv', '', regex=False)
emoji_counts_df

Unnamed: 0,filename,emoji_count,heart_count,cryingface_count,fire_count,smiling_count,pray_count
0,"El Chombo, Dancing Green Alien - Dame Tu Cosit...",152198,17105,46451,890,5495,237
1,Coldplay - Hymn For The Weekend (Official Video),107254,26438,1323,1761,2316,1941
2,Marshmello - Alone (Official Music Video),104374,31020,2510,2983,5170,302
3,BLACKPINK - 'Kill This Love' MV,200647,61052,8091,2107,10606,218
4,Justin Bieber - Baby ft. Ludacris,100273,38569,5590,826,4618,325
...,...,...,...,...,...,...,...
95,Adele - Rolling in the Deep (Official Music Vi...,67447,14566,1352,1921,1025,344
96,Ed Sheeran - Thinking Out Loud (Official Music...,72891,15061,1227,1072,1379,652
97,Bruno Mars - That’s What I Like [Official Musi...,39651,4984,1593,2036,543,236
98,Eminem - Love The Way You Lie ft. Rihanna,85557,11612,1636,3706,1275,507


In [26]:
import plotly.express as px
top_10_csvs = emoji_counts_df.sort_values(by='emoji_count', ascending=False).head(10)

# Plot using Plotly
fig = px.bar(
    top_10_csvs,
    x='filename',
    y='emoji_count',
    title='Top 10 CSV Files with Most Emojis',
    labels={'filename': 'CSV File', 'emoji_count': 'Total Emojis'},
    text='emoji_count',
    template='plotly_white',
    color_discrete_sequence=['green']
)
fig.update_layout(font_family="Arial Unicode MS",xaxis=dict(tickangle=90))  # Set a font supporting emojis
fig.show()

In [27]:
import plotly.express as px
top_10_csvs = emoji_counts_df.sort_values(by='heart_count', ascending=False).head(10)

# Plot using Plotly
fig = px.bar(
    top_10_csvs,
    x='filename',
    y='heart_count',
    title='Top 10 CSV Files with Most Hearted Emojis',
    labels={'filename': 'CSV File', 'heart_count': 'Total Heart Emojis'},
    text='heart_count',
    template='plotly_white',
    color_discrete_sequence=['red']
)
fig.update_layout(font_family="Arial Unicode MS",xaxis=dict(tickangle=90))  # Set a font supporting emojis
fig.show()

In [28]:
import plotly.express as px
top_10_csvs = emoji_counts_df.sort_values(by='cryingface_count', ascending=False).head(10)

# Plot using Plotly
fig = px.bar(
    top_10_csvs,
    x='filename',
    y='cryingface_count',
    title='Top 10 CSV Files with Most 😂 Emojis',
    labels={'filename': 'CSV File', 'cryingface_count': 'Total Cryingface Emojis'},
    text='cryingface_count',
    template='plotly_white',
    color_discrete_sequence=['purple']
)
fig.update_layout(font_family="Arial Unicode MS",xaxis=dict(tickangle=90))  # Set a font supporting emojis
fig.show()

In [29]:
import plotly.express as px
top_10_csvs = emoji_counts_df.sort_values(by='fire_count', ascending=False).head(10)

# Plot using Plotly
fig = px.bar(
    top_10_csvs,
    x='filename',
    y='fire_count',
    title='Top 10 CSV Files with Most 🔥 Emojis',
    labels={'filename': 'CSV File', 'fire_count': 'Total Fire Emojis'},
    text='fire_count',
    template='plotly_white',
    color_discrete_sequence=['orange']
)
fig.update_layout(font_family="Arial Unicode MS",xaxis=dict(tickangle=90))  # Set a font supporting emojis
fig.show()

In [31]:
import plotly.express as px
top_10_csvs = emoji_counts_df.sort_values(by='smiling_count', ascending=False).head(10)

# Plot using Plotly
fig = px.bar(
    top_10_csvs,
    x='filename',
    y='smiling_count',
    title='Top 10 CSV Files with Most 😊 Emojis',
    labels={'filename': 'CSV File', 'smiling_count': 'Total Smiling Emojis'},
    text='smiling_count',
    template='plotly_white',
    color_discrete_sequence=['brown']
)
fig.update_layout(font_family="Arial Unicode MS",xaxis=dict(tickangle=90))  # Set a font supporting emojis
fig.show()

In [32]:
import plotly.express as px
top_10_csvs = emoji_counts_df.sort_values(by='pray_count', ascending=False).head(10)

# Plot using Plotly
fig = px.bar(
    top_10_csvs,
    x='filename',
    y='pray_count',
    title='Top 10 CSV Files with Most 🙏 Emojis',
    labels={'filename': 'CSV File', 'pray_count': 'Total Pray Emojis'},
    text='pray_count',
    template='plotly_white',
    color_discrete_sequence=['blue']
)
fig.update_layout(font_family="Arial Unicode MS",xaxis=dict(tickangle=90))  # Set a font supporting emojis
fig.show()