In [None]:
import pandas as pd
# Map each processed-tweets CSV to the company label used as the key of the
# result dictionaries below (and therefore as the bar label in the plots).
name_mapping = dict([
    ("blinkit_processed_tweets.csv", "blinkit"),
    ("bigbasket_processed_tweets.csv", "bigbasket"),
    ("zepto_processed_tweets.csv", "zepto"),
    ("swiggyinstamart_processed_tweets.csv", "swiggy instamart"),
])

# CSV files to process, in the same order as the mapping above
# (dicts preserve insertion order, so this yields blinkit, bigbasket, zepto,
# swiggy instamart).
file_paths = list(name_mapping)

# Per-company aggregates, keyed by company label. These start empty and are
# filled in by the loading loop.
total_engagements = dict()
total_like_to_view_ratios = dict()
total_retweet_to_view_ratios = dict()

# Per-company DataFrames, keyed by company label.
dfs = dict()
def _with_engagement_metrics(tweets):
    """Return a copy of ``tweets`` with per-tweet engagement columns added.

    Reads the ``likeCount``, ``retweetCount`` and ``viewCount`` columns and
    appends, in this order:

    * ``engagement`` -- likeCount + retweetCount + viewCount
    * ``like_to_view_ratio`` -- likeCount / viewCount
    * ``retweet_to_view_ratio`` -- retweetCount / viewCount

    Both ratios are 0 for tweets whose ``viewCount`` is 0. A missing (NaN)
    ``viewCount`` compares unequal to 0, so the ratio stays NaN for that row.

    The computation is vectorised, so it also works on a frame with no rows.
    """
    views = tweets['viewCount']
    has_views = views != 0
    # NOTE(review): views are summed into `engagement`, so they will likely
    # dominate likes and retweets -- confirm this definition is intended.
    return tweets.assign(
        engagement=tweets['likeCount'] + tweets['retweetCount'] + views,
        # Divide first, then overwrite the zero-view rows (inf/NaN) with 0.
        like_to_view_ratio=tweets['likeCount'].div(views).where(has_views, 0),
        retweet_to_view_ratio=tweets['retweetCount'].div(views).where(has_views, 0),
    )


for file_path in file_paths:
    # Read the CSV file and add the per-tweet metrics.
    data = _with_engagement_metrics(pd.read_csv(file_path))

    # Base name with and without extension. `file_name` is not used below;
    # it is kept as a notebook-level name in case other cells rely on it.
    base_name = file_path.split("/")[-1]
    file_name = base_name.split(".")[0]

    # Look up the company label. Try the path exactly as listed first, then
    # fall back to the bare file name so that a path with a directory prefix
    # still resolves against name_mapping.
    df_name = name_mapping.get(file_path) or name_mapping.get(base_name)

    # Files without a mapping entry are skipped.
    if df_name:
        dfs[df_name] = data
        total_engagements[df_name] = data['engagement'].sum()
        # Means of the per-tweet ratios (not the ratio of the column totals).
        total_like_to_view_ratios[df_name] = data['like_to_view_ratio'].mean()
        total_retweet_to_view_ratios[df_name] = data['retweet_to_view_ratio'].mean()
# Plotting: one bar chart per aggregate, stacked vertically in a single figure.
# NOTE(review): `plt` is not imported in the cells visible here; it is
# presumably matplotlib.pyplot imported elsewhere -- confirm, otherwise this
# cell fails on a fresh kernel.
chart_specs = (
    # (per-company values, panel title, y-axis label, bar colour)
    (total_engagements, 'Total Engagement', 'Engagement', 'skyblue'),
    (total_like_to_view_ratios, 'Like-to-View Ratio', 'Score', 'lightgreen'),
    (total_retweet_to_view_ratios, 'Retweet-to-View Ratio', 'Score', 'coral'),
)

plt.figure(figsize=(7,15))

for panel_number, (per_company, panel_title, y_label, bar_color) in enumerate(chart_specs, start=1):
    # Panels are numbered from 1, top to bottom, in a 3-row single-column grid.
    plt.subplot(3, 1, panel_number)
    plt.bar(per_company.keys(), per_company.values(), color=bar_color)
    plt.title(panel_title)
    plt.xlabel('Companies')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()

In [None]:
# For every company, list the 20 tweets with the highest `engagement` value
# together with their URL.
for company, df in dfs.items():
    top_tweets = df.nlargest(20, 'engagement')

    # Build the whole report for this company, then emit it in one print.
    report_lines = [f"Top 20 tweets with most engagement for {company}:"]
    for index, tweet in top_tweets.iterrows():
        # NOTE(review): the label is the row's index in `df` plus one, not the
        # tweet's rank within the top 20 -- confirm that is what is wanted.
        report_lines.extend([
            f"Tweet {index + 1}:",
            f"   Engagement: {tweet['engagement']}",
            f"   Twitter URL: {tweet['twitterUrl']}",
        ])
    # Trailing "\n" entry reproduces the blank lines that separate companies.
    report_lines.append("\n")
    print("\n".join(report_lines))