In [2]:
import pandas as pd

# Read the raw influencer records from disk into a DataFrame.
INPUT_CSV_PATH = "influencers.csv"
df = pd.read_csv(INPUT_CSV_PATH)

# Thresholds for the fake-follower heuristic, named so they can be tuned in one place.
LOW_ENGAGEMENT_PCT = 2
LARGE_AUDIENCE_FOLLOWERS = 100000

# Mask non-positive denominators to NaN before dividing. Otherwise a row with
# 0 followers or a cost_per_post of 0 produces inf, and an inf ROI would sort
# to the very top of the ranking built below.
valid_followers = df['followers'].where(df['followers'] > 0)
valid_cost = df['cost_per_post'].where(df['cost_per_post'] > 0)

# Engagement rate (%): (likes + comments) per follower, scaled to a percentage.
df['engagement_rate'] = ((df['likes'] + df['comments']) / valid_followers) * 100

# Potential fake followers: engagement rate below 2% with more than 100k followers.
# Rows with an undefined (NaN) engagement rate compare as False and are not flagged.
df['fake_follower_flag'] = (
    (df['engagement_rate'] < LOW_ENGAGEMENT_PCT)
    & (df['followers'] > LARGE_AUDIENCE_FOLLOWERS)
)

# ROI score: engagement rate (%) divided by cost per post.
df['roi_score'] = df['engagement_rate'] / valid_cost

# Rank every influencer from highest to lowest ROI; rows with an undefined ROI go last.
top_influencers = df.sort_values(by='roi_score', ascending=False, na_position='last')

# Save the ROI-ranked table to a new CSV, leaving out the DataFrame index.
OUTPUT_CSV_PATH = "influencer_analysis_results.csv"
top_influencers.to_csv(OUTPUT_CSV_PATH, index=False)

# Print two plain-text reports: per-influencer metrics, then the ROI ranking.
analysis_columns = ['name', 'followers', 'engagement_rate', 'fake_follower_flag', 'roi_score']
ranking_columns = ['name', 'roi_score']

print("\n=== Influencer Analysis ===")
print(df[analysis_columns])

print("\n=== Top Influencers by ROI ===")
print(top_influencers[ranking_columns])



=== Influencer Analysis ===
       name  followers  engagement_rate  fake_follower_flag  roi_score
0     Alice     150000         2.600000               False   0.005200
1       Bob      50000         9.200000               False   0.030667
2   Charlie     200000         0.800000                True   0.001000
3     Daisy      80000        10.250000               False   0.025625
4     Ethan     120000         1.833333                True   0.004074
5     Fiona      60000         9.500000               False   0.027143
6    George     180000         1.555556                True   0.002593
7    Hannah      90000         7.555556               False   0.019883
8       Ian      75000         4.666667               False   0.014583
9   Jasmine     110000         8.090909               False   0.014711
10    Kevin     140000         2.714286               False   0.005655
11    Laura      50000        10.200000               False   0.032903
12     Mike      95000         4.736842         