### Importing libraries and generating a random dataset

In [5]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so the synthetic followers are identical on every run.
np.random.seed(42)

# Every randint call advances the same global generator, so the columns must be
# drawn in exactly this order to reproduce the dataset.
data = dict(
    follower_id=range(1, 101),
    follower_count=np.random.randint(50, 5000, size=100),
    following_count=np.random.randint(10, 5000, size=100),
    average_likes=np.random.randint(0, 100, size=100),
    average_comments=np.random.randint(0, 20, size=100),
    account_age_days=np.random.randint(1, 3650, size=100),  # Account age in days
)

df = pd.DataFrame(data)
# Engagement rate = average interactions (likes + comments) per follower.
df["engagement_rate"] = (
    df["average_likes"].add(df["average_comments"]).div(df["follower_count"])
)
df.head()

Unnamed: 0,follower_id,follower_count,following_count,average_likes,average_comments,account_age_days,engagement_rate
0,1,910,814,70,13,958,0.091209
1,2,3822,2741,51,5,3060,0.014652
2,3,3142,2783,32,2,453,0.010821
3,4,516,1580,39,8,1228,0.091085
4,5,4476,2700,38,4,2960,0.009383


### Defining thresholds for fake followers

In [6]:
# Cut-offs for the fake-follower heuristics; an account is flagged when ANY single rule fires.
# Minimum engagement rate, i.e. (average_likes + average_comments) / follower_count; lower values are flagged.
engagement_rate_threshold = 0.005
# Maximum follower_count / following_count ratio; higher values are flagged.
follower_following_ratio_threshold = 2.0
# Minimum account age in days; younger accounts are flagged.
account_age_threshold = 30

### Flagging potential fake followers based on the criteria

In [7]:
# An account is flagged as soon as any one heuristic fires:
# low engagement, a high follower/following ratio, or a very young account.
df["is_fake"] = (
    df["engagement_rate"].lt(engagement_rate_threshold)
    | df["follower_count"].div(df["following_count"]).gt(follower_following_ratio_threshold)
    | df["account_age_days"].lt(account_age_threshold)
)

# Keep only the flagged rows for inspection and for the report.
fake_followers = df.loc[df["is_fake"]]
print("Potential Fake Followers:", fake_followers, sep="\n")

Potential Fake Followers:
    follower_id  follower_count  following_count  average_likes  \
6             7            3221             1038              0   
7             8            2969              512             10   
11           12            2441              216             49   
15           16            3435              573             41   
17           18            4893             1767              6   
21           22            2608             1069              1   
28           29            3055             3070              8   
33           34            3252             1991              2   
34           35            3606             1673             19   
35           36            3940             1539             23   
43           44            2091              719             83   
48           49            2606              673             92   
52           53            3202             1505             53   
55           56            3993     

### Function to generate a report

In [8]:
def generate_report(
    df,
    fake_followers,
    *,
    engagement_threshold=None,
    ratio_threshold=None,
    age_threshold=None,
    output_path="fake_follower_report.txt",
):
    """Build the fake-follower report, write it to a text file and print it.

    Args:
        df: DataFrame holding every analysed follower; only its length is used.
        fake_followers: DataFrame with the flagged rows; its length is counted
            and its contents are listed in the report.
        engagement_threshold: Engagement-rate cut-off shown in the report.
            Defaults to the notebook-level ``engagement_rate_threshold``.
        ratio_threshold: Follower/following ratio cut-off shown in the report.
            Defaults to the notebook-level ``follower_following_ratio_threshold``.
        age_threshold: Account-age cut-off (days) shown in the report.
            Defaults to the notebook-level ``account_age_threshold``.
        output_path: File the report is written to (overwritten if present).

    Returns:
        None. The report is written to ``output_path`` and printed to stdout.
    """
    # Fall back to the notebook-level thresholds so existing two-argument calls
    # keep working; explicit arguments make the function usable on its own.
    if engagement_threshold is None:
        engagement_threshold = engagement_rate_threshold
    if ratio_threshold is None:
        ratio_threshold = follower_following_ratio_threshold
    if age_threshold is None:
        age_threshold = account_age_threshold

    total_followers = len(df)
    total_fake_followers = len(fake_followers)
    # Guard the empty dataset: there is nothing to divide by, so report 0%.
    if total_followers:
        fake_percentage = (total_fake_followers / total_followers) * 100
    else:
        fake_percentage = 0.0

    # The template below is indented by four spaces. Only the first line of an
    # interpolated multi-line string inherits that indent, so indent the remaining
    # table rows explicitly to keep the header aligned with its columns.
    indent = "    "
    if total_fake_followers:
        table = fake_followers.to_string(index=False).replace("\n", "\n" + indent)
    else:
        table = "(none)"

    report = f"""
    Fake Follower Analysis Report
    -----------------------------
    Total Followers Analyzed: {total_followers}
    Potential Fake Followers: {total_fake_followers}
    Percentage of Fake Followers: {fake_percentage:.2f}%
    
    Criteria Used for Detection:
    1. Engagement Rate < {engagement_threshold}
    2. Follower/Following Ratio > {ratio_threshold}
    3. Account Age < {age_threshold} days

    List of Potential Fake Followers:
    {table}
    """

    # Explicit encoding keeps the file identical regardless of the platform default.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(report)

    print(report)

# Build the report for the flagged subset: prints it and writes fake_follower_report.txt.
generate_report(df=df, fake_followers=fake_followers)


    Fake Follower Analysis Report
    -----------------------------
    Total Followers Analyzed: 100
    Potential Fake Followers: 24
    Percentage of Fake Followers: 24.00%
    
    Criteria Used for Detection:
    1. Engagement Rate < 0.005
    2. Follower/Following Ratio > 2.0
    3. Account Age < 30 days

    List of Potential Fake Followers:
     follower_id  follower_count  following_count  average_likes  average_comments  account_age_days  engagement_rate  is_fake
           7            3221             1038              0                13              2522         0.004036     True
           8            2969              512             10                 2              2480         0.004042     True
          12            2441              216             49                 2              1313         0.020893     True
          16            3435              573             41                 7              2071         0.013974     True
          18            4893 