3.5 seed node selection

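Generate rules (scores) based on engagement. The method is described in terms of Tweet engagement (likes + retweets); the code below computes each account's score as FOLLOWERS × ER (engagement rate) + POTENTIAL REACH.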
Apply convolution operation to perform lightweight filtering.
Evaluate activation function for classification.
Perform batch normalization and concatenation for model input.
Iterate until convergence based on loss criteria.
This script processes your dataset with a moving-average convolution, a threshold activation function, and batch normalization to classify influenced nodes.

In [None]:
import pandas as pd
import numpy as np

def convert_to_numeric(value):
    """Convert an abbreviated count such as ``'267.1M'`` to a plain number.

    Strings ending in ``K``, ``M`` or ``B`` (case-insensitive) are scaled by
    one thousand, one million or one billion respectively. Any other value
    is handed to ``pd.to_numeric`` with ``errors='coerce'``.

    Args:
        value: A string, number, or missing value taken from a data column.

    Returns:
        The numeric value, or NaN when the input cannot be interpreted as a
        number. Returning NaN (instead of raising) keeps every entry numeric
        so that a later median-based fill works on the whole column.
    """
    if isinstance(value, str):
        text = value.strip()
        multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
        scale = multipliers.get(text[-1:].upper())
        if scale is not None:
            try:
                return float(text[:-1]) * scale
            except ValueError:
                # Malformed number in front of the suffix: treat it like any
                # other unparseable entry rather than aborting the whole run.
                return float('nan')
    return pd.to_numeric(value, errors='coerce')

def generate_rules(df):
    """Add a ``Score`` column computed from followers, ER and potential reach.

    The frame is modified in place and also returned. For each row,
    ``Score = FOLLOWERS * ER + POTENTIAL REACH`` after the inputs are cleaned:

    * ``FOLLOWERS`` and ``POTENTIAL REACH`` are converted element-wise with
      ``convert_to_numeric``.
    * ``ER`` has a trailing ``%`` stripped and is divided by 100.
    * Missing or unparseable entries in each of the three columns are
      replaced by that column's median.

    Args:
        df: DataFrame with ``FOLLOWERS``, ``ER`` and ``POTENTIAL REACH``
            columns. ``ER`` must hold strings (the ``.str`` accessor is used).

    Returns:
        The same DataFrame object, with the three columns made numeric and a
        new ``Score`` column.
    """
    for column in ('FOLLOWERS', 'POTENTIAL REACH'):
        df[column] = df[column].apply(convert_to_numeric)
        # Fill only after the whole column is numeric so the median is defined.
        df[column] = df[column].fillna(df[column].median())

    # Coerce instead of casting: placeholders such as '-' or 'N/A' become NaN
    # and are median-filled below, rather than raising ValueError.
    er_text = df['ER'].str.rstrip('%').str.strip()
    df['ER'] = pd.to_numeric(er_text, errors='coerce') / 100
    df['ER'] = df['ER'].fillna(df['ER'].median())

    df['Score'] = df['FOLLOWERS'] * df['ER'] + df['POTENTIAL REACH']
    return df

def convolution_operation(scores, kernel_size=3):
    """Smooth ``scores`` with a centred moving-average filter.

    Positions past either end of ``scores`` are treated as zero, so values
    near the edges are averaged with implicit zeros.

    Args:
        scores: One-dimensional sequence of numeric scores.
        kernel_size: Width of the averaging window; must be at least 1.

    Returns:
        A float array with exactly ``len(scores)`` elements. An empty input
        yields an empty array.

    Raises:
        ValueError: If ``kernel_size`` is smaller than 1.
    """
    if kernel_size < 1:
        raise ValueError("kernel_size must be a positive integer")

    values = np.asarray(scores, dtype=float)
    if values.size == 0:
        return values

    kernel = np.ones(kernel_size) / kernel_size
    # mode='same' returns max(len(scores), kernel_size) elements, which is the
    # wrong length when the input is shorter than the kernel. Slicing the
    # centre out of the full convolution always yields len(scores) elements
    # and matches mode='same' whenever the input is at least kernel-sized.
    full = np.convolve(values, kernel, mode='full')
    start = (kernel_size - 1) // 2
    return full[start:start + values.size]

def activation_function(c, eta):
    """HSF activation: threshold ``c`` against ``eta`` into binary labels.

    Entries strictly greater than ``eta`` are labelled 0; every other entry
    (including those equal to ``eta``) is labelled 1.
    """
    exceeds_threshold = c > eta
    return np.where(exceeds_threshold, 0, 1)

def batch_normalization(scores):
    """Standardize ``scores``: subtract the mean, divide by the std deviation.

    A small constant (1e-6) is added to the standard deviation so that a
    constant input does not cause a division by zero.
    """
    centered = scores - np.mean(scores)
    spread = np.std(scores) + 1e-6
    return centered / spread

def classify_influenced_nodes(df, eta=50_000_000, max_iter=100):
    """Classify influenced nodes based on engagement and reach.

    Pipeline: ``generate_rules`` adds a ``Score`` column, the scores are
    smoothed by ``convolution_operation``, thresholded against ``eta`` by
    ``activation_function`` and standardized by ``batch_normalization``.
    The results are stored on ``df`` as the ``Activated`` and ``Normalized``
    columns (``df`` is modified in place).

    Args:
        df: Input DataFrame; see ``generate_rules`` for the required columns.
        eta: Threshold applied to the smoothed scores.
        max_iter: Retained for backward compatibility; it has no effect.
            No step of the pipeline feeds its output back into the scores,
            so repeating the pass can never change the outcome.

    Returns:
        The rows of ``df`` whose ``Activated`` value is 1.
    """
    df = generate_rules(df)
    scores = df['Score'].values

    # NOTE: this used to sit inside `for _ in range(max_iter)` with a
    # "no zeros left" stop test, but `scores` was never updated, so every
    # iteration recomputed exactly the same values. One pass is equivalent.
    conv_scores = convolution_operation(scores)
    df['Activated'] = activation_function(conv_scores, eta)
    df['Normalized'] = batch_normalization(conv_scores)

    return df[df['Activated'] == 1]  # Return classified influenced nodes

# Location of the input CSV -- update this to point at your copy of the data.
DATASET_PATH = '/content/sample_data/instagram_data_india.csv'

# Read the dataset and run the classification pipeline on it.
df = pd.read_csv(DATASET_PATH)
influenced_nodes = classify_influenced_nodes(df)

# Show the rows that were classified as influenced nodes.
print(influenced_nodes)

      #                                NAME    FOLLOWERS      ER COUNTRY  \
0     1            Virat Kohli @virat.kohli  267100000.0  0.0002   India   
1     2         Narendra Modi @narendramodi   87300000.0  0.0223   India   
2     3            Alia Bhatt 💛 @aliaabhatt   83700000.0  0.0002   India   
3     4           Katrina Kaif @katrinakaif   79700000.0  0.0087   India   
4     5     दीपिका पादुकोण @deepikapadukone   78900000.0  0.0218   India   
..  ...                                 ...          ...     ...     ...   
95   96           Raghav Juyal @raghavjuyal   11400000.0  0.0334   India   
96   97  Siddharth Nigam @thesiddharthnigam   11300000.0  0.0087   India   
97   98             MC STΔN 💔 @m___c___stan   11100000.0  0.1319   India   
98   99               Sahil Khan @sahilkhan   11000000.0  0.0115   India   
99  100               Ajay Devgn @ajaydevgn   10800000.0  0.0212   India   

                                   TOPIC OF INFLUENCE  POTENTIAL REACH  \
0            