In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import re
import numpy as np
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d

In [7]:
# Directory the figure is written to, and directory the result files are read from.
output_folder_path = "../outputs/RealWorld/facebookMedium/bots/01"
input_folder_path = "../../output/RealWorld/facebookMedium/bots/01"

# Wider option lists kept for reference (currently disabled):
#datasets=['astroPh', 'emailEU', 'facebook', 'facebookMedium', 'facebookArtist', 'gitInteractions', 'hepPh', 'lastfm', 'redditHyperlink', 'twitchDE', 'twitchENGB', 'twitchES', 'twitchFR', 'twitchPTBR', 'twitchRU']
#botRatio=['m0_', 'm25_', 'm50_', 'm75_', 'm100_', 'm125_', 'm150_', 'm175_', 'm200_']
datasets = ['facebookMedium']

# '01' .. '05'
startRatio = [f'{step:02d}' for step in range(1, 6)]

# 'r10_' .. 'r90_': filename tokens that serve as the outer keys of the
# data2 / average dictionaries below.
rewireRatio = [f'r{pct}_' for pct in range(10, 100, 10)]

# Filled in by create_data():
#   data2[rewire][bot]   -> list of final opinion densities
#   average[rewire][bot] -> mean of that list
data2 = {}
average = {}


In [8]:
def create_data(path, rewire_ratios=None, data_store=None, average_store=None):
    """Collect the final 'Frac' value of every result file in ``path`` and average it.

    Only ``.txt`` files are considered. A file's bot ratio is the
    ``_m<digits>_`` token in its name; it belongs to a rewiring rate when that
    rate's token (e.g. ``'r10_'``) occurs in its name. For every
    (rewiring rate, bot ratio) pair the last row's ``Frac`` value of each
    matching file is appended to ``data_store[rate][bot]`` and the mean of
    that list is stored in ``average_store[rate][bot]``.

    Parameters
    ----------
    path : str
        Directory holding the result files.
    rewire_ratios : list of str, optional
        Rewiring-rate filename tokens. Defaults to the module-level
        ``rewireRatio``.
    data_store : dict, optional
        Dictionary to fill with the per-file values. Defaults to the
        module-level ``data2``.
    average_store : dict, optional
        Dictionary to fill with the means. Defaults to the module-level
        ``average``.

    Notes
    -----
    Any existing entry for a rewiring rate is replaced. A pair with no
    matching files keeps an empty list in ``data_store`` and gets no entry in
    ``average_store`` (a warning is emitted) instead of raising
    ``ZeroDivisionError``.
    """
    import warnings

    if rewire_ratios is None:
        rewire_ratios = rewireRatio
    if data_store is None:
        data_store = data2
    if average_store is None:
        average_store = average

    # Restrict both key discovery and loading to the .txt result files.
    result_files = [name for name in os.listdir(path) if name.endswith(".txt")]

    # Map each '_m<digits>_' token to its numeric value; the token is the key
    # used in the dictionaries, the number is only used for ordering.
    bot_ratio_values = {}
    for name in result_files:
        match = re.search(r"_m(\d+)_", name)
        if match:
            bot_ratio_values.setdefault(match.group(0), int(match.group(1)))
    bot_ratios = sorted(bot_ratio_values, key=bot_ratio_values.get)

    # Outer key: rewiring rate. Inner key: bot ratio.
    for rr in rewire_ratios:
        data_store[rr] = {}
        average_store[rr] = {}
        for mat in bot_ratios:
            finals = []
            for name in result_files:
                if mat not in name or rr not in name:
                    continue
                content = pd.read_csv(
                    os.path.join(path, name),
                    names=['Epoch', 'Pop', 'Frac', 'DiscEdge'],
                    skiprows=1,
                    sep=" ",
                )
                # Final value of the run = 'Frac' in the last row.
                finals.append(content['Frac'].iloc[-1])
            data_store[rr][mat] = finals
            if finals:
                average_store[rr][mat] = sum(finals) / len(finals)
            else:
                warnings.warn(
                    f"no result files for rewiring rate {rr!r} and bot ratio "
                    f"{mat!r} in {path!r}; average omitted"
                )

In [9]:
def create_graphs():
    """Plot averaged final opinion density against bot ratio, one panel per rewiring rate.

    Reads the module-level ``average`` dictionary (outer key: rewiring-rate
    token, inner key: bot-ratio token, value: mean final density) and draws up
    to nine series on a 3x3 grid. For each series a quadratic
    ``a*x**2 + b*x + c`` is fitted with ``curve_fit`` and the bot ratio at
    which the fitted curve equals 0.5 is shown in the panel title (the title
    labels this value "Inflection Point").

    The crossing is the real root of ``a*x**2 + b*x + (c - 0.5)``: the
    smallest root inside the sampled x range if there is one, otherwise the
    real root closest to that range, otherwise ``nan``. Series with fewer
    than three points cannot be fitted with three parameters; they are still
    plotted and their crossing is reported as ``nan``.

    The figure is written to ``<output_folder_path>/LinePlot.png`` (the
    directory is created if missing) and then displayed.
    """
    # Regression model: polynomial of degree 2.
    def model(x, a, b, c):
        return a * x**2 + b * x + c

    # 3x3 grid, flattened so panels can be addressed by a single index.
    fig, axes = plt.subplots(3, 3, figsize=(15, 15))
    axes = axes.flatten()

    for idx, (outer_key, inner_dict) in enumerate(average.items()):
        if idx >= 9:
            break

        # Numeric part of the '_m<digits>_' key, divided by 10.
        x = np.array([float(k.strip('_m')) / 10 for k in inner_dict.keys()], dtype=float)
        y = np.array(list(inner_dict.values()), dtype=float)

        x_at_y_0_5 = float('nan')
        if len(x) >= 3:
            (a, b, c), _ = curve_fit(model, x, y)
            # Solve the fitted quadratic for y = 0.5 directly; inverting a
            # sampled curve is unreliable when the parabola is not monotonic
            # over the sampled range.
            roots = np.roots([a, b, c - 0.5])
            real_roots = np.real(roots[np.isclose(np.imag(roots), 0.0)])
            if real_roots.size:
                # Distance of each root to the sampled interval (0 if inside).
                dist = np.abs(real_roots - np.clip(real_roots, x.min(), x.max()))
                x_at_y_0_5 = real_roots[dist == dist.min()].min()

        ax = axes[idx]
        outer_key = ''.join(filter(str.isdigit, outer_key))
        ax.plot(x, y, marker='o', label=outer_key)
        ax.set_title(f'Rewiring Rate = {outer_key}%, Inflection Point = {x_at_y_0_5:.2f}%')
        ax.set_xlabel('Bot Ratio (%)')
        ax.set_ylabel('Final Opinion Density of Minority')
        ax.grid(True)

    # Remove panels that received no series.
    for idx in range(len(average), 9):
        fig.delaxes(axes[idx])

    plt.tight_layout()

    # Save before showing so the file is written from the fully drawn figure,
    # and make sure the target directory exists.
    os.makedirs(output_folder_path, exist_ok=True)
    figpath = os.path.join(output_folder_path, "LinePlot.png")
    fig.savefig(figpath, dpi=350)
    plt.show()
        

In [1]:
# Rebuild the averages from the input folder and redraw the figure once per
# configured dataset.
# NOTE(review): the loop body does not use `dataset`; every iteration reads the
# same input_folder_path.
for dataset in datasets:
    create_data(path=input_folder_path)
    create_graphs()

NameError: name 'datasets' is not defined