In [None]:
#Imports
import gymnasium as gym
from gym import spaces
import numpy as np
import networkx as nx

In [29]:
class SocialNetworkEnv(gym.Env):

    #Env setup
    def __init__(self, numConsumer = 10):
        super().__init__()
        agent_type = np.random.choice(["real-information", "fake-information", "fact-checker", "consumer"])

        # Create Network
        self.graph = nx.DiGraph()
        
        # Add consumers
        for i in range(numConsumer):
            self.graph.add_node(i,
                                type="consumer", 
                                Q_value=0.0, 
                                trust_level=1.0, 
                                stored_information=[], 
                                reward=0, 
                                penalty=0)
        
        # Each consumer connects with ~2 others
        for _ in range(numConsumer * 2):  
            src = np.random.randint(0, numConsumer)
            dst = np.random.randint(0, numConsumer)
            if src != dst:
                self.graph.add_edge(src, dst, weight=1.0)


        # Fake information agent connected with every consumer
        self.graph.add_node(numConsumer,
                            type="fake-information", 
                            Q_value=0.0, 
                            reward=0, 
                            penalty=0)
        
        for node in self.graph.nodes:
            if node != 0:
                self.graph.add_edge(numConsumer, node)

        

        self.numConsumers = numConsumer
        #Action Space is a binary array indicating whether the agent sends information
        self.action_space = spaces.Box(low=0, high=1, shape=(numConsumer,), dtype=np.int32)
        self.observation_space = spaces.Dict({
            "trust_levels": spaces.Box(low=0, high=1, shape=(numConsumer,), dtype=np.float32),
            "Q_values": spaces.Box(low=-np.inf, high=np.inf, shape=(numConsumer,), dtype=np.float32),
        })

    #Reset Env
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)

        # Reset agents
        for node in self.graph.nodes:
            nodeType = self.graph.nodes[node]["type"]
            if nodeType == "consumer":
                self.graph.nodes[node]["Q_value"] = 0.0
                self.graph.nodes[node]["trust_level"] = 1.0
                self.graph.nodes[node]["stored_information"] = []
                self.graph.nodes[node]["reward"] = 0
                self.graph.nodes[node]["penalty"] = 0
            elif nodeType == "fake-information":
                self.graph.nodes[node]["Q_value"] = 0.0
                self.graph.nodes[node]["reward"] = 0
                self.graph.nodes[node]["penalty"] = 0

        # Generate random trust levels for all agents
        trust_levels = np.random.rand(self.numConsumers)
        
        # Return initial observation
        return {"trust_levels": trust_levels, 
                "Q_values": np.zeros(self.numConsumers)}, {}
        
    def step(self, action):
        rewards = 0
        penalties = 0

        #CHANGE LATER
        node = self.numConsumers

        for neighbor, send_info in zip(self.graph.neighbors(node), action):
            if send_info == 1:  # Propagate news to this neighbor
                neighbor_data = self.graph.nodes[neighbor]
                
                if self.graph.nodes[node]["type"] == "consumer":
                    # Update trust-level and stored-information based on the source
                    if self.graph.nodes[node]["type"] == "fake-information":
                        neighbor_data["trust_level"] -= 0.1
                    elif self.graph.nodes[node]["type"] == "real-information":
                        neighbor_data["trust_level"] += 0.1
                    
                    neighbor_data["stored_information"].append({
                        "news": f"news_from_{node}",
                        "source": node,
                        "truthfulness": np.random.uniform(0, 100) if self.graph.nodes[node]["type"] == "fake-information" else np.random.uniform(50, 100)
                    })
                    
                    if neighbor_data["type"] == "consumer":
                        rewards += 0.5  # Influence gained by consumer agents




        max_q_value = max(rewards - penalties, 0)
        self.graph.nodes[node]["Q_value"] += 0.1 * (rewards - penalties + 0.9 * max_q_value - self.graph.nodes[node]["Q_value"])
    
        # Return the updated state
        trust_levels = np.array([self.graph.nodes[i]["trust_level"] for i in range(self.numConsumers)])
        q_values = np.array([self.graph.nodes[i]["Q_value"] for i in range(self.numConsumers)])
        done = False  # In this simulation, the environment does not end
        info = {}

        return {"trust_levels": trust_levels, "Q_values": q_values}, rewards, done, info
    

    def render(self, mode="human"):
        """
        Optional: Render the graph for debugging or visualization.
        """
        if mode == "human":
            print("Graph Nodes and Attributes:")
            for node, data in self.graph.nodes(data=True):
                print(f"Node {node}: {data}")
            print("Graph Edges:")
            for src, dst, data in self.graph.edges(data=True):
                print(f"Edge {src} -> {dst}: {data}")

In [30]:
env = SocialNetworkEnv(numConsumer=10)
obs, _ = env.reset()
actions = np.random.choice([0, 1], size=(env.numConsumers))  # Random actions for each agent and neighbor
obs, rewards, done, info = env.step(actions)
env.render()

Graph Nodes and Attributes:
Node 0: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 1: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 2: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 3: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 4: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 5: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 6: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 7: {'type': 'consumer', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 8: {'type': 'consumer',

Simulation Results:
Average Influence: 476.64
Max Influence: 1000.00
Min Influence: -1000.00


In [None]:
# import gymnasium as gym
# from gymnasium import spaces
# import numpy as np
# import networkx as nx


# class SocialNetworkEnv(gym.Env):
#     """
#     Custom Environment for simulating a social network with agents of different types interacting.
#     """
#     def __init__(self, num_agents=10):
#         super(SocialNetworkEnv, self).__init__()
        
#         # Define the graph representing the network
#         self.graph = nx.DiGraph()
        
#         # Add nodes (agents) with attributes
#         for i in range(num_agents):
#             agent_type = np.random.choice(["real-information", "fake-information", "fact-checker", "consumer"])
#             self.graph.add_node(i, 
#                                 type=agent_type, 
#                                 Q_value=0.0, 
#                                 trust_level=1.0, 
#                                 stored_information=[], 
#                                 reward=0, 
#                                 penalty=0)
        
#         # Add random edges (connections) with initial weights
#         for _ in range(num_agents * 2):  # Each agent connects with ~2 others
#             src = np.random.randint(0, num_agents)
#             dst = np.random.randint(0, num_agents)
#             if src != dst:
#                 self.graph.add_edge(src, dst, weight=1.0)
        
#         # Action and observation spaces
#         self.num_agents = num_agents
#         self.action_space = spaces.Discrete(2)  # Each agent can choose to propagate news (1) or not (0)
#         self.observation_space = spaces.Dict({
#             "trust_levels": spaces.Box(low=0, high=1, shape=(num_agents,), dtype=np.float32),
#             "Q_values": spaces.Box(low=-np.inf, high=np.inf, shape=(num_agents,), dtype=np.float32),
#         })

#     def reset(self, seed=None, options=None):
#         super().reset(seed=seed)
        
#         # Reset agents
#         for node in self.graph.nodes:
#             self.graph.nodes[node]["Q_value"] = 0.0
#             self.graph.nodes[node]["trust_level"] = 1.0
#             self.graph.nodes[node]["stored_information"] = []
#             self.graph.nodes[node]["reward"] = 0
#             self.graph.nodes[node]["penalty"] = 0
        
#         # Generate random trust levels for all agents
#         trust_levels = np.random.rand(self.num_agents)
        
#         # Return initial observation
#         return {"trust_levels": trust_levels, 
#                 "Q_values": np.zeros(self.num_agents)}, {}

#     def step(self, actions):
#         """
#         Step function simulates one iteration of agents interacting in the graph.
#         Each agent decides whether to propagate news or not based on its Q-value and trust-level.
#         """
#         rewards = np.zeros(self.num_agents)
#         penalties = np.zeros(self.num_agents)
        
#         # Simulate actions for all agents
#         for node, action in enumerate(actions):
#             if action == 1:  # Propagate news
#                 for neighbor in self.graph.neighbors(node):
#                     neighbor_data = self.graph.nodes[neighbor]
                    
#                     # Update trust-level and stored-information based on the source
#                     if self.graph.nodes[node]["type"] == "fake-information":
#                         neighbor_data["trust_level"] -= 0.1
#                     elif self.graph.nodes[node]["type"] == "real-information":
#                         neighbor_data["trust_level"] += 0.1
                    
#                     neighbor_data["stored_information"].append({
#                         "news": f"news_from_{node}",
#                         "source": node,
#                         "truthfulness": np.random.uniform(0, 100) if self.graph.nodes[node]["type"] == "fake-information" else np.random.uniform(50, 100)
#                     })
                    
#                     # Assign rewards/penalties
#                     if neighbor_data["type"] == "fact-checker":
#                         if neighbor_data["stored_information"][-1]["truthfulness"] < 50:
#                             rewards[node] += 1  # Caught fake news
#                             penalties[node] += 1  # Penalize misinformation source
                    
#                     if neighbor_data["type"] == "consumer":
#                         rewards[node] += 0.5  # Influence gained

#         # Update Q-values for all nodes
#         for node in self.graph.nodes:
#             max_q_value = max(rewards[node] - penalties[node], 0)
#             self.graph.nodes[node]["Q_value"] += 0.1 * (rewards[node] - penalties[node] + 0.9 * max_q_value - self.graph.nodes[node]["Q_value"])
        
#         # Return the updated state
#         trust_levels = np.array([self.graph.nodes[i]["trust_level"] for i in range(self.num_agents)])
#         q_values = np.array([self.graph.nodes[i]["Q_value"] for i in range(self.num_agents)])
#         done = False  # In this simulation, the environment does not end
#         info = {}

#         return {"trust_levels": trust_levels, "Q_values": q_values}, rewards, done, info

#     def render(self, mode="human"):
#         """
#         Optional: Render the graph for debugging or visualization.
#         """
#         if mode == "human":
#             print("Graph Nodes and Attributes:")
#             for node, data in self.graph.nodes(data=True):
#                 print(f"Node {node}: {data}")
#             print("Graph Edges:")
#             for src, dst, data in self.graph.edges(data=True):
#                 print(f"Edge {src} -> {dst}: {data}")


# env = SocialNetworkEnv(num_agents=10)
# obs, _ = env.reset()
# print(obs)
# actions = np.random.choice([0, 1], size=10)  # Random actions
# obs, rewards, done, info = env.step(actions)
# print(obs, rewards)
# env.render()


{'trust_levels': array([0.34337057, 0.63483647, 0.47055293, 0.53561651, 0.7468535 ,
       0.09321691, 0.03131633, 0.70775766, 0.65756737, 0.12412937]), 'Q_values': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])}
{'trust_levels': array([1. , 0.9, 0.9, 1. , 0.9, 0.9, 1. , 1. , 1. , 1. ]), 'Q_values': array([0.   , 0.   , 0.095, 0.   , 0.   , 0.   , 0.095, 0.   , 0.19 ,
       0.   ])} [0.  0.  0.5 0.  0.  0.  0.5 0.  1.  0. ]
Graph Nodes and Attributes:
Node 0: {'type': 'fake-information', 'Q_value': 0.0, 'trust_level': 1.0, 'stored_information': [], 'reward': 0, 'penalty': 0}
Node 1: {'type': 'fact-checker', 'Q_value': 0.0, 'trust_level': 0.9, 'stored_information': [{'news': 'news_from_0', 'source': 0, 'truthfulness': 84.93738182499965}, {'news': 'news_from_7', 'source': 7, 'truthfulness': 72.38982891008934}], 'reward': 0, 'penalty': 0}
Node 2: {'type': 'fact-checker', 'Q_value': 0.095, 'trust_level': 0.9, 'stored_information': [{'news': 'news_from_4', 'source': 4, 'truthfulness': 64.