# Dummy social network using synthetic data

In [16]:
# Install the three third-party libraries this notebook imports (networkx, gqlalchemy, faker).
!pip install networkx gqlalchemy faker
# Save the output of `pip freeze` to requirements.txt so a later cell can install from it.
!pip freeze > requirements.txt

Collecting networkx
  Using cached networkx-3.4.2-py3-none-any.whl (1.7 MB)
Collecting gqlalchemy
  Downloading gqlalchemy-1.7.0-py3-none-any.whl (94 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.7/94.7 kB[0m [31m762.2 kB/s[0m eta [36m0:00:00[0m1m835.6 kB/s[0m eta [36m0:00:01[0m
[?25hCollecting faker
  Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting adlfs<2025.0.0,>=2023.9.0
  Downloading adlfs-2024.12.0-py3-none-any.whl (41 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dacite<2.0.0,>=1.6.0
  Downloading dacite-1.9.2-py3-none-any.whl (16 kB)
Collecting neo4j<6.0.0,>=4.4.3
  Using cached neo4j-5.28.1-py3-none-any.whl (312 kB)
Collecting numpy<2.0.0,>

In [1]:
# Install every package listed in requirements.txt.
!pip install -r requirements.txt



In [2]:
import random
import uuid
import faker
import networkx as nx
from gqlalchemy import Memgraph

In [3]:
# Initialize Faker
fake = faker.Faker()

# Create NetworkX Graph.
# A MultiDiGraph is used instead of a plain Graph because two users can be linked by
# several relationship types at once (e.g. FRIEND and EMAIL_CONTACT) and each link has
# a direction (who lists whom). A plain Graph keeps a single undirected edge per pair,
# so every later add_edge() on the same pair would overwrite the earlier "relationship".
G = nx.MultiDiGraph()

# Utility function to create a fake user
def create_user(platform):
    """Build one synthetic user profile for the given platform.

    Args:
        platform: Platform label stored verbatim under the "platform" key.

    Returns:
        A dict with a fresh UUID string as "user_id", fake identity fields
        (name, email, phone, age, gender, nationality), three empty relationship
        lists ("friendList", "connectionList", "emailList") and a
        "twitterInteraction" dict holding post keywords, four 0/1 sentiment
        values and four "<n>ms" reading times.
    """
    profile = {"user_id": str(uuid.uuid4()), "platform": platform}

    # Identity fields; the generators are called in the same order as the keys appear.
    profile["name"] = fake.name()
    profile["email"] = fake.email()
    profile["phone"] = fake.phone_number()
    profile["age"] = random.randint(18, 65)
    profile["gender"] = random.choice(["Male", "Female", "Other"])
    profile["nationality"] = fake.country()

    # Each relationship list starts out empty and gets its own list object.
    for list_field in ("friendList", "connectionList", "emailList"):
        profile[list_field] = []

    # Between one and five distinct keywords drawn from a fixed pool.
    keyword_pool = ["floods", "company event", "conference", "catering", "parties"]
    keyword_count = random.randint(1, 5)
    keywords = random.sample(keyword_pool, k=keyword_count)

    sentiments = [random.choice([0, 1]) for _ in range(4)]
    reading_times = [f"{random.randint(400, 700)}ms" for _ in range(4)]

    profile["twitterInteraction"] = {
        "post_keywords": keywords,
        "post_sentiment": sentiments,
        "time_spent_per_post": reading_times,
    }
    return profile

In [4]:
print("Generating Users")

# 100 synthetic profiles per platform, grouped by platform in the order listed.
users = [
    create_user(platform)
    for platform in ("Facebook", "Twitter", "LinkedIn")
    for _ in range(100)
]

# Index the profiles by user_id for quick access
user_map = dict((profile["user_id"], profile) for profile in users)

# Pool of ids the relationship cells sample from
user_ids = list(user_map)
print("User generation complete\n")


Generating Users
User generation complete



In [5]:
print("Create the connection lists")
for user in users:
    # Sample only from the *other* users: drawing from the full id pool could pick the
    # user itself, producing a self-friend / self-email link (and a self-loop edge later).
    other_user_ids = [candidate for candidate in user_ids if candidate != user["user_id"]]

    # Random friends for Facebook
    if user["platform"] == "Facebook":
        user["friendList"] = random.sample(other_user_ids, k=random.randint(5, 20))

    # Random connections for LinkedIn
    if user["platform"] == "LinkedIn":
        user["connectionList"] = random.sample(other_user_ids, k=random.randint(5, 20))

    # Random email communications (every platform)
    user["emailList"] = random.sample(other_user_ids, k=random.randint(5, 20))
print("Connection list creation complete\n")

Create the connection lists
Connection list creation complete



In [6]:
print("Add users to graph")

# One node per user, keyed by user_id and carrying the whole profile as node attributes.
for user in users:
    G.add_node(user["user_id"], **user)

# One edge per listed id. For each user the lists are walked in this order:
# friends, then LinkedIn connections, then email contacts.
for user in users:
    source_id = user["user_id"]
    for list_key, label in (
        ("friendList", "FRIEND"),
        ("connectionList", "LINKEDIN_CONNECTION"),
        ("emailList", "EMAIL_CONTACT"),
    ):
        for target_id in user[list_key]:
            # Skip ids that have no node in the graph.
            if not G.has_node(target_id):
                continue
            G.add_edge(source_id, target_id, relationship=label)
print("Addition of users to graph complete\n")

Add users to graph
Addition of users to graph complete



In [7]:
print("Dump data into memgraph")
# Connect to Memgraph and push the data
memgraph = Memgraph()

# Optional: Clear database
memgraph.drop_database()

# Property values are passed as query parameters instead of being spliced into the Cypher
# text, so a name or country containing a quote or backslash cannot break (or alter) the
# statement, and the lists are sent as real lists rather than as Python reprs.
create_user_query = """
CREATE (:User {
    user_id: $user_id,
    platform: $platform,
    name: $name,
    email: $email,
    phone: $phone,
    age: $age,
    gender: $gender,
    nationality: $nationality,
    twitter_post_keywords: $twitter_post_keywords,
    twitter_post_sentiment: $twitter_post_sentiment,
    twitter_time_spent_per_post: $twitter_time_spent_per_post
})
"""

# Create Nodes
for node_id, data in G.nodes(data=True):
    twitter = data["twitterInteraction"]
    memgraph.execute(
        create_user_query,
        parameters={
            "user_id": data["user_id"],
            "platform": data["platform"],
            "name": data["name"],
            "email": data["email"],
            "phone": data["phone"],
            "age": data["age"],
            "gender": data["gender"],
            "nationality": data["nationality"],
            "twitter_post_keywords": twitter["post_keywords"],
            "twitter_post_sentiment": twitter["post_sentiment"],
            "twitter_time_spent_per_post": twitter["time_spent_per_post"],
        },
    )

# Cypher cannot take a relationship type as a parameter, so the type is still interpolated;
# it is checked against the labels this notebook creates before being placed in the query.
allowed_relationships = {"FRIEND", "LINKEDIN_CONNECTION", "EMAIL_CONTACT"}

# Create Edges
for source, target, data in G.edges(data=True):
    relationship = data["relationship"]
    if relationship not in allowed_relationships:
        raise ValueError(f"Unexpected relationship type: {relationship!r}")
    query = f"""
    MATCH (a:User {{user_id: $source}})
    MATCH (b:User {{user_id: $target}})
    CREATE (a)-[:{relationship}]->(b)
    """
    memgraph.execute(query, parameters={"source": source, "target": target})

print("Data pushed to Memgraph successfully!")


Dump data into memgraph
Data pushed to Memgraph successfully!


# Company Sales agent

In [8]:
import json
from typing import List, Dict, Any
from uuid import uuid4

#### Get company employee data from Memgraph

In [9]:
from pprint import pprint

from gqlalchemy import Memgraph

# Connect to Memgraph (by default it connects to localhost:7687)
memgraph = Memgraph()

# Query to get nodes with 5 to 10 edges.
# The dump cell stores neither friendList/connectionList/emailList nor a
# twitterInteraction property on the nodes, so reading them as node properties
# always yields null. Here the three lists are rebuilt from the relationships
# (matched in either direction) and twitterInteraction is reassembled from the
# stored twitter_* properties. degree(n) counts a node's relationships once,
# replacing the two identical pattern comprehensions in the filter.
query = """
MATCH (n)
WITH n, degree(n) AS relationship_count
WHERE relationship_count >= 5 AND relationship_count <= 10
OPTIONAL MATCH (n)-[:FRIEND]-(friend)
WITH n, collect(DISTINCT friend.user_id) AS friendList
OPTIONAL MATCH (n)-[:LINKEDIN_CONNECTION]-(connection)
WITH n, friendList, collect(DISTINCT connection.user_id) AS connectionList
OPTIONAL MATCH (n)-[:EMAIL_CONTACT]-(contact)
WITH n, friendList, connectionList, collect(DISTINCT contact.user_id) AS emailList
RETURN n.user_id AS user_id,
       n.platform AS platform,
       n.name AS name,
       n.email AS email,
       n.phone AS phone,
       n.age AS age,
       n.gender AS gender,
       n.nationality AS nationality,
       friendList,
       connectionList,
       emailList,
       {
           post_keywords: n.twitter_post_keywords,
           post_sentiment: n.twitter_post_sentiment,
           time_spent_per_post: n.twitter_time_spent_per_post
       } AS twitterInteraction
"""

# Execute the query
results = memgraph.execute_and_fetch(query)

# Columns copied from each result record, in output order
user_fields = (
    "user_id",
    "platform",
    "name",
    "email",
    "phone",
    "age",
    "gender",
    "nationality",
    "friendList",
    "connectionList",
    "emailList",
    "twitterInteraction",
)

# Collect the results as plain dicts
users = [{field: record[field] for field in user_fields} for record in results]

# Optional: Pretty print the first few users
pprint(users[:5])


mg_raw_transport_recv: connection closed by server


DatabaseError: failed to receive chunk size

In [10]:
# user model
class UserData:
    """Plain record of one user's personal details and social lists."""

    def __init__(
        self,
        name: str, email: str, phone: str,
        age: int, gender: str, nationality: str,
        friend_list: List[Dict[str, Any]],
        connection_list: List[Dict[str, Any]],
        email_list: List[Dict[str, Any]],
        twitter_interaction: List[str],
    ):
        """Store every argument unchanged on the instance under the same name."""
        # Personal details
        self.name, self.email, self.phone = name, email, phone
        self.age, self.gender, self.nationality = age, gender, nationality
        # Social lists (kept by reference, not copied)
        self.friend_list, self.connection_list = friend_list, connection_list
        self.email_list, self.twitter_interaction = email_list, twitter_interaction

    def to_dict(self) -> Dict[str, Any]:
        """Return the ten stored fields as a dict keyed by attribute name."""
        field_order = (
            "name", "email", "phone", "age", "gender", "nationality",
            "friend_list", "connection_list", "email_list", "twitter_interaction",
        )
        return {field: getattr(self, field) for field in field_order}


In [None]:
# Function to construct a consolidated company profile
def construct_company_profile(user_data_list: List["UserData"]) -> Dict[str, Any]:
    """Aggregate a list of users into a single company profile.

    Args:
        user_data_list: Users to aggregate. Each item must expose ``to_dict()``,
            ``age``, ``gender``, ``nationality``, ``friend_list``,
            ``connection_list``, ``email_list`` and ``twitter_interaction``.
            The three lists and ``twitter_interaction`` may be ``None`` (treated
            as empty). ``twitter_interaction`` may be a list of keywords or a
            dict with a ``"post_keywords"`` list; for a dict only that list is
            counted.

    Returns:
        A dict with a fresh ``company_id`` (UUID string), ``total_users``,
        ``users`` (each user's ``to_dict()``), and ``aggregated_data`` holding
        the average age (0 for an empty input), gender and nationality counts,
        the ten most frequent Twitter keywords as ``(keyword, count)`` pairs,
        and the summed sizes of the friend / connection / email lists.
    """
    user_dicts = []
    total_age = 0
    gender_counts: Dict[Any, int] = {}
    nationality_counts: Dict[Any, int] = {}
    twitter_interaction_counts: Dict[Any, int] = {}
    total_friends = 0
    total_connections = 0
    total_email_interactions = 0

    for user_data in user_data_list:
        user_dicts.append(user_data.to_dict())

        # Aggregate age
        total_age += user_data.age

        # Aggregate gender and nationality distributions
        gender = user_data.gender
        gender_counts[gender] = gender_counts.get(gender, 0) + 1
        nationality = user_data.nationality
        nationality_counts[nationality] = nationality_counts.get(nationality, 0) + 1

        # Aggregate Twitter interactions. A dict-shaped value keeps its keywords under
        # "post_keywords"; iterating the dict itself would count its keys as keywords.
        keywords = user_data.twitter_interaction
        if isinstance(keywords, dict):
            keywords = keywords.get("post_keywords")
        for keyword in keywords or []:
            twitter_interaction_counts[keyword] = twitter_interaction_counts.get(keyword, 0) + 1

        # Aggregate social network stats; a missing (None) list counts as empty
        # instead of raising TypeError from len(None).
        total_friends += len(user_data.friend_list or [])
        total_connections += len(user_data.connection_list or [])
        total_email_interactions += len(user_data.email_list or [])

    # Top 10 keywords by count; the sort is stable, so ties keep first-seen order.
    top_keywords = sorted(
        twitter_interaction_counts.items(), key=lambda item: item[1], reverse=True
    )[:10]

    return {
        "company_id": str(uuid4()),
        "total_users": len(user_data_list),
        "users": user_dicts,
        "aggregated_data": {
            # Guard against division by zero for an empty input.
            "average_age": total_age / len(user_data_list) if user_data_list else 0,
            "gender_distribution": gender_counts,
            "nationality_distribution": nationality_counts,
            "common_twitter_interactions": top_keywords,
            "social_network_stats": {
                "total_friends": total_friends,
                "total_connections": total_connections,
                "total_email_interactions": total_email_interactions,
            },
        },
    }