In [2]:
import torch
from torch_geometric.data import Data

num_nodes = 100

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[:20] = True  

x = torch.randn(num_nodes, 16)  
y = torch.randint(0, 3, (num_nodes,))  

edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long)  

data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask)


In [3]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    def __init__(self):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(16, 32)
        self.conv2 = GCNConv(32, 3)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss

for epoch in range(200):
    loss = train()
    print(f'Epoch {epoch+1}: Loss = {loss.item()}')



Epoch 1: Loss = 1.1416312456130981
Epoch 2: Loss = 0.9766517877578735
Epoch 3: Loss = 0.8443997502326965
Epoch 4: Loss = 0.7360615730285645
Epoch 5: Loss = 0.6455391049385071
Epoch 6: Loss = 0.5686070322990417
Epoch 7: Loss = 0.5029894113540649
Epoch 8: Loss = 0.4471339285373688
Epoch 9: Loss = 0.39898037910461426
Epoch 10: Loss = 0.3572620749473572
Epoch 11: Loss = 0.3208051323890686
Epoch 12: Loss = 0.2894327938556671
Epoch 13: Loss = 0.2623589336872101
Epoch 14: Loss = 0.23931550979614258
Epoch 15: Loss = 0.2199365794658661
Epoch 16: Loss = 0.2037183940410614
Epoch 17: Loss = 0.1905381679534912
Epoch 18: Loss = 0.17951081693172455
Epoch 19: Loss = 0.170089989900589
Epoch 20: Loss = 0.16206490993499756
Epoch 21: Loss = 0.15493540465831757
Epoch 22: Loss = 0.14863745868206024
Epoch 23: Loss = 0.14282920956611633
Epoch 24: Loss = 0.13739457726478577
Epoch 25: Loss = 0.13230040669441223
Epoch 26: Loss = 0.12755826115608215
Epoch 27: Loss = 0.12328652292490005
Epoch 28: Loss = 0.11930672

In [8]:
import pandas as pd
import networkx as nx
from node2vec import Node2Vec
from sklearn.cluster import KMeans

data = pd.read_csv('all_collected_data.csv', encoding="EUC-KR")
edges = data[["출발 행정동 코드", "도착 행정동 코드", "이동인구(합)"]]

G = nx.Graph()
for index, row in edges.iterrows():
    if pd.notna(row["이동인구(합)"]):
        G.add_edge(
            row["출발 행정동 코드"], row["도착 행정동 코드"], weight=row["이동인구(합)"]
        )

node2vec = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, workers=4)

model = node2vec.fit(window=10, min_count=1, batch_words=4)

node_embeddings = model.wv.vectors

kmeans = KMeans(n_clusters=5)
communities = kmeans.fit_predict(node_embeddings)

print("Detected communities:", communities)


Computing transition probabilities: 100%|██████████| 77/77 [00:00<00:00, 231.08it/s]


Detected communities: [0 0 0 3 2 4 0 4 1 4 3 3 0 1 4 0 4 1 3 3 2 1 1 1 3 1 0 4 2 0 3 1 2 0 0 4 4
 2 3 0 3 0 3 3 4 3 0 1 2 3 0 0 3 3 0 2 3 3 0 3 1 3 3 3 3 3 3 0 0 3 0 1 0 2
 1 2 3]


  super()._check_params_vs_input(X, default_n_init=10)


In [17]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
import networkx as nx
from torch_geometric.utils import from_networkx
import pandas as pd

data_df = pd.read_csv('all_collected_data.csv', encoding="EUC-KR")
edges = data_df[["출발 행정동 코드", "도착 행정동 코드", "이동인구(합)"]]

G = nx.Graph()
for index, row in edges.iterrows():
    if pd.notna(row["이동인구(합)"]):
        G.add_edge(
            int(row["출발 행정동 코드"]), int(row["도착 행정동 코드"]), weight=float(row["이동인구(합)"])
        )

data = from_networkx(G)

node_embeddings = torch.tensor(node_embeddings, dtype=torch.float) 
data.x = node_embeddings  
data.y = torch.tensor(communities, dtype=torch.long)

class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(data.num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, max(data.y)+1)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(hidden_channels=64).to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    return loss.item()

for epoch in range(200):
    loss = train()
    if epoch % 100 == 0 :
        print(f'Epoch {epoch+1}: Loss = {loss}')

model.eval()
_, pred = model(data.x, data.edge_index).max(dim=1)
correct = (pred == data.y).sum().item()
accuracy = correct / data.y.size(0)
print(f'Accuracy: {accuracy:.4f}')

from collections import defaultdict

model.eval()
_, pred = model(data.x, data.edge_index).max(dim=1)

community_nodes = defaultdict(list)
for node_index, community in enumerate(pred.tolist()):
    community_nodes[community].append(node_index)

for community, nodes in community_nodes.items():
    print(f'Community {community}: Nodes {nodes}')


  node_embeddings = torch.tensor(node_embeddings, dtype=torch.float)  # node2vec의 결과


tensor([0, 0, 0, 3, 2, 4, 0, 4, 1, 4, 3, 3, 0, 1, 4, 0, 4, 1, 3, 3, 2, 1, 1, 1,
        3, 1, 0, 4, 2, 0, 3, 1, 2, 0, 0, 4, 4, 2, 3, 0, 3, 0, 3, 3, 4, 3, 0, 1,
        2, 3, 0, 0, 3, 3, 0, 2, 3, 3, 0, 3, 1, 3, 3, 3, 3, 3, 3, 0, 0, 3, 0, 1,
        0, 2, 1, 2, 3])
Epoch 1: Loss = 1.614825963973999
Epoch 101: Loss = 1.4794566631317139
Accuracy: 0.3377
Community 3: Nodes [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76]


In [20]:
import networkx as nx
import pandas as pd
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
import numpy as np
import random

data_df = pd.read_csv('all_collected_data.csv', encoding="EUC-KR")
edges = data_df[["출발 행정동 코드", "도착 행정동 코드", "이동인구(합)"]]

G = nx.Graph()
for index, row in edges.iterrows():
    if pd.notna(row["이동인구(합)"]):
        G.add_edge(
            int(row["출발 행정동 코드"]), int(row["도착 행정동 코드"]), weight=float(row["이동인구(합)"])
        )

def perform_random_walks(graph, num_walks, walk_length):
    walks = []
    for _ in range(num_walks):
        for starting_node in graph.nodes():
            walk = [starting_node]
            while len(walk) < walk_length:
                current_node = walk[-1]
                neighbors = list(graph.neighbors(current_node))
                if not neighbors:
                    break
                next_node = random.choice(neighbors)
                walk.append(next_node)
            walks.append(walk)
    return walks


# DeepWalk 실행
walks = perform_random_walks(G, num_walks=10, walk_length=80)
walks = [[str(node) for node in walk] for walk in walks]

model = Word2Vec(sentences=walks, vector_size=64, window=10, min_count=1, sg=1, workers=4, epochs=10)

node_embeddings = np.array([model.wv[str(node)] for node in G.nodes()])

kmeans = KMeans(n_clusters=10)
communities = kmeans.fit_predict(node_embeddings)

print("Detected communities:", communities)


Detected communities: [2 3 3 3 2 3 3 9 2 3 9 2 3 2 3 2 2 3 3 9 3 2 3 2 2 6 0 7 8 6 6 8 6 7 6 1 6
 7 5 6 7 4 7 5 7 6 5 8 7 7 0 7 1 7 5 5 5 0 7 4 1 7 6 6 0 5 0 6 1 0 8 6 0 0
 7 0 7]


  super()._check_params_vs_input(X, default_n_init=10)


In [21]:
import random

def perform_random_walks(graph, num_walks, walk_length):
    walks = []
    for _ in range(num_walks):
        for starting_node in graph.nodes():
            walk = [starting_node]
            while len(walk) < walk_length:
                current_node = walk[-1]
                neighbors = list(graph.neighbors(current_node))
                if not neighbors:
                    break
                next_node = random.choice(neighbors)
                walk.append(next_node)
            walks.append(walk)
    return walks

import pandas as pd
import networkx as nx
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
import numpy as np

data_df = pd.read_csv('all_collected_data.csv', encoding="EUC-KR")
edges = data_df[["출발 행정동 코드", "도착 행정동 코드", "이동인구(합)"]]

G = nx.Graph()
for index, row in edges.iterrows():
    if pd.notna(row["이동인구(합)"]):
        G.add_edge(
            int(row["출발 행정동 코드"]), int(row["도착 행정동 코드"]), weight=float(row["이동인구(합)"])
        )

walks = perform_random_walks(G, num_walks=10, walk_length=80)
walks = [[str(node) for node in walk] for walk in walks]

model = Word2Vec(sentences=walks, vector_size=64, window=10, min_count=1, sg=1, workers=4, epochs=10)
node_embeddings = np.array([model.wv[str(node)] for node in G.nodes()])

kmeans = KMeans(n_clusters=10)
communities = kmeans.fit_predict(node_embeddings)

print("Detected communities:", communities)


Detected communities: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 3 4 3 2 0 0 0 3 2 0 4
 2 3 2 2 2 3 0 2 3 3 0 2 0 4 2 2 0 4 3 0 4 0 3 3 3 2 0 2 3 4 2 4 0 4 4 3 3
 4 3 2]


  super()._check_params_vs_input(X, default_n_init=10)


In [22]:
import pandas as pd
import networkx as nx
from gensim.models import Word2Vec
from sklearn.cluster import KMeans
import numpy as np
import random

def perform_random_walks(graph, num_walks, walk_length):
    walks = []
    for _ in range(num_walks):
        for starting_node in graph.nodes():
            walk = [starting_node]
            while len(walk) < walk_length:
                current_node = walk[-1]
                neighbors = list(graph.neighbors(current_node))
                if not neighbors:
                    break
                next_node = random.choice(neighbors)
                walk.append(next_node)
            walks.append(walk)
    return walks

data_df = pd.read_csv('all_collected_data.csv', encoding="EUC-KR")
edges = data_df[["출발 행정동 코드", "도착 행정동 코드", "이동인구(합)"]]

G = nx.Graph()
for index, row in edges.iterrows():
    if pd.notna(row["이동인구(합)"]):
        G.add_edge(
            int(row["출발 행정동 코드"]), int(row["도착 행정동 코드"]), weight=float(row["이동인구(합)"])
        )

walks = perform_random_walks(G, num_walks=10, walk_length=80)
walks = [[str(node) for node in walk] for walk in walks]

model = Word2Vec(sentences=walks, vector_size=64, window=10, min_count=1, sg=1, workers=4, epochs=10)
node_embeddings = np.array([model.wv[str(node)] for node in G.nodes()])

kmeans = KMeans(n_clusters=10)
communities = kmeans.fit_predict(node_embeddings)

community_dict = {}
for node, community in zip(G.nodes(), communities):
    if community not in community_dict:
        community_dict[community] = []
    community_dict[community].append(node)

for community, nodes in community_dict.items():
    print(f"Community {community}: {nodes}")


Community 3: [11010, 11030, 11060, 11100, 11120, 11170, 11180, 11190, 11200, 11220]
Community 8: [11020, 11040, 11080, 11090, 11110, 11130, 11160, 11230, 11250]
Community 1: [11050, 11070, 11140, 11150, 11210, 11240]
Community 2: [23010, 23080, 31092, 31101, 31110, 31120, 31260, 31280]
Community 4: [23020, 23060, 31011, 31012, 31023, 31191, 31240, 31270, 31370]
Community 0: [23030, 23310, 31042, 31080, 31192]
Community 9: [23040, 23050, 23070, 31014, 31030, 31070, 31103, 31104, 31150, 31160, 31170, 31180, 31210, 31220, 31230]
Community 7: [23320, 31022, 31050, 31091, 31140, 31193, 31250, 31350, 31380]
Community 6: [31013, 31130, 31200]
Community 5: [31021, 31041, 31060]


  super()._check_params_vs_input(X, default_n_init=10)
