In [1]:
import networkx as nx
import pandas as pd
import numpy as np

In [2]:
feature_df = pd.read_csv('data/graph_feature.csv')
structure_df = pd.read_csv('data/graph_structure.csv')

In [38]:
col = feature_df.columns.tolist()
col.remove('color')
new_col = ['color_red', 'color_black', 'color_green', 'color_blue'] + col
print(new_col)

['color_red', 'color_black', 'color_green', 'color_blue', 'session', 'uid', 'round', 'score', 'num_of_neighbor', 'hist_color', 'hist_neighbor', 'hist_skip', 'action']


In [39]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

ct = ColumnTransformer([('color', OneHotEncoder(), [3])], remainder='passthrough')
feature_onehot = np.array(ct.fit_transform(feature_df))
feature_onehot_df = pd.DataFrame(feature_onehot, columns=new_col)
feature_onehot_df

Unnamed: 0,color_red,color_black,color_green,color_blue,session,uid,round,score,num_of_neighbor,hist_color,hist_neighbor,hist_skip,action
0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,1.0,3.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,4.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,1.0,5.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2148,0.0,1.0,0.0,0.0,13.0,16.0,6.0,100.0,7.0,0.2,0.0,0.8,1.0
2149,0.0,0.0,1.0,0.0,13.0,17.0,6.0,100.0,7.0,0.2,0.2,0.6,1.0
2150,1.0,0.0,0.0,0.0,13.0,18.0,6.0,89.0,9.0,0.4,0.0,0.6,2.0
2151,0.0,0.0,0.0,1.0,13.0,19.0,6.0,100.0,7.0,0.0,0.6,0.4,1.0


In [44]:
def create_graph(session, df):  # df size = graph size
    size = len(df)+1
    G = nx.Graph()
    nodes = range(1, size)
    G.add_nodes_from(nodes)
    edge_list = []
    for uid in range(1, size):
        neighbor_list = df[df['uid'] == uid].iloc[0]['linked'].split(',')
        edge_list = [(int(uid), int(neighbor)) for neighbor in neighbor_list]
        G.add_edges_from(edge_list)
    return G

def add_feature(graph, f_df):
    feature_col_list = ['color_red', 'color_black', 'color_green', 'color_blue', 'score', 'num_of_neighbor', 'hist_color', 'hist_neighbor', 'hist_skip']
    for n in graph.nodes():
        for f in feature_col_list:
            graph.nodes[n][f] = f_df[f_df['uid']==n][f].tolist()[0]    # 取出uid符合的user的f欄位，從series型態轉成list再取值
            
    return graph

In [46]:
graph_list = []
label_list = []
for session in range(1, 14):
    s_all_df = structure_df[structure_df['session'] == session]
    for round in s_all_df['round'].unique():
        
        s_df = s_all_df[s_all_df['round'] == round]
        f_df = feature_onehot_df.iloc[s_df.index]
        
        graph = create_graph(session, s_df)
        graph = add_feature(graph, f_df)
        graph_list.append(graph)

        labels = f_df['action'].tolist()
        label_list.append(labels)
print(len(graph_list))
print(len(label_list))

106
106


---

取得graph size的方法

In [47]:
graph_list[0].number_of_nodes()

15

取得node feature vector的方法

In [48]:
# for example
# list(graph_list[0].nodes[1].values())
graph_list[0].nodes[1]

{'color_red': 0.0,
 'color_black': 0.0,
 'color_green': 0.0,
 'color_blue': 1.0,
 'score': 0.0,
 'num_of_neighbor': 6.0,
 'hist_color': 0.0,
 'hist_neighbor': 0.0,
 'hist_skip': 0.0}

In [49]:
feature_vector_list = []
for g in graph_list:
    feature_vector_list.append([list(g.nodes[j].values()) for j in range(1, g.number_of_nodes()+1)])

取得adj matirc的方法

In [50]:
nx.adjacency_matrix(graph_list[0]).todense()

  nx.adjacency_matrix(graph_list[0]).todense()


matrix([[0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
        [1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        [1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
        [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1],
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1],
        [1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0]], dtype=int32)

create dataset & dataloader

In [54]:
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
import torch

In [55]:
class graphDataset(Dataset):
    def __init__(self, graph_list, label_list):
        feature_matrix_list = []
        for g in graph_list:
            feature_matrix = [list(g.nodes[j].values()) for j in range(1, g.number_of_nodes()+1)]
            feature_matrix_list.append(torch.tensor(feature_matrix, dtype=torch.float))
        
        self.feature_matrix = feature_matrix_list
        
        adj_matrix_list = []
        for g in graph_list:
            adj_matrix = nx.adjacency_matrix(g).todense()
            adj_matrix_list.append(torch.tensor(adj_matrix, dtype=torch.float))
        self.adj_matrix = adj_matrix_list

        labels = []
        for i in range(len(label_list)):
            labels.append(F.one_hot(torch.tensor(label_list[i], dtype=torch.int64)))
        self.labels = labels
        #print(len(self.feature_matrix), len(self.adj_matrix), len(self.labels))

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        features = self.feature_matrix[idx]
        adj_matrix = self.adj_matrix[idx]
        labels = self.labels[idx]

        return features, adj_matrix, labels

In [56]:
training_data = graphDataset(graph_list, label_list)

  adj_matrix = nx.adjacency_matrix(g).todense()
  labels.append(F.one_hot(torch.tensor(label_list[i], dtype=torch.int64)))


In [57]:
training_loader = DataLoader(training_data, batch_size=1, shuffle=True)

In [58]:
features, adj, labels = next(iter(training_loader))

In [59]:
features.shape

torch.Size([1, 18, 9])

In [60]:
adj.shape

torch.Size([1, 18, 18])

In [61]:
labels.shape

torch.Size([1, 18, 3])

In [163]:
labels.squeeze()

tensor([[1, 0, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
        [1, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 1, 0],
        [1, 0, 0]])

In [165]:
output = torch.mm(adj.squeeze(), features.squeeze())
print(output.shape)
print(output)

torch.Size([16, 6])
tensor([[ 19., 493.,  35.,   4.,   0.,   2.],
        [ 15., 426.,  35.,   6.,   0.,   0.],
        [ 13., 383.,  30.,   4.,   0.,   1.],
        [ 16., 476.,  35.,   4.,   1.,   1.],
        [ 17., 477.,  35.,   4.,   1.,   1.],
        [ 12., 450.,  37.,   6.,   0.,   0.],
        [ 15., 467.,  37.,   5.,   1.,   0.],
        [ 13., 449.,  37.,   5.,   1.,   0.],
        [ 14., 433.,  37.,   5.,   1.,   0.],
        [ 15., 433.,  42.,   6.,   1.,   0.],
        [ 15., 417.,  37.,   5.,   0.,   1.],
        [ 16., 416.,  37.,   5.,   0.,   1.],
        [ 16., 449.,  37.,   5.,   0.,   1.],
        [ 14., 349.,  36.,   6.,   0.,   0.],
        [ 17., 433.,  36.,   4.,   0.,   2.],
        [ 19., 443.,  35.,   4.,   0.,   2.]])
