In [84]:
import networkx as nx
import dgl
import pandas as pd
import numpy as np

跟 select action相同，只是label從action改成new_color

In [85]:
feature_df = pd.read_csv('data/change_color_df.csv')
structure_df = pd.read_csv('data/graph_structure.csv')

In [86]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

col = feature_df.columns.tolist()
col.remove('color')
new_col = ['color_red', 'color_black', 'color_green', 'color_blue'] + col
print(new_col)



ct = ColumnTransformer([('color', OneHotEncoder(), [3])], remainder='passthrough')
feature_onehot = np.array(ct.fit_transform(feature_df))
feature_onehot_df = pd.DataFrame(feature_onehot, columns=new_col)
feature_onehot_df

['color_red', 'color_black', 'color_green', 'color_blue', 'session', 'uid', 'round', 'score', 'num_of_neighbor', 'hist_color', 'hist_neighbor', 'hist_skip', 'new_color']


Unnamed: 0,color_red,color_black,color_green,color_blue,session,uid,round,score,num_of_neighbor,hist_color,hist_neighbor,hist_skip,new_color
0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,1.0,1.0,2.0,1.0,0.0,6.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,1.0,1.0,3.0,1.0,0.0,6.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,1.0,4.0,1.0,0.0,6.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,1.0,1.0,5.0,1.0,0.0,6.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2148,0.0,1.0,0.0,0.0,13.0,16.0,6.0,100.0,7.0,0.2,0.0,0.8,996.0
2149,0.0,0.0,1.0,0.0,13.0,17.0,6.0,100.0,7.0,0.2,0.2,0.6,996.0
2150,1.0,0.0,0.0,0.0,13.0,18.0,6.0,89.0,9.0,0.4,0.0,0.6,996.0
2151,0.0,0.0,0.0,1.0,13.0,19.0,6.0,100.0,7.0,0.0,0.6,0.4,996.0


In [87]:
def create_graph(session, df):  # df size = graph size
    size = len(df)+1
    G = nx.Graph()
    nodes = range(1, size)
    G.add_nodes_from(nodes)
    edge_list = []
    for uid in range(1, size):
        neighbor_list = df[df['uid'] == uid].iloc[0]['linked'].split(',')
        edge_list = [(int(uid), int(neighbor)) for neighbor in neighbor_list]
        G.add_edges_from(edge_list)
    return G

def add_feature(graph, f_df):
    feature_col_list = ['color_red', 'color_black', 'color_green', 'color_blue', 'score', 'num_of_neighbor', 'hist_color', 'hist_neighbor', 'hist_skip']
    for n in graph.nodes():
        for f in feature_col_list:
            graph.nodes[n][f] = f_df[f_df['uid']==n][f].tolist()[0]    # 取出uid符合的user的f欄位，從series型態轉成list再取值
    return graph

In [88]:
graph_list = []
label_list = []
for session in range(1, 14):
    s_all_df = structure_df[structure_df['session'] == session]
    for round in s_all_df['round'].unique():
        
        s_df = s_all_df[s_all_df['round'] == round]
        f_df = feature_onehot_df.iloc[s_df.index]
        graph = create_graph(session, s_df)
        graph = add_feature(graph, f_df)
        graph_list.append(graph)

        labels = f_df['new_color'].tolist()
        label_list.append(labels)
len(graph_list)

106

---

In [89]:
import math
import time
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.parameter import Parameter
from torch.nn import functional as F

In [90]:
class graphDataset(Dataset):
    def __init__(self, graph_list, label_list):
        feature_matrix_list = []
        for g in graph_list:
            feature_matrix = [list(g.nodes[j].values()) for j in range(1, g.number_of_nodes()+1)]
            feature_matrix_list.append(torch.tensor(feature_matrix, dtype=torch.float))
        
        self.feature_matrix = feature_matrix_list
        
        adj_matrix_list = []
        for g in graph_list:
            adj_matrix = nx.adjacency_matrix(g).todense()
            adj_matrix_list.append(torch.tensor(adj_matrix, dtype=torch.float))
        self.adj_matrix = adj_matrix_list

        labels = []
        for i in range(len(label_list)):
            labels.append(torch.tensor(label_list[i], dtype=torch.int64))
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        features = self.feature_matrix[idx]
        adj_matrix = self.adj_matrix[idx]
        labels = self.labels[idx]

        return features, adj_matrix, labels

In [91]:
class GraphConvolutionalLayer(nn.modules.Module):
    def __init__(self, in_feature, out_feature, bias=True):
        super(GraphConvolutionalLayer, self).__init__()
        self.in_feature = in_feature
        self.out_feature = out_feature
        self.weight = Parameter(torch.FloatTensor(in_feature, out_feature))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_feature))
        else:
            self.bias = None
    def forward(self, adj_matrix, features):
        output = torch.mm(adj_matrix, features)
        output = torch.mm(output, self.weight)
        if(self.bias is not None):
            return output + self.bias
        else:
            return output
    

class GCN_Model(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, dropout):
        super(GCN_Model, self).__init__()
        self.gcn = GraphConvolutionalLayer(in_dim, hidden_dim)
        self.linear = torch.nn.Linear(hidden_dim, out_dim)
        self.dropout = dropout
    def forward(self, feature, adj_matrix):
        x = torch.nn.functional.relu(self.gcn(adj_matrix, feature))
        x = torch.nn.functional.dropout(x, self.dropout, training=self.training)
        
        x = self.linear(x)
        output = torch.nn.functional.softmax(x, dim=1)
        return output

In [92]:
def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double().sum()
    return correct / len(labels)

在train step只針對new_color != 996的那些node做loss function計算 -> 利用index取出來?

In [148]:
def train(model, train_dataloader, test_dataloader, optimizer, loss_fn, epoch, eval=True):
    t = time.time()
    loss_hist = []
    for _ in range(epoch):
        model.train()
        for features, adj_matrix, labels in iter(train_dataloader):
            features = features.squeeze()
            adj_matrix = adj_matrix.squeeze()
            labels = labels.squeeze()
            
            output = model(features, adj_matrix)
            # 挑出label == 996的，不參與loss計算
            excluded_index = []
            for i in range(len(labels)):
                if(labels[i] != 996):
                    excluded_index.append(i)
            # print(len(labels))
            # print(labels)
            # real_labels = torch.tensor([])
            # real_output = torch.tensor([])
            if(excluded_index):
                real_labels = torch.index_select(labels, dim=0, index=torch.tensor(excluded_index))
                # print(len(real_labels))
                real_output = torch.index_select(output, dim=0, index=torch.tensor(excluded_index))
                
                loss = loss_fn(real_output, real_labels)
                loss_hist.append(float(loss))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        
        if(eval):
            model.eval()
            total_loss = 0
            for features, adj_matrix, labels in iter(test_dataloader):
                features = features.squeeze()
                adj_matrix = adj_matrix.squeeze()
                labels = labels.squeeze()
                output = model(features, adj_matrix)
                # 挑出label == 996的，不參與loss計算
                excluded_index = []
                for i in range(len(labels)):
                    if(labels[i] != 996):
                        excluded_index.append(i)
                if(excluded_index):
                    real_labels = torch.index_select(labels, dim=0, index=torch.tensor(excluded_index))
                    # print(len(real_labels))
                    real_output = torch.index_select(output, dim=0, index=torch.tensor(excluded_index))
                    total_loss += float(loss_fn(real_output, real_labels))
            print('eval avg loss: ', total_loss/len(test_dataloader))
            print('acc: ', float(accuracy(output, labels)))
            print('='*80)
    return loss_hist

In [152]:
train_dataset = graphDataset(graph_list[:90], label_list[:90])
test_dataset = graphDataset(graph_list[90:], label_list[90:])

train_dataloader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=1)

  adj_matrix = nx.adjacency_matrix(g).todense()
  labels.append(torch.tensor(label_list[i], dtype=torch.int64))


In [153]:
model = GCN_Model(9, 9, 4, 0.2)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

l = train(model, train_dataloader, test_dataloader, optimizer, loss_fn, epoch=20)

eval avg loss:  1.1825333759188652
acc:  0.1
eval avg loss:  1.1816955730319023
acc:  0.1
eval avg loss:  1.1922904998064041
acc:  0.1
eval avg loss:  1.1830761581659317
acc:  0.1
eval avg loss:  1.2009883895516396
acc:  0.1
eval avg loss:  1.200197272002697
acc:  0.1
eval avg loss:  1.199930064380169
acc:  0.1
eval avg loss:  1.1996429115533829
acc:  0.1
eval avg loss:  1.199579194188118
acc:  0.1
eval avg loss:  1.1994756311178207
acc:  0.1
eval avg loss:  1.1993102580308914
acc:  0.1
eval avg loss:  1.1991576924920082
acc:  0.1
eval avg loss:  1.1993700861930847
acc:  0.1
eval avg loss:  1.1989977061748505
acc:  0.1
eval avg loss:  1.1988689824938774
acc:  0.1
eval avg loss:  1.1988288089632988
acc:  0.1
eval avg loss:  1.1989382430911064
acc:  0.1
eval avg loss:  1.198970913887024
acc:  0.1
eval avg loss:  1.1990406513214111
acc:  0.1
eval avg loss:  1.1824807971715927
acc:  0.1


In [154]:
f = open('color_change.txt', 'w')
for i in l:
    f.write(str(i))
    f.write('\n')