# Data Process

In [1]:
import os
import numpy as np
import pandas as pd
import copy
from tqdm.notebook import tqdm

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error

import torch
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, GAE, VGAE
from torch_geometric.utils import train_test_split_edges
from torch_geometric.data import Data



In [2]:
sample_path = './data/'
chid_dict_file = 'sample_idx_map.npy'
cdtx_file = 'sample_zip_if_cca_cdtx0001_hist.csv'
cust_f_file = 'sample_zip_if_cca_cust_f.csv'

In [3]:
idx_map = np.load(os.path.join(sample_path, chid_dict_file), allow_pickle=True).tolist()
df_cdtx = pd.read_csv(os.path.join(sample_path, cdtx_file)) # 交易記錄檔
df_cust_f = pd.read_csv(os.path.join(sample_path, cust_f_file)) # user feature
df_cust_f.drop_duplicates(ignore_index=True, inplace=True)

print(len(idx_map), df_cdtx.shape, df_cust_f.shape)

50000 (6654938, 10) (1176172, 32)


In [4]:
l = len(idx_map)
for i, j  in tqdm(enumerate(set(df_cdtx.mcc))):
    idx_map[j] = i+l

df_cdtx.chid = df_cdtx.chid.map(idx_map)
df_cdtx.mcc = df_cdtx.mcc.map(idx_map)

df_cust_f.chid = df_cust_f.chid.map(idx_map)

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…




In [5]:
category_cols = ['masts', 'educd', 'naty', 'trdtp', 'poscd', 'cuorg']

numeric_cols = sorted(set(df_cust_f.columns) - set(category_cols) - set(['chid', 'data_ym', 'data_dt']), 
                      key=list(df_cust_f.columns).index)

In [6]:
mapper = {col: {value: index for index, value in enumerate(df_cust_f[col].unique())} 
          for col in category_cols}

df_cust_f.loc[:,category_cols] = df_cust_f[category_cols].apply(lambda x: x.map(mapper[x.name]))

print(df_cust_f.shape)
df_cust_f.head(2)

(1176172, 32)


Unnamed: 0,chid,data_ym,monin,wrky,first_mob,data_dt,masts,educd,naty,trdtp,...,constant_u2_ind,constant_u3_ind,constant_u4_ind,constant_l2_ind,constant_l3_ind,constant_l4_ind,constant_change,growth_rate,monotone_up,monotone_down
0,8477,2017-12-01,265153,1,94.0,2018-01-01,0,0,0,0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.2,2.0,0.0
1,8477,2018-01-01,265153,1,95.0,2018-02-01,0,0,0,0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.1,4.0,0.0


In [7]:
df_cust_f_pre = df_cust_f[df_cust_f.data_ym > '2017-12-01']

In [8]:
df_cdtx['month'] = df_cdtx.csmdt.apply(lambda x: x[:-3]+'-01')
#df_cust_f_pre['month'] = df_cust_f_pre.data_ym.apply(lambda x: x[:5]+f'{int(x[5:7])-1:02d}'+ x[7:] if x[5:7]!='01' else f'{int(x[:4])-1}'+'-12'+x[7:])

def make_edges_symmetry(edge_index):
    new_edge = []
    for i in edge_index:
        new_edge.append(np.array([i[1],i[0]]))
    new_edge = np.concatenate([new_edge],0)
    print(new_edge.shape, edge_index.shape)
    return torch.LongTensor(np.concatenate([edge_index,new_edge], 0).T)

edge_dict = {}
for i, j  in enumerate(sorted(df_cdtx.month.unique())):
    edge_index = df_cdtx[df_cdtx.month==j].iloc[:,[2,5]].drop_duplicates().to_numpy()
    edge_index = make_edges_symmetry(edge_index)
    
    edge_dict[i] = edge_index

(111718, 2) (111718, 2)
(111384, 2) (111384, 2)
(108539, 2) (108539, 2)
(114384, 2) (114384, 2)
(118297, 2) (118297, 2)
(112677, 2) (112677, 2)
(123385, 2) (123385, 2)
(122938, 2) (122938, 2)
(118927, 2) (118927, 2)
(123481, 2) (123481, 2)
(125585, 2) (125585, 2)
(134040, 2) (134040, 2)
(133746, 2) (133746, 2)
(125068, 2) (125068, 2)
(129703, 2) (129703, 2)
(133705, 2) (133705, 2)
(138664, 2) (138664, 2)
(136600, 2) (136600, 2)
(145034, 2) (145034, 2)
(145165, 2) (145165, 2)
(141280, 2) (141280, 2)
(146976, 2) (146976, 2)
(148163, 2) (148163, 2)
(151831, 2) (151831, 2)
(154337, 2) (154337, 2)


In [9]:
## 取得整個月的 objam 
temp_cdtx = df_cdtx.groupby(['chid', 'month']).sum()
df_cdtx_objam = pd.DataFrame(list(map(list, temp_cdtx.index)), columns=['chid', 'data_ym'])
df_cdtx_objam['objam'] = np.ma.log(temp_cdtx.objam.values).filled(0)

In [10]:
df_cust_f_pre = df_cust_f_pre.merge(df_cdtx_objam, 
                                    how='left', 
                                    left_on=['chid', 'data_ym'], 
                                    right_on=['chid', 'data_ym']).fillna(0)

df_cust_f_pre.shape

(1129463, 33)

In [11]:
ignore_cols = ['data_ym', 'data_dt', 'month']
category_cols = ['chid'] + category_cols
numeric_cols = sorted(set(df_cust_f_pre.columns) - set(category_cols) - set(ignore_cols), 
                      key=list(df_cust_f_pre.columns).index)

print(len(ignore_cols), ignore_cols, '\n')
print(len(category_cols), category_cols, '\n')
print(len(numeric_cols), numeric_cols)

3 ['data_ym', 'data_dt', 'month'] 

7 ['chid', 'masts', 'educd', 'naty', 'trdtp', 'poscd', 'cuorg'] 

24 ['monin', 'wrky', 'first_mob', 'cycam', 'slam', 'sum_area_c', 'sum_u2_ind', 'sum_u3_ind', 'sum_u4_ind', 'sum_l2_ind', 'sum_l3_ind', 'sum_l4_ind', 'constant_area_c', 'constant_u2_ind', 'constant_u3_ind', 'constant_u4_ind', 'constant_l2_ind', 'constant_l3_ind', 'constant_l4_ind', 'constant_change', 'growth_rate', 'monotone_up', 'monotone_down', 'objam']


In [12]:
x_scaler = MinMaxScaler()
df_cust_f_pre[numeric_cols] = x_scaler.fit_transform(df_cust_f_pre[numeric_cols])

In [13]:
x_feature_dict = {}
for i, j in enumerate(sorted(df_cust_f_pre.data_ym.unique())):
    temp = df_cust_f_pre[df_cust_f_pre.data_ym == j].copy()[category_cols+numeric_cols].to_numpy()
    x_feature = np.zeros([len(idx_map), temp.shape[1]])
    for k in set(temp[:,0]):
        x_feature[int(k)] = temp[np.where(temp[:,0]==k)[0]]

    x_feature_dict[i] = torch.Tensor(x_feature[:,1:])

In [14]:
def feature_index(x, feature_cols):
    feature_idx = {}
    x_cols = list(x.columns)
    for i in feature_cols:
        feature_idx[i] = x_cols.index(i)
        
    return feature_idx

In [15]:
category_cols.remove('chid')
category_dict = feature_index(df_cust_f_pre[category_cols+numeric_cols], category_cols)
numeric_dict = feature_index(df_cust_f_pre[category_cols+numeric_cols], numeric_cols)

In [16]:
list_chid = sorted(df_cust_f.chid.unique())
list_month = sorted(df_cust_f.data_dt.unique())[12:]

df_full_y_sum = pd.DataFrame({
    'chid': list_chid*len(list_month),
}).sort_values(by='chid', ignore_index=True)
df_full_y_sum['data_ym'] = list_month*len(list_chid)

df_full_y_sum.shape

(600000, 2)

In [17]:
## join objam

df_full_y_sum = df_full_y_sum.merge(df_cdtx_objam, 
                                    how='left', 
                                    left_on=['chid', 'data_ym'], 
                                    right_on=['chid', 'data_ym']).fillna(0)

df_full_y_sum.shape

(600000, 3)

In [18]:
df_full_y_sum.sort_values(by=['data_ym','chid'], inplace=True)
y_dict = {}
for i,j in enumerate(sorted(df_full_y_sum.data_ym.unique())):
    temp_y = df_full_y_sum[df_full_y_sum.data_ym == j].copy()
    y_dict[i] = torch.from_numpy(temp_y['objam'].to_numpy()).float()

# Pre-training model

In [19]:
class Encoder(torch.nn.Module):
    def __init__(self,in_channels, out_channels, layer_dims, category_cols, category_dims, window_size=12):
        super(Encoder, self).__init__()
        self.window_size = window_size
        self.embedding_dict = torch.nn.ModuleDict({category_col:torch.nn.Embedding(category_dim,64)
                                                   for category_col, category_dim in zip(category_cols,category_dims)})
        self.gcn1_dict = torch.nn.ModuleDict({str(i):GCNConv(in_channels, 2 * out_channels, cached=True)
                                                   for i in range(window_size)})
        self.gcn2_dict = torch.nn.ModuleDict({str(i):GCNConv(2 * out_channels, out_channels, cached=True)
                                                   for i in range(window_size)})
        
        self.rnn = torch.nn.GRU(out_channels, out_channels, 1, batch_first=True)
        

    def forward(self, x, edge_index):
        x_ = []
        numeric_idx = torch.LongTensor(list(numeric_dict.values()))
        for i in x:
            category_embeddings = [self.embedding_dict[item[0]](i[:,item[1]].long()) for item in category_dict.items()]
            category_embeddings = torch.cat(category_embeddings, -1)
            x_.append(torch.cat([category_embeddings, i[:,numeric_idx]], -1))
        
        gcn_embeddings = [self.gcn1_dict[str(i)](x_[i], edge_index[i]).relu() for i in range(self.window_size)]
        gcn_embeddings2 = [self.gcn2_dict[str(i)](gcn_embeddings[i], edge_index[i]).unsqueeze(1) for i in range(self.window_size)]
        gcn_embeddings2 = torch.cat(gcn_embeddings2, 1)
        
        _ ,gcn_embeddings2 = self.rnn(gcn_embeddings2)
        
        return gcn_embeddings2.squeeze(0) 

In [41]:
category_dims = [df_cust_f[feat].nunique() for feat in category_cols]

embedding_size = 64
layer_dims = [256, 128, 1]
input_dim = len(category_dict)*64 + len(numeric_dict)

epochs = 400
batch_size = 2048
learning_rate = 0.001

In [21]:
def sample_neg_edges(pos_edges, num_nodes, n_user):
    row , col = pos_edges
    mask = row < col
    row, col = row[mask], col[mask]
    neg_adj_mask = torch.ones(num_nodes, num_nodes, dtype=torch.uint8)
    neg_adj_mask = neg_adj_mask.triu(diagonal=1).to(torch.bool)
    neg_adj_mask[row, col] = 0
    neg_row, neg_col = neg_adj_mask[:n_user, n_user:].nonzero(as_tuple=False).t()
    neg_col = neg_col+n_user
    perm = torch.randperm(row.size(0))
    neg_row, neg_col = neg_row[perm], neg_col[perm]
    neg_edge_index = torch.cat([neg_row.view(1,-1), neg_col.view(1,-1)],0)
    
    return make_edges_symmetry(neg_edge_index.T)

neg_edges_dict = {}
for i,j in enumerate(edge_dict.values()):
    neg_edges_dict[i] = sample_neg_edges(j, 50502, 50000)

(111718, 2) torch.Size([111718, 2])
(111384, 2) torch.Size([111384, 2])
(108539, 2) torch.Size([108539, 2])
(114384, 2) torch.Size([114384, 2])
(118297, 2) torch.Size([118297, 2])
(112677, 2) torch.Size([112677, 2])
(123385, 2) torch.Size([123385, 2])
(122938, 2) torch.Size([122938, 2])
(118927, 2) torch.Size([118927, 2])
(123481, 2) torch.Size([123481, 2])
(125585, 2) torch.Size([125585, 2])
(134040, 2) torch.Size([134040, 2])
(133746, 2) torch.Size([133746, 2])
(125068, 2) torch.Size([125068, 2])
(129703, 2) torch.Size([129703, 2])
(133705, 2) torch.Size([133705, 2])
(138664, 2) torch.Size([138664, 2])
(136600, 2) torch.Size([136600, 2])
(145034, 2) torch.Size([145034, 2])
(145165, 2) torch.Size([145165, 2])
(141280, 2) torch.Size([141280, 2])
(146976, 2) torch.Size([146976, 2])
(148163, 2) torch.Size([148163, 2])
(151831, 2) torch.Size([151831, 2])
(154337, 2) torch.Size([154337, 2])


In [32]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
model = GAE(Encoder(input_dim, embedding_size, layer_dims, category_cols, category_dims)).to(device)
x_feature = [i.float().to(device) for i in x_feature_dict.values()]
pos_edge_index = [i.to(device) for i in edge_dict.values()]
neg_edges_index = [i.to(device) for i in neg_edges_dict.values()]
y = [i.to(device) for i in y_dict.values()]

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [23]:
def train():
    model.train()
    
    loss_ = 0
    for i in range(9):
        optimizer.zero_grad()
        x = x_feature[i:i+12]
        z = model.encode(x, pos_edge_index[i:i+12])
        loss = model.recon_loss(z, pos_edge_index[i+12], neg_edges_index[i+12])
        loss_ += loss.item()
        loss.backward()
        optimizer.step()
    return loss_/9

def test(pos_edge_index, neg_edge_index):
    auc , ap = 0, 0
    model.eval()
    with torch.no_grad():
        for i in range(9):
            x = x_feature[i:i+12]
            z = model.encode(x, pos_edge_index[i:i+12])
            auc_, ap_ = model.test(z, pos_edge_index[i+12], neg_edge_index[i+12])
            auc += auc_
            ap += ap_
    return auc/9, ap/9

In [24]:
for epoch in range(1, 400 + 1):
    loss = train()
    print(epoch, loss)

1 1.8995879292488098
2 0.8900182313389249
3 0.5591222180260552
4 0.5036832491556803
5 0.4693643848101298
6 0.44019139144155717
7 0.41778499881426495
8 0.40596484806802535
9 0.38299890359242755
10 0.45180339614550274
11 0.5066078735722436
12 0.42458731267187333
13 0.37789811028374565
14 0.3484479652510749
15 0.3192361427678002
16 0.32598010036680436
17 0.30375392238299054
18 0.2830920186307695
19 0.274363163444731
20 0.25992855429649353
21 0.2620842622386085
22 0.23225410448180306
23 0.2056346899933285
24 0.30887554420365226
25 0.24378892613781822
26 0.20992038481765324
27 0.18104298578368294
28 0.20112165974246132
29 0.16838762329684365
30 0.16405368347962698
31 0.14663105871942309
32 0.14824527584844166
33 0.1269316921631495
34 0.19634524981180826
35 0.4488311145040724
36 0.22234952615367043
37 0.18042585915989345
38 0.14279177288214365
39 0.13157794127861658
40 0.11196287473042806
41 0.16840918196572197
42 0.1386580409275161
43 0.11131610307428572
44 0.09601622902684742
45 0.08694332

KeyboardInterrupt: 

In [25]:
torch.save(model.encoder.state_dict(), 'Dynamic_GCNEncoder_2')

# DownStream Model

In [26]:
import copy
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error

In [42]:
def feature_index(x, feature_cols):
    feature_idx = {}
    x_cols = list(x.columns)
    for i in feature_cols:
        feature_idx[i] = x_cols.index(i)
        
    return feature_idx

def Linear_block(in_dim, out_dim):
    block = torch.nn.Sequential(torch.nn.Linear(in_dim, out_dim),
                                torch.nn.ReLU())
    return block

class MLP(torch.nn.Module):
    def __init__(self, category_cols, category_dims, category_dict, numeric_dict, input_dim, layer_dims, embedding_dim):
        super(MLP, self).__init__()
        self.category_dict = category_dict
        self.numeric_dict = numeric_dict
        
        self.out_dims = [input_dim+256, *layer_dims]
        Linear_blokcs = [Linear_block(in_dim, out_dim)
                         for in_dim, out_dim in zip(self.out_dims, self.out_dims[1:])]
        self.model = torch.nn.Sequential(*Linear_blokcs)
        self.embedding_dict = torch.nn.ModuleDict({category_col:torch.nn.Embedding(category_dim,
                                                                                   embedding_dim)
                                                   for category_col, category_dim in zip(category_cols,category_dims)})

        self.pretrain_model = Encoder(input_dim, embedding_dim, layer_dims, category_cols, category_dims)
        self.pretrain_model.load_state_dict(torch.load('Dynamic_GCNEncoder_2'))
        self.pretrain_model.train()

        
    def forward(self, x, x_, edge_index, index):
        
        z = self.pretrain_model(x, edge_index)[index:index+1000]
        category_embeddings = [self.pretrain_model.embedding_dict[item[0]](x_[:,item[1]].long()) for item in self.category_dict.items()]
        category_embeddings = torch.cat(category_embeddings, -1)
        
        numeric_idx = torch.Tensor(list(self.numeric_dict.values())).long()
        
        x = torch.cat([z,category_embeddings, x_[:,numeric_idx]], -1)
        x = self.model(x)
        
        return x.view(-1)

In [43]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = MLP(category_cols, category_dims, category_dict, numeric_dict, input_dim, layer_dims, embedding_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = torch.nn.MSELoss()

In [44]:
early_stop = 20

best_loss = 1e10
early_cnt = 0
RMSE = []
for epoch in tqdm(range(epochs)):
    
    train_loss = 0
    test_loss = 0
    train_output = np.array([])
    train_y = np.array([])
    test_output = np.array([])
    test_y = np.array([])
    
    for i in range(1,10):
        model.train()
        
        x = x_feature[i:i+12]
        train_dataset = TensorDataset(x[-1][:50000],y[i])
        train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=1000)
        for index , (j, k) in enumerate(train_loader):
            optimizer.zero_grad()
            output = model(x, j, pos_edge_index[i:i+12], index)
            loss = criterion(output, k)
            train_loss += loss.item()
            train_output = np.concatenate([train_output,output.cpu().detach().numpy().reshape(-1)])
            train_y = np.concatenate([train_y,k.cpu().detach().numpy().reshape(-1)])

            loss.backward()
            optimizer.step()
        
    for i in range(10,12):

        model.eval()        
        x = x_feature[i:i+12]
        test_dataset = TensorDataset(x[-1][:50000],y[i])
        test_loader = DataLoader(dataset=test_dataset, shuffle=True, batch_size=1000)
        for index , (j, k) in enumerate(test_loader):
            
            output = model(x, j, pos_edge_index[i:i+12], index)
            loss = criterion(output, k)
            test_loss += loss.item()
            test_output = np.concatenate([test_output,output.cpu().detach().numpy().reshape(-1)])
            test_y = np.concatenate([test_y,k.cpu().detach().numpy().reshape(-1)])

    #train_loss = np.sqrt(train_loss/len(train_loader))
    #test_loss = np.sqrt(test_loss/len(test_loader))
    
    train_output, train_y = np.e**train_output, np.e**train_y
    train_RMSE = mean_squared_error(train_output, train_y, squared=False)
    train_mean = mean_absolute_error(train_output, train_y)
    train_median = median_absolute_error(train_output, train_y)
    
    test_output, test_y = np.e**test_output, np.e**test_y
    test_RMSE = mean_squared_error(test_output, test_y, squared=False)
    test_mean = mean_absolute_error(test_output, test_y)
    test_median = median_absolute_error(test_output, test_y)
    
    print(f'epoch:{epoch}\ntrain loss:{train_RMSE:.0f},test loss:{test_RMSE:.0f}\ntrain MAE(mean):{train_mean:.0f},test MAE(mean):{test_mean:.0f}\ntrain MAE(median):{train_median:.0f}, test MAE(median):{test_median:.0f}')
    
    if test_RMSE <= best_loss:
        best_model_params = copy.deepcopy(model.state_dict())
        best_loss = test_RMSE
        print('\tBetter!')
        early_cnt = 0
    else:
        early_cnt += 1
    
    if early_cnt >= early_stop:
        break

model.load_state_dict(best_model_params)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=400.0), HTML(value='')))

epoch:0
train loss:397067,test loss:418770
train MAE(mean):62938,test MAE(mean):65473
train MAE(median):12198, test MAE(median):12377
	Better!
epoch:1
train loss:510965,test loss:416258
train MAE(mean):59568,test MAE(mean):65133
train MAE(median):11148, test MAE(median):11135
	Better!
epoch:2
train loss:709982,test loss:419378
train MAE(mean):59856,test MAE(mean):65298
train MAE(median):10969, test MAE(median):11501
epoch:3
train loss:3277633,test loss:426143
train MAE(mean):63774,test MAE(mean):63555
train MAE(median):10963, test MAE(median):11377
epoch:4
train loss:3047643,test loss:412322
train MAE(mean):63905,test MAE(mean):63243
train MAE(median):10874, test MAE(median):10933
	Better!
epoch:5
train loss:4730434,test loss:422308
train MAE(mean):66198,test MAE(mean):63231
train MAE(median):10898, test MAE(median):11255
epoch:6
train loss:5032283,test loss:409894
train MAE(mean):67540,test MAE(mean):65072
train MAE(median):10874, test MAE(median):11227
	Better!
epoch:7
train loss:803

<All keys matched successfully>

In [46]:
train_output = np.array([])
train_y = np.array([])
test_output = np.array([])
test_y = np.array([])

model.eval()

for i in range(1,10):
    x = x_feature[i:i+12]
    train_dataset = TensorDataset(x[-1][:50000],y[i])
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=1000)
    for index , (j, k) in enumerate(train_loader):
        output = model(x, j, pos_edge_index[i:i+12], index)
        loss = criterion(output, k)
        train_loss += loss.item()
        train_output = np.concatenate([train_output,output.cpu().detach().numpy().reshape(-1)])
        train_y = np.concatenate([train_y,k.cpu().detach().numpy().reshape(-1)])

        
for i in range(10,12):     
    x = x_feature[i:i+12]
    test_dataset = TensorDataset(x[-1][:50000],y[i])
    test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=1000)
    for index , (j, k) in enumerate(test_loader):
        output = model(x, j, pos_edge_index[i:i+12], index)
        loss = criterion(output, k)
        test_loss += loss.item()
        test_output = np.concatenate([test_output,output.cpu().detach().numpy().reshape(-1)])
        test_y = np.concatenate([test_y,k.cpu().detach().numpy().reshape(-1)])



train_output, train_y = np.e**train_output, np.e**train_y
test_output, test_y = np.e**test_output, np.e**test_y

In [47]:
print('train\tRMSE: {:.0f} MAE(mean): {:.0f} MAE(median): {:.0f}'.format(
    mean_squared_error(train_y, train_output, squared=False), 
    mean_absolute_error(train_y, train_output), 
    median_absolute_error(train_y, train_output)
))
print('test\tRMSE: {:.0f} MAE(mean): {:.0f} MAE(median): {:.0f}'.format(
    mean_squared_error(test_y, test_output, squared=False), 
    mean_absolute_error(test_y, test_output), 
    median_absolute_error(test_y, test_output)
))

train	RMSE: 392762 MAE(mean): 56391 MAE(median): 10772
test	RMSE: 389490 MAE(mean): 61931 MAE(median): 11111


In [48]:
df_out = df_full_y_sum[['chid']].iloc[-100000:].copy()
df_out['true'] = test_y
df_out['pred'] = test_output

In [49]:
df_out.to_csv('GCN+RNN_output.csv', index=False, encoding='utf-8')