In [1]:
from sklearn.metrics import (adjusted_rand_score, normalized_mutual_info_score, 
                             silhouette_score, calinski_harabasz_score,
                             davies_bouldin_score)
import logging
import numpy as np
from tqdm import tqdm
import torch
from sklearn.preprocessing import LabelEncoder
from graphmae.utils import (
    
    build_args,
    create_optimizer,
    set_random_seed,
    TBLogger,
    get_current_lr,
    load_best_configs,

    
)
from collections import Counter
from graphmae.datasets.data_util import load_dataset
from graphmae.evaluation import node_classification_evaluation
from graphmae.models import build_model
from ogb.nodeproppred import DglNodePropPredDataset
from sklearn.cluster import KMeans
def kMeans_use(embedding,cluster_number):
    kmeans = KMeans(n_clusters=cluster_number,
                init="k-means++",
                random_state=0)
    pred = kmeans.fit_predict(embedding)
    return pred
import argparse
parser = argparse.ArgumentParser(description="GAT")
parser.add_argument("--seeds", type=int, nargs="+", default=[0])
parser.add_argument("--dataset", type=str, default="cora")
parser.add_argument("--device", type=int, default=-1)
parser.add_argument("--max_epoch", type=int, default=200,
                    help="number of training epochs")
parser.add_argument("--warmup_steps", type=int, default=-1)

parser.add_argument("--num_heads", type=int, default=4,
                    help="number of hidden attention heads")
parser.add_argument("--num_out_heads", type=int, default=1,
                    help="number of output attention heads")
parser.add_argument("--num_layers", type=int, default=2,
                    help="number of hidden layers")
parser.add_argument("--num_hidden", type=int, default=256,
                    help="number of hidden units")
parser.add_argument("--residual", action="store_true", default=False,
                    help="use residual connection")
parser.add_argument("--in_drop", type=float, default=.2,
                    help="input feature dropout")
parser.add_argument("--attn_drop", type=float, default=.1,
                    help="attention dropout")
parser.add_argument("--norm", type=str, default=None)
parser.add_argument("--lr", type=float, default=0.005,
                    help="learning rate")
parser.add_argument("--weight_decay", type=float, default=5e-4,
                    help="weight decay")
parser.add_argument("--negative_slope", type=float, default=0.2,
                    help="the negative slope of leaky relu for GAT")
parser.add_argument("--activation", type=str, default="prelu")
parser.add_argument("--mask_rate", type=float, default=0.5)
parser.add_argument("--drop_edge_rate", type=float, default=0.0)
parser.add_argument("--replace_rate", type=float, default=0.0)

parser.add_argument("--encoder", type=str, default="gat")
parser.add_argument("--decoder", type=str, default="gat")
parser.add_argument("--loss_fn", type=str, default="byol")
parser.add_argument("--alpha_l", type=float, default=2, help="`pow`inddex for `sce` loss")
parser.add_argument("--optimizer", type=str, default="adam")

parser.add_argument("--max_epoch_f", type=int, default=30)
parser.add_argument("--lr_f", type=float, default=0.001, help="learning rate for evaluation")
parser.add_argument("--weight_decay_f", type=float, default=0.0, help="weight decay for evaluation")
parser.add_argument("--linear_prob", action="store_true", default=False)

parser.add_argument("--load_model", action="store_true")
parser.add_argument("--save_model", action="store_true")
parser.add_argument("--use_cfg", action="store_true")
parser.add_argument("--logging", action="store_true")
parser.add_argument("--scheduler", action="store_true", default=False)
parser.add_argument("--concat_hidden", action="store_true", default=False)

# for graph classification
parser.add_argument("--pooling", type=str, default="mean")
parser.add_argument("--deg4feat", action="store_true", default=False, help="use node degree as input feature")
parser.add_argument("--batch_size", type=int, default=32)

2022-09-11 20:30:31,015 - INFO - Enabling RDKit 2022.03.5 jupyter extensions


_StoreAction(option_strings=['--batch_size'], dest='batch_size', nargs=None, const=None, default=32, type=<class 'int'>, choices=None, help=None, metavar=None)

In [2]:
import pickle as pkl
from scanpy import read_10x_h5
import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch
import plotly.express as px
import pandas as pd
import scanpy as sc
import scipy as sci

def drawPicture(dataframe,col_name, row_name,colorattribute,save_file,celltype_colors =  ("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999", "#E41A80",
    "#377F1C", "#4DAFAE", "#984F07", "#FF7F64", "#FFFF97", "#A6568C", "#F78223", "#9999FD", "#E41AE4", "#377F80", "#4DB012",
    "#984F6B", "#FF7FC8",  "#A656F0", "#F78287", "#999A61", "#E41B48", "#377FE4", "#4DB076", "#984FCF", "#FF802C",
    "#00005F", "#A65754", "#F782EB", "#999AC5", "#E41BAC", "#378048", "#4DB0DA", "#985033", "#FF8090", "#0000C3", "#A657B8",
    "#F7834F", "#999B29", "#E41C10", "#3780AC", "#4DB13E", "#985097", "#FF80F4", "#000127", "#A6581C", "#F783B3", "#999B8D",
    "#E41C74", "#378110", "#4DB1A2", "#9850FB", "#FF8158", "#00018B", "#A65880", "#F78417", "#999BF1", "#E41CD8", "#378174",
    "#4DB206", "#98515F", "#FF81BC", "#0001EF", "#A658E4", "#F7847B", "#999C55", "#E41D3C", "#3781D8", "#4DB26A", "#9851C3",
    "#FF8220", "#000253", "#A65948", "#F784DF", "#999CB9", "#E41DA0", "#37823C", "#4DB2CE", "#985227", "#FF8284", "#0002B7",
    "#A659AC", "#F78543", "#999D1D", "#E41E04", "#3782A0", "#4DB332", "#98528B", "#FF82E8", "#00031B", "#A65A10", "#F785A7",
    "#999D81", "#E41E68", "#378304", "#4DB396", "#9852EF", "#FF834C", "#00037F", "#A65A74", "#F7860B", "#999DE5", "#E41ECC",
    "#378368", "#4DB3FA", "#985353", "#FF83B0", "#0003E3", "#A65AD8", "#F7866F", "#999E49", "#E41F30", "#3783CC", "#4DB45E",
    "#9853B7", "#FF8414", "#000447", "#A65B3C", "#F786D3", "#999EAD", "#E41F94", "#378430", "#4DB4C2", "#98541B", "#FF8478",
    "#0004AB", "#A65BA0", "#F78737", "#999F11", "#E41FF8", "#378494", "#4DB526", "#98547F", "#FF84DC", "#00050F", "#A65C04",
    "#F7879B", "#999F75"
    ),width = 1000,height = 1000,marker_size = 10,is_show = True,is_save = False,save_type = "pdf"):

    import plotly.express as px
    import plotly.graph_objects as go
    size = len(set(dataframe[colorattribute]))
    #dataframe.sort_values(by = colorattribute,inplace=True, ascending=True)
    length_col = max(dataframe[col_name]) - min(dataframe[col_name])

    length_row = max(dataframe[row_name]) - min(dataframe[row_name])
    max_length = max(length_col,length_row) + 2
    fig = px.scatter(dataframe, x = col_name, y= row_name,color = colorattribute,color_discrete_sequence=celltype_colors)
    fig.update_traces(marker_size=marker_size)
    fig.update_layout(
        xaxis = dict(
            tickmode = 'linear',
            tick0 = min(dataframe[row_name]),   # 起始点
            dtick = max_length  # 间距
        ),
        yaxis = dict(
            tickmode = 'linear',
            tick0 = min(dataframe[col_name]),   # 起始点
            dtick = max_length  # 间距
        ),
    #     xaxis_range = [min(it_mapping_csv.row)-10,min(it_mapping_csv.row) + max_length ],
    #     yaxis_range =[min(it_mapping_csv.col)-10,min(it_mapping_csv.col) + max_length]
    )
    fig.update_layout(
        autosize=False,
        width=width,
        height = height)
    if(is_show):
        fig.show()
    if(is_save):
        if(save_type == "pdf"):
            fig.write_image(save_file)
        if(save_type == "html"):
            fig.write_html(save_file)

2022-09-11 20:30:31,881 - INFO - Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2022-09-11 20:30:31,882 - INFO - NumExpr defaulting to 8 threads.


In [3]:
def pretrain(model, graph, feat, optimizer, max_epoch, device, scheduler, num_classes, lr_f, weight_decay_f, max_epoch_f, linear_prob, logger=None):
    logging.info("start training..")
    graph = graph.to(device)
    x = feat.to(device)

    epoch_iter = tqdm(range(max_epoch))

    for epoch in epoch_iter:
        model.train()

        loss, loss_dict = model(graph, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        epoch_iter.set_description(f"# Epoch {epoch}: train_loss: {loss.item():.4f}")
        if logger is not None:
            loss_dict["lr"] = get_current_lr(optimizer)
            logger.note(loss_dict, step=epoch)

        #if (epoch + 1) % 200 == 0:
            #node_classification_evaluation(model, graph, x, num_classes, lr_f, weight_decay_f, max_epoch_f, device, linear_prob, mute=True)

    # return best_model
    return model

In [4]:
import pandas as pd
#import calculate_adj
import anndata as ad
import scanpy as sc
from operator import index
import re
from scipy.spatial.distance import pdist, squareform
import pandas as pd
import numpy as np
from scipy import sparse
import dgl
import torch
folder_name = "/home/sunhang/Embedding/CCST/dataset/DLPFC/"
samplea_list = {151507,
 151508,
 151509,
 151510,
 151669,
 151670,
 151671,
 151672,
 151673,
 151674,
 151675,
 151676}
sample_name = str(151673)
#gene_exp_data_file = folder_name + sample_name + "_DLPFC_count.csv"
gene_loc_data_file = folder_name + sample_name + "_DLPFC_col_name.csv"
#save file
npz_file = folder_name + sample_name + "_DLPFC_distacne.npz"
#gene_csv = pd.read_csv(gene_exp_data_file,index_col= 0 )
gene_loc_data_csv = pd.read_csv(gene_loc_data_file,index_col=0)
gene_loc_data_csv = gene_loc_data_csv.fillna("None")
row_name = "imagerow"
col_name = "imagecol"
cell_loc = gene_loc_data_csv[[row_name,col_name]].values
distance_np = pdist(cell_loc, metric = "euclidean")
distance_np_X =squareform(distance_np)
distance_loc_csv = pd.DataFrame(index=gene_loc_data_csv.index, columns=gene_loc_data_csv.index,data = distance_np_X)
threshold = 8
num_big = np.where((0< distance_np_X)&(distance_np_X < threshold))[0].shape[0]
#num_big = np.where((0< distance_np_X)&(distance_np_X < threshold))[0].shape[0]
adj_matrix = np.zeros(distance_np_X.shape)
non_zero_point = np.where((0 < distance_np_X) & (distance_np_X < threshold))
adj_matrix = np.zeros(distance_np_X.shape)
non_zero_point = np.where((0< distance_np_X)&(distance_np_X<threshold))
for i in range(num_big):
    x = non_zero_point[0][i]
    y = non_zero_point[1][i]
    adj_matrix[x][y] = 1 
adj_matrix = adj_matrix + np.eye(distance_np_X.shape[0])
adj_matrix  = np.float32(adj_matrix)
adj_matrix_crs = sparse.csr_matrix(adj_matrix)
graph = dgl.from_scipy(adj_matrix_crs,eweight_name='w')
min_cells = 5
pca_n_comps = 3000

In [5]:
graph

Graph(num_nodes=3639, num_edges=24763,
      ndata_schemes={}
      edata_schemes={'w': Scheme(shape=(), dtype=torch.float32)})

In [6]:
from scanpy import read_10x_h5
adata = read_10x_h5("/home/sunhang/Embedding/SpaGCN/tutorial/data/" + sample_name +"/filtered_feature_bc_matrix.h5")
spatial=pd.read_csv("/home/sunhang/Embedding/SpaGCN/tutorial/data/" + sample_name +"/tissue_positions_list.txt",sep=",",header=None,na_filter=False,index_col=0) 
adata.obs["x1"]=spatial[1]
adata.obs["x2"]=spatial[2]
adata.obs["x3"]=spatial[3]
adata.obs["x4"]=spatial[4]
adata.obs["x5"]=spatial[5]
#Select captured samples
adata=adata[adata.obs["x1"]==1]
adata.var_names=[i.upper() for i in list(adata.var_names)]
adata.var["genename"]=adata.var.index.astype("str")

adata.var_names_make_unique

  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")


<bound method AnnData.var_names_make_unique of AnnData object with n_obs × n_vars = 3639 × 33538
    obs: 'x1', 'x2', 'x3', 'x4', 'x5'
    var: 'gene_ids', 'feature_types', 'genome', 'genename'>

In [7]:
gene_loc_data_csv

Unnamed: 0,barcode,sample_name,tissue,row,col,imagerow,imagecol,Cluster,height,width,...,SpatialDE_PCA_spatial,SpatialDE_pool_PCA_spatial,HVG_PCA_spatial,pseudobulk_PCA_spatial,markers_PCA_spatial,SpatialDE_UMAP_spatial,SpatialDE_pool_UMAP_spatial,HVG_UMAP_spatial,pseudobulk_UMAP_spatial,markers_UMAP_spatial
AAACAAGTATCTCCCA-1.8,AAACAAGTATCTCCCA-1,151673,1,50,102,381.098123,440.639079,7,600,600,...,3,1,1,3,1,7,1,1,2,1
AAACAATCTACTAGCA-1.3,AAACAATCTACTAGCA-1,151673,1,3,43,126.327637,259.630972,4,600,600,...,7,5,2,2,3,2,1,4,2,3
AAACACCAATAACTGC-1.8,AAACACCAATAACTGC-1,151673,1,59,19,427.767792,183.078314,8,600,600,...,5,4,4,5,3,5,7,5,3,2
AAACAGAGCGACTCCT-1.7,AAACAGAGCGACTCCT-1,151673,1,14,94,186.813688,417.236738,6,600,600,...,3,3,1,2,2,3,4,2,1,1
AAACAGCTTTCAGAAG-1.7,AAACAGCTTTCAGAAG-1,151673,1,43,9,341.269139,152.700275,3,600,600,...,2,1,2,4,1,3,3,8,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTGTTTCACATCCAGG-1.8,TTGTTTCACATCCAGG-1,151673,1,58,42,422.862301,254.410450,8,600,600,...,5,4,4,5,8,5,7,5,7,5
TTGTTTCATTAGTCTA-1.8,TTGTTTCATTAGTCTA-1,151673,1,60,30,433.393354,217.146722,8,600,600,...,5,4,4,5,8,5,7,1,1,4
TTGTTTCCATACAACT-1.8,TTGTTTCCATACAACT-1,151673,1,45,27,352.430255,208.415849,6,600,600,...,6,6,7,6,3,1,3,3,3,3
TTGTTTGTATTACACG-1.4,TTGTTTGTATTACACG-1,151673,1,73,41,503.735391,250.720081,6,600,600,...,6,6,8,5,3,1,3,4,1,3


In [8]:
adata.obs

Unnamed: 0,x1,x2,x3,x4,x5
AAACAAGTATCTCCCA-1,1,50,102,8468,9791
AAACAATCTACTAGCA-1,1,3,43,2807,5769
AAACACCAATAACTGC-1,1,59,19,9505,4068
AAACAGAGCGACTCCT-1,1,14,94,4151,9271
AAACAGCTTTCAGAAG-1,1,43,9,7583,3393
...,...,...,...,...,...
TTGTTTCACATCCAGG-1,1,58,42,9396,5653
TTGTTTCATTAGTCTA-1,1,60,30,9630,4825
TTGTTTCCATACAACT-1,1,45,27,7831,4631
TTGTTTGTATTACACG-1,1,73,41,11193,5571


In [9]:
pca_n_comps = 3000
adata.obs = pd.merge(gene_loc_data_csv,adata.obs,right_index=True,left_on="barcode")
adata.obs.index = adata.obs["barcode"]
le = LabelEncoder()
label = le.fit_transform(adata.obs['layer_guess_reordered_short'])
adata.obs["lay_num"] = label
sc.pp.filter_genes(adata, min_cells=5)
adata_X = sc.pp.normalize_total(adata, target_sum=1, exclude_highly_expressed=True, inplace=False)['X']
adata_X = sc.pp.scale(adata_X)
adata_X = sc.pp.pca(adata_X, n_comps=pca_n_comps)
graph.ndata["feat"] = torch.tensor(adata_X.copy())
num_features = graph.ndata["feat"].shape[1]
num_classes = len(set(adata.obs.lay_num))-1

  utils.warn_names_duplicates("var")


In [10]:
adata.obs.columns

Index(['barcode', 'sample_name', 'tissue', 'row', 'col', 'imagerow',
       'imagecol', 'Cluster', 'height', 'width', 'sum_umi', 'sum_gene',
       'subject', 'position', 'replicate', 'subject_position', 'discard',
       'key', 'cell_count', 'SNN_k50_k4', 'SNN_k50_k5', 'SNN_k50_k6',
       'SNN_k50_k7', 'SNN_k50_k8', 'SNN_k50_k9', 'SNN_k50_k10', 'SNN_k50_k11',
       'SNN_k50_k12', 'SNN_k50_k13', 'SNN_k50_k14', 'SNN_k50_k15',
       'SNN_k50_k16', 'SNN_k50_k17', 'SNN_k50_k18', 'SNN_k50_k19',
       'SNN_k50_k20', 'SNN_k50_k21', 'SNN_k50_k22', 'SNN_k50_k23',
       'SNN_k50_k24', 'SNN_k50_k25', 'SNN_k50_k26', 'SNN_k50_k27',
       'SNN_k50_k28', 'GraphBased', 'Maynard', 'Martinowich', 'Layer',
       'layer_guess', 'layer_guess_reordered', 'layer_guess_reordered_short',
       'expr_chrM', 'expr_chrM_ratio', 'SpatialDE_PCA', 'SpatialDE_pool_PCA',
       'HVG_PCA', 'pseudobulk_PCA', 'markers_PCA', 'SpatialDE_UMAP',
       'SpatialDE_pool_UMAP', 'HVG_UMAP', 'pseudobulk_UMAP', 'markers_UM

In [11]:
def normalize(adata, copy=True, highly_genes = None, filter_min_counts=True, 
              size_factors=True, normalize_input=True, logtrans_input=True):
    """
    Normalizes input data and retains only most variable genes 
    (indicated by highly_genes parameter)

    Args:
        adata ([type]): [description]
        copy (bool, optional): [description]. Defaults to True.
        highly_genes ([type], optional): [description]. Defaults to None.
        filter_min_counts (bool, optional): [description]. Defaults to True.
        size_factors (bool, optional): [description]. Defaults to True.
        normalize_input (bool, optional): [description]. Defaults to True.
        logtrans_input (bool, optional): [description]. Defaults to True.

    Raises:
        NotImplementedError: [description]

    Returns:
        [type]: [description]
    """
    if isinstance(adata, sc.AnnData):
        if copy:
            adata = adata.copy()
    elif isinstance(adata, str):
        adata = sc.read(adata)
    else:
        raise NotImplementedError
    norm_error = 'Make sure that the dataset (adata.X) contains unnormalized count data.'
    assert 'n_count' not in adata.obs, norm_error
    if adata.X.size < 50e6: # check if adata.X is integer only if array is small
        if sci.sparse.issparse(adata.X):
            assert (adata.X.astype(int) != adata.X).nnz == 0, norm_error
        else:
            assert np.all(adata.X.astype(int) == adata.X), norm_error

    if filter_min_counts:
        sc.pp.filter_genes(adata, min_counts=1)#3
        sc.pp.filter_cells(adata, min_counts=1)
    if size_factors or normalize_input or logtrans_input:
        adata.raw = adata.copy()
    else:
        adata.raw = adata
    if size_factors:
        sc.pp.normalize_per_cell(adata)
        adata.obs['size_factors'] = adata.obs.n_counts / np.median(adata.obs.n_counts)
    else:
        adata.obs['size_factors'] = 1.0
    if logtrans_input:
        sc.pp.log1p(adata)
    if highly_genes != None:
        sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5, n_top_genes = highly_genes, subset=True)
    if normalize_input:
        sc.pp.scale(adata)
    return adata

In [12]:
import argparse
parser = argparse.ArgumentParser(description="GAT")
parser.add_argument("--seeds", type=int, nargs="+", default=[0])
parser.add_argument("--dataset", type=str, default="cora")
parser.add_argument("--device", type=int, default=-1)
parser.add_argument("--max_epoch", type=int, default=200,
                    help="number of training epochs")
parser.add_argument("--warmup_steps", type=int, default=-1)

parser.add_argument("--num_heads", type=int, default=4,
                    help="number of hidden attention heads")
parser.add_argument("--num_out_heads", type=int, default=1,
                    help="number of output attention heads")
parser.add_argument("--num_layers", type=int, default=2,
                    help="number of hidden layers")
parser.add_argument("--num_hidden", type=int, default=256,
                    help="number of hidden units")
parser.add_argument("--residual", action="store_true", default=False,
                    help="use residual connection")
parser.add_argument("--in_drop", type=float, default=.2,
                    help="input feature dropout")
parser.add_argument("--attn_drop", type=float, default=.1,
                    help="attention dropout")
parser.add_argument("--norm", type=str, default=None)
parser.add_argument("--lr", type=float, default=0.005,
                    help="learning rate")
parser.add_argument("--weight_decay", type=float, default=5e-4,
                    help="weight decay")
parser.add_argument("--negative_slope", type=float, default=0.2,
                    help="the negative slope of leaky relu for GAT")
parser.add_argument("--activation", type=str, default="prelu")
parser.add_argument("--mask_rate", type=float, default=0.5)
parser.add_argument("--drop_edge_rate", type=float, default=0.0)
parser.add_argument("--replace_rate", type=float, default=0.0)

parser.add_argument("--encoder", type=str, default="gat")
parser.add_argument("--decoder", type=str, default="gat")
parser.add_argument("--loss_fn", type=str, default="byol")
parser.add_argument("--alpha_l", type=float, default=2, help="`pow`inddex for `sce` loss")
parser.add_argument("--optimizer", type=str, default="adam")

parser.add_argument("--max_epoch_f", type=int, default=30)
parser.add_argument("--lr_f", type=float, default=0.001, help="learning rate for evaluation")
parser.add_argument("--weight_decay_f", type=float, default=0.0, help="weight decay for evaluation")
parser.add_argument("--linear_prob", action="store_true", default=False)

parser.add_argument("--load_model", action="store_true")
parser.add_argument("--save_model", action="store_true")
parser.add_argument("--use_cfg", action="store_true")
parser.add_argument("--logging", action="store_true")
parser.add_argument("--scheduler", action="store_true", default=False)
parser.add_argument("--concat_hidden", action="store_true", default=False)

# for graph classification
parser.add_argument("--pooling", type=str, default="mean")
parser.add_argument("--deg4feat", action="store_true", default=False, help="use node degree as input feature")
parser.add_argument("--batch_size", type=int, default=32)

_StoreAction(option_strings=['--batch_size'], dest='batch_size', nargs=None, const=None, default=32, type=<class 'int'>, choices=None, help=None, metavar=None)

In [13]:
args = parser.parse_args([])
args.lr = 0.001
args.lr_f = 0.01
args.num_hidden = 512
args.num_heads = 4
args.weight_decay = 2e-4
args.weight_decay_f= 1e-4
args.max_epoch= 600
args.max_epoch_f= 300
args.mask_rate= 0.5
args.num_layers= 2
args.encoder= "gat"
args.decoder= "gat" 
args.activation= "prelu"
args.in_drop= 0.2
args.attn_drop= 0.1
args.linear_prob= True
args.loss_fn= "sce" 
args.drop_edge_rate=0.0
args.optimizer= "adam"
args.replace_rate= 0.05 
args.alpha_l= 3
args.scheduler= True
args.dataset = "sp"


#参数传递
device = args.device if args.device >= 0 else "cpu"
seeds = args.seeds
dataset_name = args.dataset
max_epoch = args.max_epoch
max_epoch_f = args.max_epoch_f
num_hidden = args.num_hidden
num_layers = args.num_layers
encoder_type = args.encoder
decoder_type = args.decoder
replace_rate = args.replace_rate

optim_type = args.optimizer 
loss_fn = args.loss_fn

lr = args.lr
weight_decay = args.weight_decay
lr_f = args.lr_f
weight_decay_f = args.weight_decay_f
linear_prob = args.linear_prob
load_model = args.load_model
save_model = args.save_model
logs = args.logging
use_scheduler = args.scheduler
args.num_features = num_features

In [15]:
for num_set in np.arange(200,1000,100):
    for num_pro_set in np.arange(0,1,0.1):
        print(num_set)
        print(num_pro_set)
        gene_loc_data_csv = pd.read_csv(gene_loc_data_file,index_col=0)
        gene_loc_data_csv = gene_loc_data_csv.fillna("None")
        row_name = "imagerow"
        col_name = "imagecol"
        cell_loc = gene_loc_data_csv[[row_name,col_name]].values
        distance_np = pdist(cell_loc, metric = "euclidean")
        distance_np_X =squareform(distance_np)
        distance_loc_csv = pd.DataFrame(index=gene_loc_data_csv.index, columns=gene_loc_data_csv.index,data = distance_np_X)
        threshold = 8
        num_big = np.where((0< distance_np_X)&(distance_np_X < threshold))[0].shape[0]
        #num_big = np.where((0< distance_np_X)&(distance_np_X < threshold))[0].shape[0]
        adj_matrix = np.zeros(distance_np_X.shape)
        non_zero_point = np.where((0 < distance_np_X) & (distance_np_X < threshold))
        adj_matrix = np.zeros(distance_np_X.shape)
        non_zero_point = np.where((0< distance_np_X)&(distance_np_X<threshold))
        for i in range(num_big):
            x = non_zero_point[0][i]
            y = non_zero_point[1][i]
            adj_matrix[x][y] = 1 
        adj_matrix = adj_matrix + np.eye(distance_np_X.shape[0])
        adj_matrix  = np.float32(adj_matrix)
        adj_matrix_crs = sparse.csr_matrix(adj_matrix)
        graph = dgl.from_scipy(adj_matrix_crs,eweight_name='w')
        min_cells = 5
        pca_n_comps = num_set
        adata = read_10x_h5("/home/sunhang/Embedding/SpaGCN/tutorial/data/" + sample_name +"/filtered_feature_bc_matrix.h5")
        spatial=pd.read_csv("/home/sunhang/Embedding/SpaGCN/tutorial/data/" + sample_name +"/tissue_positions_list.txt",sep=",",header=None,na_filter=False,index_col=0) 
        adata.obs["x1"]=spatial[1]
        adata.obs["x2"]=spatial[2]
        adata.obs["x3"]=spatial[3]
        adata.obs["x4"]=spatial[4]
        adata.obs["x5"]=spatial[5]
        #Select captured samples
        adata=adata[adata.obs["x1"]==1]
        adata.var_names=[i.upper() for i in list(adata.var_names)]
        adata.var["genename"]=adata.var.index.astype("str")

        adata.var_names_make_unique
        adata.obs = pd.merge(gene_loc_data_csv,adata.obs,right_index=True,left_on="barcode")
        adata.obs.index = adata.obs["barcode"]
        le = LabelEncoder()
        label = le.fit_transform(adata.obs['layer_guess_reordered_short'])
        adata.obs["lay_num"] = label
        adata = normalize(adata,
                        copy=True,
                        highly_genes=num_set,
                        size_factors=False,
                        normalize_input=True,
                        logtrans_input=True)

        adata_X = adata.X.astype(np.float32)
        graph.ndata["feat"] = torch.tensor(adata_X.copy())
        num_features = graph.ndata["feat"].shape[1]
        num_classes = len(set(adata.obs.lay_num))-1

        #pca_n_comps = num_set
        #adata.obs = pd.merge(gene_loc_data_csv,adata.obs,right_index=True,left_on="barcode")
    #     adata.obs.index = adata.obs["barcode"]
    #     le = LabelEncoder()
    #     label = le.fit_transform(adata.obs['layer_guess_reordered_short'])
    #     adata.obs["lay_num"] = label
    #     sc.pp.filter_genes(adata, min_cells=5)
    #     adata_X = sc.pp.normalize_total(adata, target_sum=1, exclude_highly_expressed=True, inplace=False)['X']
    #     adata_X = sc.pp.scale(adata_X)
        #adata_X = sc.pp.pca(adata_X, n_comps=pca_n_comps)
        graph.ndata["feat"] = torch.tensor(adata_X.copy())
        num_features = graph.ndata["feat"].shape[1]
        num_classes = len(set(adata.obs.lay_num))-1
        #for num_set in np.arange(1000,4000,500):
        print(num_set)
        args = parser.parse_args([])
        args.lr = 0.001
        args.lr_f = 0.01
        args.num_hidden = 512
        args.num_heads = 4
        args.weight_decay = 2e-4
        args.weight_decay_f= 1e-4
        args.max_epoch= 500
        args.max_epoch_f= 500
        args.mask_rate=  num_pro_set
        args.num_layers= 2
        args.encoder= "gat"
        args.decoder= "gat" 
        args.activation= "prelu"
        args.in_drop= 0.2
        args.attn_drop= 0.1
        args.linear_prob= True
        args.loss_fn= "sce" 
        args.drop_edge_rate=0.0
        args.optimizer= "adam"
        args.replace_rate= 0.05 
        args.alpha_l= 3
        args.scheduler= True
        args.dataset = "sp"


        #参数传递
        device = args.device if args.device >= 0 else "cpu"
        seeds = args.seeds
        dataset_name = args.dataset
        max_epoch = args.max_epoch
        max_epoch_f = args.max_epoch_f
        num_hidden = args.num_hidden
        num_layers = args.num_layers
        encoder_type = args.encoder
        decoder_type = args.decoder
        replace_rate = args.replace_rate

        optim_type = args.optimizer 
        loss_fn = args.loss_fn

        lr = args.lr
        weight_decay = args.weight_decay
        lr_f = args.lr_f
        weight_decay_f = args.weight_decay_f
        linear_prob = args.linear_prob
        load_model = args.load_model
        save_model = args.save_model
        logs = args.logging
        use_scheduler = args.scheduler
        args.num_features = num_features

        acc_list = []
        estp_acc_list = []
        times = 3

        #print(f"####### Run {i} for seed {seed}")
        #print(i)
        seed = 0
        set_random_seed(seed)
        if logs:
            logger = TBLogger(name=f"{dataset_name}_loss_{loss_fn}_rpr_{replace_rate}_nh_{num_hidden}_nl_{num_layers}_lr_{lr}_mp_{max_epoch}_mpf_{max_epoch_f}_wd_{weight_decay}_wdf_{weight_decay_f}_{encoder_type}_{decoder_type}")
        else:
            logger = None
        model = build_model(args)
        device = 1
        model.to(device)
        optimizer = create_optimizer(optim_type, model, lr, weight_decay)

        if use_scheduler:
            logging.info("Use schedular")
            scheduler = lambda epoch :( 1 + np.cos((epoch) * np.pi / max_epoch) ) * 0.5
            # scheduler = lambda epoch: epoch / warmup_steps if epoch < warmup_steps \
                    # else ( 1 + np.cos((epoch - warmup_steps) * np.pi / (max_epoch - warmup_steps))) * 0.5
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=scheduler)
        else:
            scheduler = None
        #训练模型
        x = graph.ndata["feat"]
        if not load_model:
            model = pretrain(model, graph, x, optimizer, max_epoch, device, scheduler, num_classes, lr_f, weight_decay_f, max_epoch_f, linear_prob, logger)
            model = model.cpu()
        x = graph.ndata["feat"]
        model.to(device)
        embedding = model.embed(graph.to(device), x.to(device))
        new_pred = kMeans_use(embedding.cpu().detach().numpy(),num_classes)
        adata.obs["pre"] = new_pred
        score = adjusted_rand_score(adata.obs.lay_num.values, new_pred )
        print("first cluster:")
        print(score)
        print("结果:" + str(Counter(new_pred)))
        print("ground_truth :"  + str(Counter(adata.obs.lay_num.values)))
        x = graph.ndata["feat"]
        test = model.embed(graph.to(device), x.to(device))
        test_new_pred = kMeans_use(test.cpu().detach().numpy(),num_classes)
        score = adjusted_rand_score(adata.obs.lay_num.values, test_new_pred )
        adata.obs["second_pre"] = test_new_pred
        print("second cluster:")
        print(score)
        print("结果:" + str(Counter(test_new_pred)))
        print("ground_truth :"  + str(Counter(adata.obs.lay_num.values)))
        #drawPicture(adata.obs,"imagecol","imagerow",colorattribute="lay_num",save_file= None)
        #drawPicture(adata.obs,"imagecol","imagerow",colorattribute="pre",save_file= None)
        #drawPicture(adata.obs,"imagecol","imagerow",colorattribute="second_pre",save_file= None)

200
0.0


  utils.warn_names_duplicates("var")
2022-09-11 20:39:55,280 - INFO - Use schedular
2022-09-11 20:39:55,281 - INFO - start training..


200


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:14<00:00, 33.85it/s]


first cluster:
0.1405460539335906
结果:Counter({6: 1913, 3: 511, 0: 484, 1: 380, 2: 274, 5: 49, 4: 28})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.12375866853124855
结果:Counter({1: 1821, 0: 598, 5: 464, 4: 253, 3: 233, 2: 199, 6: 71})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:40:13,514 - INFO - Use schedular
2022-09-11 20:40:13,515 - INFO - start training..


200


# Epoch 499: train_loss: 0.2201: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.49it/s]


first cluster:
0.231102796031939
结果:Counter({2: 1482, 5: 999, 0: 482, 4: 383, 3: 276, 1: 16, 6: 1})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2359263826312535
结果:Counter({3: 1382, 5: 1169, 2: 536, 4: 517, 1: 18, 0: 16, 6: 1})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:40:38,464 - INFO - Use schedular
2022-09-11 20:40:38,478 - INFO - start training..


200


# Epoch 499: train_loss: 0.2237: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:22<00:00, 22.72it/s]


first cluster:
0.28257329288408045
结果:Counter({5: 1497, 1: 820, 2: 614, 3: 371, 6: 238, 0: 88, 4: 11})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.14304897983974696
结果:Counter({1: 1793, 4: 691, 0: 509, 3: 342, 2: 293, 6: 10, 5: 1})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:41:13,400 - INFO - Use schedular
2022-09-11 20:41:13,403 - INFO - start training..


200


# Epoch 499: train_loss: 0.2445: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 26.14it/s]


first cluster:
0.2964949228810805
结果:Counter({6: 866, 4: 852, 5: 583, 1: 371, 3: 369, 2: 339, 0: 259})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.28256116950028554
结果:Counter({3: 1197, 4: 859, 1: 565, 5: 342, 6: 337, 2: 268, 0: 71})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:41:38,176 - INFO - Use schedular
2022-09-11 20:41:38,176 - INFO - start training..


200


# Epoch 499: train_loss: 0.2514: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.40it/s]


first cluster:
0.2954338779503472
结果:Counter({0: 880, 4: 816, 2: 554, 5: 402, 1: 363, 6: 359, 3: 265})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.30124349391266764
结果:Counter({5: 896, 0: 796, 2: 566, 6: 397, 4: 383, 3: 346, 1: 255})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:41:56,968 - INFO - Use schedular
2022-09-11 20:41:56,969 - INFO - start training..


200


# Epoch 499: train_loss: 0.2842: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.57it/s]


first cluster:
0.31074631775262607
结果:Counter({5: 901, 1: 891, 6: 753, 4: 393, 3: 367, 2: 246, 0: 88})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.301319321702017
结果:Counter({3: 833, 4: 791, 2: 520, 6: 501, 0: 363, 1: 352, 5: 279})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:42:15,514 - INFO - Use schedular
2022-09-11 20:42:15,514 - INFO - start training..


200


# Epoch 499: train_loss: 0.3230: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 31.98it/s]


first cluster:
0.28854244187776734
结果:Counter({4: 888, 0: 754, 3: 463, 5: 463, 6: 434, 1: 374, 2: 263})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2910660890495266
结果:Counter({6: 877, 3: 774, 4: 484, 2: 434, 0: 431, 1: 367, 5: 272})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:42:34,533 - INFO - Use schedular
2022-09-11 20:42:34,534 - INFO - start training..


200


# Epoch 499: train_loss: 0.4015: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.73it/s]


first cluster:
0.2927542976282484
结果:Counter({3: 851, 4: 748, 0: 483, 2: 473, 5: 458, 1: 349, 6: 277})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.29097564507488044
结果:Counter({2: 897, 4: 704, 0: 515, 5: 447, 6: 432, 3: 383, 1: 261})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:42:55,690 - INFO - Use schedular
2022-09-11 20:42:55,700 - INFO - start training..


200


# Epoch 499: train_loss: 0.5070: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:22<00:00, 21.76it/s]


first cluster:
0.2906664751612795
结果:Counter({4: 892, 0: 717, 6: 470, 5: 465, 2: 436, 3: 394, 1: 265})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.29533005537074924
结果:Counter({0: 901, 5: 735, 6: 473, 2: 465, 3: 420, 1: 353, 4: 292})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
200
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:43:29,761 - INFO - Use schedular
2022-09-11 20:43:29,769 - INFO - start training..


200


# Epoch 499: train_loss: 0.6949: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:21<00:00, 23.16it/s]


first cluster:
0.28150042955526766
结果:Counter({0: 906, 6: 695, 5: 484, 4: 472, 2: 444, 1: 351, 3: 287})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2842019213037552
结果:Counter({5: 928, 0: 691, 3: 469, 2: 454, 6: 444, 1: 378, 4: 275})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:43:59,481 - INFO - Use schedular
2022-09-11 20:43:59,483 - INFO - start training..


300


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:14<00:00, 33.94it/s]


first cluster:
0.1869317194489913
结果:Counter({2: 1301, 0: 779, 6: 427, 3: 405, 1: 382, 5: 307, 4: 38})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.22118635959990207
结果:Counter({1: 1360, 3: 654, 4: 500, 5: 437, 2: 365, 6: 304, 0: 19})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:44:17,721 - INFO - Use schedular
2022-09-11 20:44:17,722 - INFO - start training..


300


# Epoch 499: train_loss: 0.2316: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.40it/s]


first cluster:
0.2945956018741985
结果:Counter({2: 851, 4: 820, 5: 603, 3: 404, 0: 357, 1: 348, 6: 256})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2961501769631115
结果:Counter({3: 1091, 1: 840, 6: 460, 0: 403, 5: 373, 2: 258, 4: 214})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:44:36,766 - INFO - Use schedular
2022-09-11 20:44:36,767 - INFO - start training..


300


# Epoch 499: train_loss: 0.2233: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.46it/s]


first cluster:
0.2991209994122101
结果:Counter({5: 787, 3: 762, 4: 638, 6: 429, 1: 412, 0: 369, 2: 242})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3119100445204277
结果:Counter({5: 783, 6: 701, 4: 673, 1: 481, 3: 390, 2: 375, 0: 236})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:44:55,799 - INFO - Use schedular
2022-09-11 20:44:55,800 - INFO - start training..


300


# Epoch 499: train_loss: 0.2405: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 31.40it/s]


first cluster:
0.302226417935296
结果:Counter({5: 740, 0: 733, 6: 609, 2: 473, 3: 451, 1: 364, 4: 269})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2778865583617231
结果:Counter({4: 1067, 0: 765, 2: 488, 6: 414, 1: 388, 3: 264, 5: 253})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:45:15,084 - INFO - Use schedular
2022-09-11 20:45:15,085 - INFO - start training..


300


# Epoch 499: train_loss: 0.2666: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.41it/s]


first cluster:
0.3062037316937529
结果:Counter({2: 756, 3: 730, 6: 577, 4: 479, 0: 478, 5: 357, 1: 262})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3077396549656543
结果:Counter({5: 726, 6: 713, 3: 593, 0: 487, 4: 481, 1: 370, 2: 269})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:45:39,480 - INFO - Use schedular
2022-09-11 20:45:39,494 - INFO - start training..


300


# Epoch 499: train_loss: 0.2879: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:20<00:00, 24.38it/s]


first cluster:
0.2978128402674119
结果:Counter({2: 816, 0: 714, 4: 552, 6: 479, 5: 446, 3: 365, 1: 267})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.31173814588129495
结果:Counter({2: 799, 6: 683, 4: 533, 5: 506, 0: 479, 1: 382, 3: 257})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:46:13,087 - INFO - Use schedular
2022-09-11 20:46:13,088 - INFO - start training..


300


# Epoch 499: train_loss: 0.3375: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 25.18it/s]


first cluster:
0.303037666519524
结果:Counter({2: 812, 1: 738, 6: 548, 3: 476, 4: 439, 5: 357, 0: 269})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.296578411088169
结果:Counter({3: 880, 6: 734, 4: 501, 2: 450, 0: 444, 1: 360, 5: 270})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:46:39,263 - INFO - Use schedular
2022-09-11 20:46:39,264 - INFO - start training..


300


# Epoch 499: train_loss: 0.4040: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.26it/s]


first cluster:
0.3076325854826019
结果:Counter({1: 798, 2: 725, 5: 580, 6: 474, 3: 435, 0: 363, 4: 264})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.31698471625627284
结果:Counter({3: 813, 0: 727, 4: 525, 6: 506, 2: 436, 1: 382, 5: 250})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:46:58,152 - INFO - Use schedular
2022-09-11 20:46:58,153 - INFO - start training..


300


# Epoch 499: train_loss: 0.5160: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 29.33it/s]


first cluster:
0.2957991434165294
结果:Counter({5: 858, 0: 736, 6: 510, 4: 503, 2: 401, 1: 347, 3: 284})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2919732364692902
结果:Counter({6: 847, 0: 737, 3: 504, 5: 481, 1: 421, 2: 379, 4: 270})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
300
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:47:24,039 - INFO - Use schedular
2022-09-11 20:47:24,049 - INFO - start training..


300


# Epoch 499: train_loss: 0.6762: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 28.92it/s]


first cluster:
0.265603013598942
结果:Counter({4: 1028, 2: 860, 0: 561, 5: 476, 1: 263, 3: 235, 6: 216})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.254704763229299
结果:Counter({2: 1055, 4: 857, 0: 542, 6: 481, 5: 272, 1: 220, 3: 212})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:47:44,758 - INFO - Use schedular
2022-09-11 20:47:44,759 - INFO - start training..


400


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.91it/s]


first cluster:
0.12781777287565954
结果:Counter({3: 1781, 6: 696, 2: 421, 4: 263, 1: 235, 0: 204, 5: 39})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.14690602707282754
结果:Counter({5: 1555, 3: 876, 2: 450, 0: 244, 4: 244, 6: 233, 1: 37})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:48:03,645 - INFO - Use schedular
2022-09-11 20:48:03,646 - INFO - start training..


400


# Epoch 499: train_loss: 0.2365: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.54it/s]


first cluster:
0.1763949992721433
结果:Counter({5: 1400, 2: 1080, 4: 441, 3: 364, 1: 259, 0: 93, 6: 2})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.17717339986570033
结果:Counter({2: 1443, 3: 1066, 0: 455, 5: 359, 4: 302, 6: 9, 1: 5})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:48:30,006 - INFO - Use schedular
2022-09-11 20:48:30,010 - INFO - start training..


400


# Epoch 499: train_loss: 0.2429: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:21<00:00, 23.21it/s]


first cluster:
0.22756581773792062
结果:Counter({3: 988, 2: 973, 5: 521, 1: 496, 4: 377, 6: 283, 0: 1})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.33288988402427866
结果:Counter({5: 877, 2: 870, 6: 618, 1: 615, 0: 375, 4: 282, 3: 2})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:49:03,826 - INFO - Use schedular
2022-09-11 20:49:03,842 - INFO - start training..


400


# Epoch 499: train_loss: 0.2595: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:20<00:00, 24.58it/s]


first cluster:
0.3252061256252355
结果:Counter({1: 775, 6: 768, 4: 590, 0: 491, 2: 382, 5: 361, 3: 272})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3134199502718419
结果:Counter({4: 846, 2: 750, 5: 617, 0: 427, 3: 374, 6: 350, 1: 275})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:49:29,183 - INFO - Use schedular
2022-09-11 20:49:29,184 - INFO - start training..


400


# Epoch 499: train_loss: 0.2778: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.81it/s]


first cluster:
0.32002396781218156
结果:Counter({0: 844, 2: 702, 6: 626, 3: 459, 1: 368, 5: 360, 4: 280})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32393758096575787
结果:Counter({1: 850, 4: 719, 0: 607, 5: 483, 6: 360, 3: 346, 2: 274})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:49:55,190 - INFO - Use schedular
2022-09-11 20:49:55,194 - INFO - start training..


400


# Epoch 499: train_loss: 0.3162: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 25.69it/s]


first cluster:
0.32074748628613425
结果:Counter({0: 861, 6: 724, 5: 632, 2: 443, 1: 363, 3: 342, 4: 274})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32460676761589846
结果:Counter({0: 779, 3: 703, 4: 645, 2: 494, 1: 376, 6: 376, 5: 266})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:50:19,014 - INFO - Use schedular
2022-09-11 20:50:19,015 - INFO - start training..


400


# Epoch 499: train_loss: 0.3461: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.13it/s]


first cluster:
0.32800841798848535
结果:Counter({4: 798, 1: 727, 3: 621, 2: 501, 5: 376, 6: 353, 0: 263})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3315201724317686
结果:Counter({4: 835, 0: 716, 6: 655, 2: 479, 5: 392, 3: 305, 1: 257})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:50:42,603 - INFO - Use schedular
2022-09-11 20:50:42,617 - INFO - start training..


400


# Epoch 499: train_loss: 0.4273: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 26.09it/s]


first cluster:
0.3239226438517446
结果:Counter({6: 796, 3: 750, 2: 630, 4: 465, 0: 368, 5: 344, 1: 286})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3207654225739009
结果:Counter({0: 781, 3: 757, 4: 607, 2: 479, 5: 377, 6: 368, 1: 270})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:51:09,099 - INFO - Use schedular
2022-09-11 20:51:09,102 - INFO - start training..


400


# Epoch 499: train_loss: 0.5188: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 25.45it/s]


first cluster:
0.3311729052043075
结果:Counter({5: 757, 2: 733, 4: 600, 3: 547, 1: 404, 0: 353, 6: 245})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32731380437311264
结果:Counter({6: 737, 3: 730, 4: 598, 2: 543, 1: 396, 0: 368, 5: 267})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
400
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:51:36,450 - INFO - Use schedular
2022-09-11 20:51:36,451 - INFO - start training..


400


# Epoch 499: train_loss: 0.6813: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 29.96it/s]


first cluster:
0.32039486388213706
结果:Counter({6: 728, 2: 723, 1: 578, 5: 541, 3: 416, 4: 368, 0: 285})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.31942745465644007
结果:Counter({3: 753, 0: 734, 6: 574, 2: 504, 1: 405, 5: 384, 4: 285})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:51:56,247 - INFO - Use schedular
2022-09-11 20:51:56,248 - INFO - start training..


500


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.99it/s]


first cluster:
0.1744873211024921
结果:Counter({6: 1234, 3: 680, 1: 527, 2: 525, 0: 253, 4: 234, 5: 186})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.18014888596567988
结果:Counter({2: 1341, 1: 601, 4: 497, 5: 493, 3: 277, 6: 247, 0: 183})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:52:14,604 - INFO - Use schedular
2022-09-11 20:52:14,605 - INFO - start training..


500


# Epoch 499: train_loss: 0.2701: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.88it/s]


first cluster:
0.32672195317852076
结果:Counter({0: 909, 4: 907, 5: 731, 6: 473, 1: 350, 3: 263, 2: 6})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.35064149912966613
结果:Counter({4: 918, 1: 773, 5: 518, 2: 417, 3: 372, 6: 363, 0: 278})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:52:34,765 - INFO - Use schedular
2022-09-11 20:52:34,766 - INFO - start training..


500


# Epoch 499: train_loss: 0.2557: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 25.24it/s]


first cluster:
0.20588097509807232
结果:Counter({0: 1122, 6: 976, 4: 476, 5: 379, 3: 360, 2: 259, 1: 67})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.345548245029568
结果:Counter({3: 823, 6: 804, 2: 512, 1: 453, 4: 432, 0: 354, 5: 261})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:53:07,410 - INFO - Use schedular
2022-09-11 20:53:07,416 - INFO - start training..


500


# Epoch 499: train_loss: 0.2718: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:22<00:00, 22.68it/s]


first cluster:
0.30501115794082506
结果:Counter({4: 851, 3: 747, 6: 648, 5: 417, 1: 381, 2: 317, 0: 278})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.30700846559303013
结果:Counter({0: 846, 5: 719, 3: 641, 6: 477, 4: 353, 2: 319, 1: 284})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:53:39,957 - INFO - Use schedular
2022-09-11 20:53:39,958 - INFO - start training..


500


# Epoch 499: train_loss: 0.2802: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.51it/s]


first cluster:
0.3042936240277558
结果:Counter({6: 890, 3: 717, 1: 587, 4: 410, 2: 390, 5: 390, 0: 255})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.29966196357706215
结果:Counter({2: 898, 5: 695, 6: 639, 3: 432, 1: 349, 0: 346, 4: 280})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:54:00,441 - INFO - Use schedular
2022-09-11 20:54:00,442 - INFO - start training..


500


# Epoch 499: train_loss: 0.3264: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.69it/s]


first cluster:
0.3098175842346206
结果:Counter({1: 898, 2: 703, 5: 618, 4: 416, 6: 379, 3: 374, 0: 251})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3231897593149914
结果:Counter({2: 828, 5: 695, 4: 684, 6: 417, 0: 398, 1: 379, 3: 238})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:54:19,880 - INFO - Use schedular
2022-09-11 20:54:19,881 - INFO - start training..


500


# Epoch 499: train_loss: 0.3623: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.85it/s]


first cluster:
0.32411612291367753
结果:Counter({1: 859, 6: 715, 5: 605, 4: 443, 0: 401, 3: 375, 2: 241})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3256449926303276
结果:Counter({4: 859, 0: 703, 6: 616, 3: 474, 1: 370, 5: 359, 2: 258})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:54:39,545 - INFO - Use schedular
2022-09-11 20:54:39,546 - INFO - start training..


500


# Epoch 499: train_loss: 0.4272: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.37it/s]


first cluster:
0.3260863464033105
结果:Counter({2: 854, 3: 678, 5: 633, 6: 486, 0: 364, 1: 363, 4: 261})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3164718323583422
结果:Counter({0: 871, 3: 673, 4: 604, 6: 470, 1: 390, 5: 386, 2: 245})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:54:59,484 - INFO - Use schedular
2022-09-11 20:54:59,485 - INFO - start training..


500


# Epoch 499: train_loss: 0.5490: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 27.98it/s]


first cluster:
0.31533641817982266
结果:Counter({4: 779, 1: 681, 6: 613, 0: 525, 3: 392, 5: 390, 2: 259})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32450944249937846
结果:Counter({5: 748, 2: 724, 4: 577, 3: 544, 1: 396, 0: 376, 6: 274})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
500
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:55:26,703 - INFO - Use schedular
2022-09-11 20:55:26,707 - INFO - start training..


500


# Epoch 499: train_loss: 0.7007: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:21<00:00, 22.84it/s]


first cluster:
0.32235337729250774
结果:Counter({0: 726, 2: 698, 6: 617, 3: 542, 1: 404, 4: 371, 5: 281})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.31975125861801174
结果:Counter({2: 720, 4: 708, 6: 608, 1: 541, 5: 388, 3: 386, 0: 288})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:55:59,113 - INFO - Use schedular
2022-09-11 20:55:59,116 - INFO - start training..


600


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 28.06it/s]


first cluster:
0.14586728430285234
结果:Counter({6: 1582, 3: 636, 4: 385, 1: 369, 2: 339, 5: 299, 0: 29})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.1383340650667187
结果:Counter({1: 1676, 5: 481, 4: 426, 0: 368, 2: 331, 3: 325, 6: 32})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:56:21,283 - INFO - Use schedular
2022-09-11 20:56:21,284 - INFO - start training..


600


# Epoch 499: train_loss: 0.2842: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 31.11it/s]


first cluster:
0.32191328271240227
结果:Counter({1: 846, 5: 797, 4: 483, 6: 468, 2: 391, 3: 368, 0: 286})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.20636914052396466
结果:Counter({6: 1381, 0: 690, 3: 495, 5: 373, 2: 362, 1: 275, 4: 63})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:56:41,170 - INFO - Use schedular
2022-09-11 20:56:41,171 - INFO - start training..


600


# Epoch 499: train_loss: 0.2741: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 31.13it/s]


first cluster:
0.3053776840805715
结果:Counter({4: 820, 6: 796, 1: 561, 0: 422, 3: 417, 2: 351, 5: 272})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32301381846007043
结果:Counter({4: 852, 3: 763, 2: 527, 1: 436, 6: 432, 5: 361, 0: 268})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:57:00,748 - INFO - Use schedular
2022-09-11 20:57:00,749 - INFO - start training..


600


# Epoch 499: train_loss: 0.2881: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.12it/s]


first cluster:
0.3122395281793532
结果:Counter({1: 785, 4: 760, 3: 530, 0: 465, 6: 452, 2: 365, 5: 282})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.30865592711817347
结果:Counter({0: 791, 2: 790, 4: 583, 3: 439, 6: 400, 1: 375, 5: 261})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:57:21,089 - INFO - Use schedular
2022-09-11 20:57:21,092 - INFO - start training..


600


# Epoch 499: train_loss: 0.3108: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 29.85it/s]


first cluster:
0.30113681873236586
结果:Counter({2: 843, 5: 703, 1: 591, 3: 446, 6: 424, 4: 359, 0: 273})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.31065545417605456
结果:Counter({1: 827, 3: 713, 4: 629, 6: 428, 2: 417, 5: 354, 0: 271})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:57:42,288 - INFO - Use schedular
2022-09-11 20:57:42,289 - INFO - start training..


600


# Epoch 499: train_loss: 0.3401: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 25.07it/s]


first cluster:
0.32119420582515257
结果:Counter({6: 800, 5: 681, 4: 656, 2: 473, 0: 416, 3: 356, 1: 257})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32309716812027417
结果:Counter({4: 827, 1: 703, 6: 621, 5: 482, 2: 384, 3: 375, 0: 247})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:58:11,061 - INFO - Use schedular
2022-09-11 20:58:11,068 - INFO - start training..


600


# Epoch 499: train_loss: 0.3839: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 28.42it/s]


first cluster:
0.3193814804340261
结果:Counter({1: 845, 3: 685, 4: 631, 0: 453, 6: 393, 5: 378, 2: 254})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.32435179077065274
结果:Counter({4: 851, 6: 669, 2: 632, 3: 448, 5: 410, 1: 386, 0: 243})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:58:37,324 - INFO - Use schedular
2022-09-11 20:58:37,330 - INFO - start training..


600


# Epoch 499: train_loss: 0.4606: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 26.26it/s]


first cluster:
0.32758721755777315
结果:Counter({0: 823, 5: 680, 6: 608, 2: 487, 3: 411, 1: 387, 4: 243})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3088346306445897
结果:Counter({5: 822, 1: 659, 2: 646, 4: 455, 6: 425, 0: 374, 3: 258})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:59:00,514 - INFO - Use schedular
2022-09-11 20:59:00,515 - INFO - start training..


600


# Epoch 499: train_loss: 0.5625: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 29.98it/s]


first cluster:
0.32499096894329393
结果:Counter({3: 808, 0: 668, 6: 631, 4: 495, 5: 401, 1: 376, 2: 260})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3245237488314663
结果:Counter({2: 760, 6: 711, 3: 637, 5: 515, 0: 396, 1: 347, 4: 273})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
600
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:59:20,456 - INFO - Use schedular
2022-09-11 20:59:20,457 - INFO - start training..


600


# Epoch 499: train_loss: 0.7034: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 29.88it/s]


first cluster:
0.3240235780638329
结果:Counter({2: 752, 4: 686, 1: 573, 3: 562, 0: 394, 6: 393, 5: 279})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.31691037286930124
结果:Counter({3: 743, 5: 685, 2: 618, 6: 554, 1: 389, 0: 380, 4: 270})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:59:40,438 - INFO - Use schedular
2022-09-11 20:59:40,439 - INFO - start training..


700


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:15<00:00, 32.27it/s]


first cluster:
0.13661177703231706
结果:Counter({1: 1493, 6: 525, 5: 486, 2: 460, 4: 229, 3: 227, 0: 219})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.15524421800128108
结果:Counter({4: 1357, 6: 852, 1: 365, 5: 336, 2: 290, 3: 244, 0: 195})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 20:59:59,439 - INFO - Use schedular
2022-09-11 20:59:59,440 - INFO - start training..


700


# Epoch 499: train_loss: 0.2873: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.32it/s]


first cluster:
0.2144740895764231
结果:Counter({5: 1368, 0: 665, 2: 529, 1: 416, 4: 339, 6: 307, 3: 15})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.21927967760810155
结果:Counter({6: 1381, 5: 716, 4: 498, 1: 382, 3: 345, 0: 302, 2: 15})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:00:19,727 - INFO - Use schedular
2022-09-11 21:00:19,729 - INFO - start training..


700


# Epoch 499: train_loss: 0.2810: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:18<00:00, 26.54it/s]


first cluster:
0.33690221269338755
结果:Counter({6: 793, 5: 743, 0: 527, 4: 509, 2: 420, 1: 355, 3: 292})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.28987611648982325
结果:Counter({6: 821, 5: 819, 0: 617, 3: 397, 2: 367, 4: 341, 1: 277})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:00:48,809 - INFO - Use schedular
2022-09-11 21:00:48,810 - INFO - start training..


700


# Epoch 499: train_loss: 0.2876: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 28.30it/s]


first cluster:
0.29703331649459636
结果:Counter({5: 881, 4: 797, 1: 493, 6: 431, 3: 409, 0: 348, 2: 280})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.343740667458722
结果:Counter({0: 851, 1: 824, 4: 514, 2: 413, 5: 404, 3: 349, 6: 284})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:01:10,721 - INFO - Use schedular
2022-09-11 21:01:10,722 - INFO - start training..


700


# Epoch 499: train_loss: 0.3086: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:17<00:00, 29.07it/s]


first cluster:
0.3532307916445209
结果:Counter({6: 860, 3: 749, 0: 501, 4: 483, 5: 413, 1: 370, 2: 263})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3430514994473769
结果:Counter({6: 838, 5: 758, 0: 522, 3: 493, 4: 408, 1: 346, 2: 274})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:01:31,877 - INFO - Use schedular
2022-09-11 21:01:31,878 - INFO - start training..


700


# Epoch 499: train_loss: 0.3452: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:18<00:00, 27.77it/s]


first cluster:
0.33102616810767066
结果:Counter({1: 881, 2: 725, 6: 537, 5: 476, 3: 386, 0: 375, 4: 259})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.33057984237819565
结果:Counter({1: 875, 6: 735, 5: 578, 0: 464, 4: 372, 2: 363, 3: 252})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:01:53,694 - INFO - Use schedular
2022-09-11 21:01:53,695 - INFO - start training..


700


# Epoch 499: train_loss: 0.3861: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:19<00:00, 26.18it/s]


first cluster:
0.337038757013725
结果:Counter({4: 981, 3: 761, 0: 702, 5: 531, 1: 246, 6: 212, 2: 206})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.33350454213310926
结果:Counter({6: 832, 0: 711, 2: 589, 5: 460, 4: 438, 1: 360, 3: 249})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:02:17,753 - INFO - Use schedular
2022-09-11 21:02:17,754 - INFO - start training..


700


# Epoch 499: train_loss: 0.4463: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:18<00:00, 27.13it/s]


first cluster:
0.3472644085845035
结果:Counter({4: 854, 0: 846, 2: 700, 5: 585, 1: 231, 6: 220, 3: 203})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3374917787736315
结果:Counter({1: 874, 5: 856, 3: 731, 4: 525, 2: 247, 0: 212, 6: 194})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:02:39,563 - INFO - Use schedular
2022-09-11 21:02:39,564 - INFO - start training..


700


# Epoch 499: train_loss: 0.5601: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:18<00:00, 26.56it/s]


first cluster:
0.33802002333594683
结果:Counter({5: 755, 1: 679, 3: 675, 4: 550, 2: 371, 6: 351, 0: 258})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.34163647221072047
结果:Counter({2: 757, 5: 702, 6: 648, 4: 507, 1: 408, 3: 386, 0: 231})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
700
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:03:02,056 - INFO - Use schedular
2022-09-11 21:03:02,057 - INFO - start training..


700


# Epoch 499: train_loss: 0.7189: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:16<00:00, 30.64it/s]


first cluster:
0.3317173590881764
结果:Counter({0: 803, 5: 741, 2: 631, 6: 466, 1: 404, 3: 314, 4: 280})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3308426556060349
结果:Counter({5: 818, 3: 745, 2: 640, 6: 459, 4: 377, 0: 305, 1: 295})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:03:21,909 - INFO - Use schedular
2022-09-11 21:03:21,911 - INFO - start training..


800


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:13<00:00, 37.69it/s]


first cluster:
0.2940811931222638
结果:Counter({6: 1021, 0: 875, 2: 702, 3: 398, 5: 347, 1: 278, 4: 18})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.17470628232850308
结果:Counter({1: 1341, 4: 955, 0: 420, 2: 356, 5: 293, 6: 266, 3: 8})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:03:39,250 - INFO - Use schedular
2022-09-11 21:03:39,251 - INFO - start training..


800


# Epoch 499: train_loss: 0.2936: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:13<00:00, 36.66it/s]


first cluster:
0.228821356691231
结果:Counter({5: 1347, 3: 571, 4: 570, 6: 420, 1: 392, 0: 321, 2: 18})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2733847555144495
结果:Counter({2: 1285, 1: 587, 4: 453, 0: 422, 3: 372, 5: 311, 6: 209})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:03:56,496 - INFO - Use schedular
2022-09-11 21:03:56,497 - INFO - start training..


800


# Epoch 499: train_loss: 0.2807: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:13<00:00, 37.01it/s]


first cluster:
0.3578538038957833
结果:Counter({1: 899, 0: 641, 4: 514, 6: 510, 5: 410, 3: 362, 2: 303})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.25097905473172644
结果:Counter({3: 1128, 2: 629, 5: 466, 4: 400, 1: 378, 0: 348, 6: 290})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:04:13,601 - INFO - Use schedular
2022-09-11 21:04:13,602 - INFO - start training..


800


# Epoch 499: train_loss: 0.2967: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:13<00:00, 36.43it/s]


first cluster:
0.35922472926494053
结果:Counter({2: 964, 4: 572, 6: 535, 0: 459, 5: 440, 1: 373, 3: 296})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.35428737590526654
结果:Counter({5: 961, 3: 595, 2: 541, 6: 453, 4: 411, 1: 402, 0: 276})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:04:31,031 - INFO - Use schedular
2022-09-11 21:04:31,032 - INFO - start training..


800


# Epoch 499: train_loss: 0.3223: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:14<00:00, 35.70it/s]


first cluster:
0.3388047777025608
结果:Counter({6: 818, 0: 769, 1: 510, 5: 473, 4: 414, 2: 401, 3: 254})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.356387833681516
结果:Counter({5: 808, 4: 700, 0: 526, 1: 495, 6: 456, 2: 388, 3: 266})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:04:48,463 - INFO - Use schedular
2022-09-11 21:04:48,463 - INFO - start training..


800


# Epoch 499: train_loss: 0.3437: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.07it/s]


first cluster:
0.3588817531091981
结果:Counter({0: 828, 3: 713, 6: 498, 2: 490, 5: 486, 1: 360, 4: 264})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3352475953085028
结果:Counter({6: 809, 5: 778, 0: 599, 4: 458, 2: 373, 1: 364, 3: 258})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:05:02,484 - INFO - Use schedular
2022-09-11 21:05:02,485 - INFO - start training..


800


# Epoch 499: train_loss: 0.3838: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 49.25it/s]


first cluster:
0.3279141897116558
结果:Counter({6: 804, 5: 702, 0: 681, 2: 488, 1: 355, 3: 348, 4: 261})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3095863412507416
结果:Counter({2: 924, 6: 797, 5: 731, 0: 497, 4: 250, 3: 237, 1: 203})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:05:16,042 - INFO - Use schedular
2022-09-11 21:05:16,043 - INFO - start training..


800


# Epoch 499: train_loss: 0.4739: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 48.57it/s]


first cluster:
0.3340168224541561
结果:Counter({4: 850, 2: 828, 5: 792, 0: 519, 3: 245, 6: 217, 1: 188})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3385755620296561
结果:Counter({0: 843, 2: 820, 5: 778, 6: 544, 1: 241, 4: 207, 3: 206})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:05:29,432 - INFO - Use schedular
2022-09-11 21:05:29,432 - INFO - start training..


800


# Epoch 499: train_loss: 0.5703: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 48.14it/s]


first cluster:
0.32446163247588033
结果:Counter({3: 783, 2: 674, 5: 651, 6: 532, 4: 390, 0: 350, 1: 259})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3222417309911862
结果:Counter({3: 814, 4: 665, 6: 624, 5: 534, 1: 374, 2: 362, 0: 266})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
800
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:05:43,015 - INFO - Use schedular
2022-09-11 21:05:43,015 - INFO - start training..


800


# Epoch 499: train_loss: 0.7138: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.85it/s]


first cluster:
0.33594709322796545
结果:Counter({6: 749, 5: 673, 1: 573, 4: 561, 2: 409, 0: 399, 3: 275})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3321942224765099
结果:Counter({1: 756, 5: 668, 4: 583, 3: 571, 6: 396, 0: 393, 2: 272})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.0


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:05:56,543 - INFO - Use schedular
2022-09-11 21:05:56,544 - INFO - start training..


900


# Epoch 499: train_loss: nan: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:09<00:00, 51.14it/s]


first cluster:
0.20689355153401467
结果:Counter({0: 1176, 1: 732, 6: 482, 5: 372, 3: 369, 2: 314, 4: 194})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.19136371453809195
结果:Counter({5: 1131, 3: 725, 2: 559, 4: 387, 0: 350, 1: 304, 6: 183})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.1


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:06:10,029 - INFO - Use schedular
2022-09-11 21:06:10,030 - INFO - start training..


900


# Epoch 499: train_loss: 0.2959: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 49.70it/s]


first cluster:
0.22065302051627825
结果:Counter({1: 1253, 5: 540, 6: 507, 2: 388, 0: 374, 3: 312, 4: 265})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.2008968436209348
结果:Counter({0: 1160, 6: 703, 3: 527, 4: 377, 1: 356, 5: 312, 2: 204})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.2


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:06:23,451 - INFO - Use schedular
2022-09-11 21:06:23,452 - INFO - start training..


900


# Epoch 499: train_loss: 0.2875: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 49.04it/s]


first cluster:
0.3268491408870189
结果:Counter({6: 878, 0: 802, 5: 496, 4: 450, 3: 367, 1: 366, 2: 280})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.35530019585194644
结果:Counter({6: 867, 0: 751, 5: 480, 3: 453, 2: 411, 1: 374, 4: 303})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.30000000000000004


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:06:36,791 - INFO - Use schedular
2022-09-11 21:06:36,791 - INFO - start training..


900


# Epoch 499: train_loss: 0.3020: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 48.31it/s]


first cluster:
0.35011619243215436
结果:Counter({5: 847, 6: 798, 2: 519, 4: 446, 3: 375, 1: 371, 0: 283})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3251581545025984
结果:Counter({0: 935, 2: 780, 4: 481, 5: 424, 3: 393, 1: 353, 6: 273})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.4


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:06:50,736 - INFO - Use schedular
2022-09-11 21:06:50,737 - INFO - start training..


900


# Epoch 499: train_loss: 0.3245: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.80it/s]


first cluster:
0.3224049093776049
结果:Counter({4: 837, 1: 819, 5: 509, 6: 458, 2: 393, 3: 360, 0: 263})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3248018571183909
结果:Counter({2: 824, 5: 741, 6: 520, 0: 497, 3: 399, 1: 385, 4: 273})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.5


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:07:04,276 - INFO - Use schedular
2022-09-11 21:07:04,277 - INFO - start training..


900


# Epoch 499: train_loss: 0.3463: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 48.55it/s]


first cluster:
0.3689221529013893
结果:Counter({6: 837, 2: 804, 3: 485, 4: 477, 0: 412, 5: 375, 1: 249})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.34245557756915534
结果:Counter({0: 811, 6: 759, 4: 575, 3: 491, 5: 374, 1: 371, 2: 258})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.6000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:07:17,662 - INFO - Use schedular
2022-09-11 21:07:17,663 - INFO - start training..


900


# Epoch 499: train_loss: 0.3999: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.28it/s]


first cluster:
0.3368868540193542
结果:Counter({0: 820, 5: 755, 6: 616, 2: 500, 1: 369, 3: 338, 4: 241})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.22312742073075376
结果:Counter({5: 1088, 1: 867, 2: 548, 4: 477, 3: 245, 6: 218, 0: 196})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.7000000000000001


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:07:31,396 - INFO - Use schedular
2022-09-11 21:07:31,397 - INFO - start training..


900


# Epoch 499: train_loss: 0.4623: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.70it/s]


first cluster:
0.22519381343237138
结果:Counter({6: 985, 0: 938, 4: 541, 5: 485, 1: 246, 3: 241, 2: 203})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.3428837845439355
结果:Counter({1: 917, 2: 735, 5: 727, 4: 608, 6: 271, 3: 208, 0: 173})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.8


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:07:45,012 - INFO - Use schedular
2022-09-11 21:07:45,013 - INFO - start training..


900


# Epoch 499: train_loss: 0.5735: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.00it/s]


first cluster:
0.3577692373603097
结果:Counter({5: 786, 3: 728, 6: 604, 1: 508, 2: 404, 4: 331, 0: 278})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.34892552951542666
结果:Counter({6: 791, 2: 700, 1: 642, 3: 524, 0: 382, 5: 330, 4: 270})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
900
0.9


  utils.warn_names_duplicates("var")
  utils.warn_names_duplicates("var")
2022-09-11 21:07:59,200 - INFO - Use schedular
2022-09-11 21:07:59,201 - INFO - start training..


900


# Epoch 499: train_loss: 0.7232: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:10<00:00, 47.63it/s]


first cluster:
0.32795855875395674
结果:Counter({2: 829, 4: 706, 6: 609, 5: 499, 1: 424, 0: 300, 3: 272})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})
second cluster:
0.33520666669969373
结果:Counter({0: 767, 5: 687, 4: 655, 3: 515, 6: 402, 1: 311, 2: 302})
ground_truth :Counter({2: 989, 5: 692, 4: 673, 7: 513, 0: 273, 1: 253, 3: 218, 6: 28})


In [17]:
np.arange(1000,4000,500) + np.arange(1000,4000,500)

array([2000, 3000, 4000, 5000, 6000, 7000])

In [20]:
np.append(np.arange(100,1000,100),np.arange(1000,4000,500))

array([ 100,  200,  300,  400,  500,  600,  700,  800,  900, 1000, 1500,
       2000, 2500, 3000, 3500])

In [22]:
15*8*3*10

3600

In [None]:
for num_set in np.arange(1000,4000,500):
    print(num_set)
    args = parser.parse_args([])
    args.lr = 0.001
    args.lr_f = 0.01
    args.num_hidden = 512
    args.num_heads = 4
    args.weight_decay = 2e-4
    args.weight_decay_f= 1e-4
    args.max_epoch= 500
    args.max_epoch_f= 500
    args.mask_rate= 0.4
    args.num_layers= 2
    args.encoder= "gat"
    args.decoder= "gat" 
    args.activation= "prelu"
    args.in_drop= 0.2
    args.attn_drop= 0.1
    args.linear_prob= True
    args.loss_fn= "sce" 
    args.drop_edge_rate=0.0
    args.optimizer= "adam"
    args.replace_rate= 0.05 
    args.alpha_l= 3
    args.scheduler= True
    args.dataset = "sp"


    #参数传递
    device = args.device if args.device >= 0 else "cpu"
    seeds = args.seeds
    dataset_name = args.dataset
    max_epoch = args.max_epoch
    max_epoch_f = args.max_epoch_f
    num_hidden = args.num_hidden
    num_layers = args.num_layers
    encoder_type = args.encoder
    decoder_type = args.decoder
    replace_rate = args.replace_rate

    optim_type = args.optimizer 
    loss_fn = args.loss_fn

    lr = args.lr
    weight_decay = args.weight_decay
    lr_f = args.lr_f
    weight_decay_f = args.weight_decay_f
    linear_prob = args.linear_prob
    load_model = args.load_model
    save_model = args.save_model
    logs = args.logging
    use_scheduler = args.scheduler
    args.num_features = num_features

    acc_list = []
    estp_acc_list = []
    times = 3

    #print(f"####### Run {i} for seed {seed}")
    #print(i)
    seed = 0
    set_random_seed(seed)
    if logs:
        logger = TBLogger(name=f"{dataset_name}_loss_{loss_fn}_rpr_{replace_rate}_nh_{num_hidden}_nl_{num_layers}_lr_{lr}_mp_{max_epoch}_mpf_{max_epoch_f}_wd_{weight_decay}_wdf_{weight_decay_f}_{encoder_type}_{decoder_type}")
    else:
        logger = None
    model = build_model(args)
    device = 1
    model.to(device)
    optimizer = create_optimizer(optim_type, model, lr, weight_decay)

    if use_scheduler:
        logging.info("Use schedular")
        scheduler = lambda epoch :( 1 + np.cos((epoch) * np.pi / max_epoch) ) * 0.5
        # scheduler = lambda epoch: epoch / warmup_steps if epoch < warmup_steps \
                # else ( 1 + np.cos((epoch - warmup_steps) * np.pi / (max_epoch - warmup_steps))) * 0.5
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=scheduler)
    else:
        scheduler = None
    #训练模型
    x = graph.ndata["feat"]
    if not load_model:
        model = pretrain(model, graph, x, optimizer, max_epoch, device, scheduler, num_classes, lr_f, weight_decay_f, max_epoch_f, linear_prob, logger)
        model = model.cpu()
    x = graph.ndata["feat"]
    model.to(device)
    embedding = model.embed(graph.to(device), x.to(device))
    new_pred = kMeans_use(embedding.cpu().detach().numpy(),num_classes)
    adata.obs["pre"] = new_pred
    score = adjusted_rand_score(adata.obs.lay_num.values, new_pred )
    print("first cluster:")
    print(score)
    print("结果:" + str(Counter(new_pred)))
    print("ground_truth :"  + str(Counter(adata.obs.lay_num.values)))
    x = graph.ndata["feat"]
    test = model.embed(graph.to(device), x.to(device))
    test_new_pred = kMeans_use(test.cpu().detach().numpy(),num_classes)
    score = adjusted_rand_score(adata.obs.lay_num.values, test_new_pred )
    adata.obs["second_pre"] = test_new_pred
    print("second cluster:")
    print(score)
    print("结果:" + str(Counter(test_new_pred)))
    print("ground_truth :"  + str(Counter(adata.obs.lay_num.values)))
    #drawPicture(adata.obs,"imagecol","imagerow",colorattribute="lay_num",save_file= None)
    #drawPicture(adata.obs,"imagecol","imagerow",colorattribute="pre",save_file= None)
    #drawPicture(adata.obs,"imagecol","imagerow",colorattribute="second_pre",save_file= None)

In [None]:
drawPicture(adata.obs,"imagecol","imagerow",colorattribute="second_pre",save_file= None)

In [None]:
# #训练模型
# x = graph.ndata["feat"]
# if not load_model:
#     model = pretrain(model, graph, x, optimizer, max_epoch, device, scheduler, num_classes, lr_f, weight_decay_f, max_epoch_f, linear_prob, logger)
#     model = model.cpu()

In [None]:
# x = graph.ndata["feat"]
# model.to(device)
# embedding = model.embed(graph.to(device), x.to(device))
# new_pred = kMeans_use(embedding.cpu().detach().numpy(),num_classes)
# adata.obs["pre"] = new_pred
# score = adjusted_rand_score(adata.obs.lay_num.values, new_pred )
# print("first cluster:")
# print(score)
# print("结果:" + str(Counter(new_pred)))
# print("ground_truth :"  + str(Counter(adata.obs.lay_num.values)))

In [None]:
# x = graph.ndata["feat"]
# test = model.embed(graph.to(device), x.to(device))
# test_new_pred = kMeans_use(test.cpu().detach().numpy(),num_classes)
# score = adjusted_rand_score(adata.obs.lay_num.values, test_new_pred )
# adata.obs["second_pre"] = test_new_pred
# print("second cluster:")
# print(score)
# print("结果:" + str(Counter(test_new_pred)))
# print("ground_truth :"  + str(Counter(adata.obs.lay_num.values)))

In [None]:
# drawPicture(adata.obs,"imagecol","imagerow",colorattribute="lay_num",save_file= None)
# drawPicture(adata.obs,"imagecol","imagerow",colorattribute="pre",save_file= None)
# drawPicture(adata.obs,"imagecol","imagerow",colorattribute="second_pre",save_file= None)