In [1]:
import pandas as pd
from pandas import DataFrame
import csv
import networkx as nx

# Read the pipe-delimited relationship extract into a DataFrame.
all_data = pd.read_table('pty_cust_final_data.txt', delimiter='|')

# Only these relationship type codes are kept; rows with any other code are dropped.
kept_relation_codes = [
    'CMS0_A001', 'CMS0_A002', 'CMS0_A003',
    'CMS0_D011', 'CMS0_D012', 'CMS0_D013', 'CMS0_D014',
    'CMS0_F001', 'CMS0_F002', 'CMS0_F003', 'CMS0_F004', 'CMS0_F011',
    'CMS0_O001', 'CMS0_O002',
]

# Remove rows with any missing value, remove exact duplicate rows, keep the
# selected relationship codes, then renumber the index from zero.
cleaned_all_data = (
    all_data
    .dropna()
    .drop_duplicates()
    .loc[lambda frame: frame['Pty_Rela_Type_Cd'].isin(kept_relation_codes)]
    .reset_index(drop=True)
)

# Column-wise lists of the cleaned edges: source party id, associated party id,
# and relationship type code (row order is preserved across the three lists).
All_source, All_target, All_relation = (
    list(cleaned_all_data[column])
    for column in ('BCS_PTY_ID', 'BCS_assoc_pty_id', 'Pty_Rela_Type_Cd')
)

### Build the chained-relationship tables by self-joining the edge list
def _edge_frame(source_col, target_col, relation_col):
    """Return the cleaned edge list as a DataFrame using the given column names."""
    return DataFrame({
        source_col: All_source,
        target_col: All_target,
        relation_col: All_relation,
    })


df1 = _edge_frame('source', 'target', 'relation')
df2 = _edge_frame('source2', 'target1', 'relation1')

# All "degree-1" chains: each row joins an edge source -> target (relation)
# with an edge target -> target1 (relation1). The join key copy `source2`
# is redundant with `target`, so it is removed before de-duplicating.
All_degree1_list = (
    df1.merge(df2, left_on='target', right_on='source2')
    .drop(columns=['source2'])
    .drop_duplicates()
)

D1_source = list(All_degree1_list['source'])
D1_target = list(All_degree1_list['target'])
D1_target1 = list(All_degree1_list['target1'])

df3 = _edge_frame('source2', 'target2', 'relation2')

# All "degree-2" chains: every degree-1 chain extended by one more edge
# target1 -> target2 (relation2). This table is not de-duplicated here.
All_degree2_list = (
    All_degree1_list.merge(df3, left_on='target1', right_on='source2')
    .drop(columns=['source2'])
)

# Build the customer-id -> customer-name lookup.
# The two edge columns that are not needed for the lookup are removed and the
# remaining rows are de-duplicated. The de-duplicated frame is assigned back:
# DataFrame.drop_duplicates() returns a new frame, so calling it without
# keeping the result (as the earlier version did) has no effect.
N_list = (
    cleaned_all_data
    .drop(columns=['Pty_Rela_Type_Cd', 'BCS_PTY_ID'])
    .drop_duplicates()
)
# Keys are BCS_assoc_pty_id values; if an id occurs with several names, the
# last occurrence wins.
# NOTE(review): ids that occur only in BCS_PTY_ID get no entry here - confirm
# that Cust_Name really belongs to the associated party.
NameDict = dict(zip(N_list['BCS_assoc_pty_id'], N_list['Cust_Name']))

# Relationship type code -> display label used in the exported edge list.
type_dict = {'CMS0_A001':'全资子公司','CMS0_A002':'控股子公司',
             'CMS0_A003':'一般关联企业','CMS0_D011':'法人',
             'CMS0_D012':'总经理','CMS0_D013':'财务负责人',
             'CMS0_D014':'实际控制人','CMS0_F001':'配偶',
             'CMS0_F002':'子女','CMS0_F003':'父母',
             'CMS0_F004':'兄弟姐妹','CMS0_F011':'共有股东',
             'CMS0_O001':'控股股东','CMS0_O002':'股东'
            }

In [3]:
# Empty directed multigraph; the edges around the centre customer are added later.
G = nx.MultiDiGraph()

# Customer id of the centre node; all chains within 3 steps of this customer
# are collected.
# center = 22648887  # Wanda
center = 40393774  # Evergrande

# Direct edges: rows of the cleaned data whose source party is the centre,
# de-duplicated and re-indexed, with the name column removed so only the
# two ids and the relationship code remain.
G_origin_edges = (
    cleaned_all_data[cleaned_all_data['BCS_PTY_ID'] == center]
    .drop_duplicates()
    .reset_index(drop=True)
    .drop(columns=['Cust_Name'])
)

# Degree-1 edge list for `center`: the second hop (target -> target1) of every
# chain in All_degree1_list that starts at the centre customer.
#
# Columns are renamed BY NAME rather than by position. The column order of
# All_degree1_list comes from DataFrame({...}), which sorted the keys in
# pandas < 0.23 but keeps insertion order in current versions; a positional
# rename therefore swaps the relation code and the associated-party id on
# current pandas.
G_1d_edges = (
    All_degree1_list.loc[All_degree1_list['source'] == center]
    .rename(columns={
        'target': 'BCS_PTY_ID',            # start of the second hop
        'target1': 'BCS_assoc_pty_id',     # end of the second hop
        'relation1': 'Pty_Rela_Type_Cd',   # relationship code of the second hop
    })
    # Keep only the edge columns, in the same order as the direct edge list.
    .loc[:, ['BCS_PTY_ID', 'BCS_assoc_pty_id', 'Pty_Rela_Type_Cd']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Degree-2 edge list for `center`: the third hop (target1 -> target2) of every
# chain in All_degree2_list that starts at the centre customer.
#
# Columns are renamed BY NAME rather than by position. The column order of
# All_degree2_list depends on how DataFrame({...}) ordered its keys (sorted in
# pandas < 0.23, insertion order afterwards); a positional rename labels
# `target2` as the relation code and `relation2` as the party id on current
# pandas.
G_2d_edges = (
    All_degree2_list.loc[All_degree2_list['source'] == center]
    .rename(columns={
        'target1': 'BCS_PTY_ID',           # start of the third hop
        'target2': 'BCS_assoc_pty_id',     # end of the third hop
        'relation2': 'Pty_Rela_Type_Cd',   # relationship code of the third hop
    })
    # Keep only the edge columns, in the same order as the direct edge list.
    .loc[:, ['BCS_PTY_ID', 'BCS_assoc_pty_id', 'Pty_Rela_Type_Cd']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Combine the degree-1, direct and degree-2 edges into one de-duplicated table
# and translate the relationship codes into their display labels.
edge_view = (
    pd.concat([G_1d_edges, G_origin_edges, G_2d_edges])
    .reset_index(drop=True)
    .drop_duplicates()
    .assign(Pty_Rela_Type_Cd=lambda frame: frame['Pty_Rela_Type_Cd'].replace(type_dict))
)

# Register every edge in the graph (direct, then degree-1, then degree-2) so
# that G.nodes() yields each customer id that appears in the result.
for _edges in (G_origin_edges, G_1d_edges, G_2d_edges):
    G.add_edges_from(zip(_edges['BCS_PTY_ID'], _edges['BCS_assoc_pty_id']))

node = list(G.nodes())

# For memory reasons, look names up through a small dictionary restricted to
# the nodes of this graph instead of the full NameDict.
# Ids without a known name (NameDict is keyed by associated-party ids only, so
# e.g. the centre may be missing) are skipped and stay as raw ids in the
# output instead of raising KeyError.
sub_nameDict = {node_id: NameDict[node_id] for node_id in node if node_id in NameDict}

# Rename the edge columns by name, fix the output column order, and replace
# customer ids with customer names where a name is known.
edge_view = (
    edge_view
    .rename(columns={
        'BCS_PTY_ID': 'source',
        'BCS_assoc_pty_id': 'target',
        'Pty_Rela_Type_Cd': 'lable',
    })
    .loc[:, ['source', 'target', 'lable']]
    .assign(
        source=lambda frame: frame['source'].replace(sub_nameDict),
        target=lambda frame: frame['target'].replace(sub_nameDict),
    )
    # Different ids can map to the same name, so de-duplicate again.
    .drop_duplicates()
)

# Write the edge list to an Excel workbook.
edge_view.to_excel('恒大.xlsx', index=False)

1. 提取关系：分别取直接、一度、二度关系，用 concat 合并后去重；每条记录包含两个客户号和一个 relation。
2. 通过字典查找，将 relation 代码替换为中文标志。
3. 通过查找 dict 给 node 打标（客户号 → 客户名称），输出到 node_lable.csv。
4. 得到边名单表格 xxx.xlsx。