In [118]:

import numpy as np
import pandas as pd
import streamlit as st
import re
from PIL import Image
from collections import defaultdict

def has_chinese(s):
    return bool(re.search('[\u4e00-\u9fff]', s))

def get_top_n_subsidiaries(edges2, n=5):

    subsidiaries_dict = defaultdict(list)

    for edge in edges2:
        company_name = edge[0]
        subsidiary = edge[1]
        holding_ratio = edge[2]
        if holding_ratio:
            subsidiaries_dict[company_name].append(edge[1:])

    top_n_subsidiaries = {}
    for company, subsidiaries in subsidiaries_dict.items():
        sorted_subsidiaries = sorted(subsidiaries, key=lambda x: x[1], reverse=True)
        top_n_subsidiaries[company] = sorted_subsidiaries[:n]

    final_edges = [(company, *sub_company) for company, sub_companies in top_n_subsidiaries.items() for sub_company in sub_companies]

    return final_edges

def get_stock_name(data, ticker):
    return data[data['Symbol'] == ticker]['Name'].values[0]

def sub_company(df, not_first_layer): # 给定dataframe，返回edges，以及子公司的(company, ticker)对
    if df.shape[0]:
        if not_first_layer:
            edges = list(zip([not_first_layer] * df.shape[0], df['RalatedParty'], df['DirectHoldingRatio'].astype(float), df['IndirectHoldingRatio'], df['is_foreign'], df['Relationship'], df['is_subsidiary_listed']))
        else:
            edges = list(zip(df['Name'], df['RalatedParty'], df['DirectHoldingRatio'].astype(float), df['IndirectHoldingRatio'], df['is_foreign'], df['Relationship'], df['is_subsidiary_listed']))
        descendant = df[~df['Sub_Symbol'].isna()]
        return edges, list(zip(descendant['RalatedParty'], descendant['Sub_Symbol']))
    else:
        return [], []

def search(data, input_ticker):
    level = 0
    edges = []
    new_edges, subsidiary = sub_company(data[data['Symbol'] == input_ticker], not_first_layer=False)
    while True:
        edges = edges + [i + (level,) for i in new_edges]
        level += 1
        gen = list((sub_company(data[data['Symbol'] == ticker], not_first_layer=company) for company, ticker in subsidiary))
        new_edges, subsidiary = [edge + (level,) for edges, _ in gen for edge in edges], [temp for _, temps in gen for temp in temps]
        if len(subsidiary) == 0 or level == 3:
            break
    edges = [edge for edge in edges if edge[6] == '1' or not pd.isnull(edge[2])]
    return edges


In [131]:
data = pd.read_csv('data/data.csv', dtype=str) # pd.read_excel('data/data.xlsx')[2:].reset_index(drop=True)
df = pd.read_csv('data/十大股东.csv', dtype=str)

# data['is_foreign'] = data['RalatedParty'].apply(lambda x: 0 if has_chinese(x) else 1)
# map = pd.read_excel('map.xlsx', dtype=str)
# map_dict = dict(zip(map['公司名称'], map['证券代码.x']))
# data['Sub_Symbol'] = data['RalatedParty'].map(map_dict)
# data['is_subsidiary_listed'] = 0
# data.loc[~data['Sub_Symbol'].isna(), 'is_subsidiary_listed'] = 1

In [150]:
def Shareholder(df, not_first_layer): # 给定dataframe，返回edges，以及子公司的(company, ticker)对
    if df.shape[0]:
        if not_first_layer:
            edges = list(zip([not_first_layer] * df.shape[0], df['Shareholder_Name'], df['Shareholding_Ratio'].astype(float), df['Shares_Number'], df['is_foreign'], df['Shareholder_Nature'], df['is_subsidiary_listed']))
        else:
            edges = list(zip(df['Name'], df['Shareholder_Name'], df['Shareholding_Ratio'].astype(float), df['Shares_Number'], df['is_foreign'], df['Shareholder_Nature'], df['is_subsidiary_listed']))
        descendant = df[~df['Sub_Symbol'].isna()]
        return edges, list(zip(descendant['Shareholder_Name'], descendant['Sub_Symbol']))
    else:
        return [], []

def search_shareholder(data, input_ticker):
    level = -1
    edges = []
    new_edges, shareholders = Shareholder(data[data['Symbol'] == input_ticker], not_first_layer=False)
    while True:
        edges = edges + [i + (level,) for i in new_edges]
        level -= 1
        gen = list((Shareholder(data[data['Symbol'] == ticker], not_first_layer=company) for company, ticker in shareholders))
        new_edges, shareholders = [edge + (level,) for edges, _ in gen for edge in edges], [temp for _, temps in gen for temp in temps]
        if len(shareholders) == 0 or level == -3:
            break
    edges = [edge for edge in edges if edge[6] == '1' or not pd.isnull(edge[2])]
    return edges

In [164]:
edges_shareholder_1_2[0]

[('中信证券股份有限公司',
  '香港中央结算(代理人)有限公司',
  17.67,
  '2619001296',
  '0',
  '境外法人',
  '0',
  -2,
  -2),
 ('中信证券股份有限公司', '中国中信有限公司', 15.52, '2299650108', '0', '国有法人', '0', -2, -2),
 ('中信证券股份有限公司', '广州越秀资本控股集团有限公司', 4.23, '626191828', '0', '国有法人', '0', -2, -2),
 ('中信证券股份有限公司', '香港中央结算有限公司', 3.38, '501315511', '0', '境外法人', '0', -2, -2),
 ('中信证券股份有限公司',
  '广州越秀资本控股集团股份有限公司',
  2.06,
  '305155945',
  '0',
  '国有法人',
  '1',
  -2,
  -2),
 ('中信证券股份有限公司',
  '中国建设银行股份有限公司-国泰中证全指证券公司交易型开放式指数证券投资基金',
  1.48,
  '219087656',
  '0',
  '其他',
  '0',
  -2,
  -2),
 ('中信证券股份有限公司', '中央汇金资产管理有限责任公司', 1.38, '205146964', '0', '国有法人', '0', -2, -2),
 ('中信证券股份有限公司',
  '大成基金-农业银行-大成中证金融资产管理计划',
  1.19,
  '176785150',
  '0',
  '其他',
  '0',
  -2,
  -2),
 ('中信证券股份有限公司',
  '华夏基金-农业银行-华夏中证金融资产管理计划',
  1.12,
  '166143027',
  '0',
  '其他',
  '0',
  -2,
  -2),
 ('中信证券股份有限公司',
  '中欧基金-农业银行-中欧中证金融资产管理计划',
  1.09,
  '161205735',
  '0',
  '其他',
  '0',
  -2,
  -2)]

In [None]:
edges_shareholder_1_2 = [[i for i in sublist if i[1] not in [i[0][0] for i in edges_shareholder_1_2]] for sublist in edges_shareholder_1_2]

In [None]:

if number_of_layer_neg2:
    edges_shareholder_1_2.sort(key=lambda x: x[0])
    edges_shareholder_1_2 = [list(group) for key, group in groupby(edges_shareholder_1_2, key=lambda x: x[0])]
    edges_shareholder_1_2 = [[i for i in sublist if i[1] not in [i[0][0] for i in edges_shareholder_1_2]] for sublist in edges_shareholder_1_2]
    x_pos = x_layer1_first
    for mini_edges_layer_1_2 in edges_shareholder_1_2:
        company = mini_edges_layer_1_2[0][0]
        number = len(mini_edges_layer_1_2)
        x_interval = 120
        for element in elements1:
            if element['id'] == company:
                position = element['position']
                break
        x = position['x']
        for index, edge in enumerate(mini_edges_layer_1_2): # 子上市公司k - 子子公司k1, ..., kn
            x_pos += x_interval
            y_pos = y - 800
            try:
                elements1.extend([{"id": edge[1], "data": {"label": edge[1]}, "position": {"x": x_pos, "y": y_pos}, "style": {"width": "80px", "height": "200px"}}])
                elements2.extend([{"id": f"{edge[0]}-{edge[1]}", "source": edge[1], "target": edge[0], "label": (str(edge[2]) + '%' if not pd.isnull(edge[2]) else '')},])
            except:
                pass

elements = elements1 + elements2

In [141]:
df.to_csv('data/十大股东.csv', index=False)