In [112]:
import numpy as np
import pandas as pd
import streamlit as st
import re
from PIL import Image
from collections import defaultdict

def get_top_n_subsidiaries(edges2, n=5):
    """Keep, for every parent company, its ``n`` largest holdings.

    Parameters
    ----------
    edges2 : iterable of sequences
        Each edge is ``(company, subsidiary, holding_ratio, *extra)``. Only the
        first three positions are interpreted; the rest is carried through.
    n : int, default 5
        Maximum number of subsidiaries kept per company.

    Returns
    -------
    list of tuple
        Flattened ``(company, subsidiary, holding_ratio, *extra)`` edges.
        Companies appear in first-seen order; inside a company the edges are
        sorted by holding ratio, largest first (ties keep their input order).

    Notes
    -----
    Edges whose holding ratio is falsy (``0``, ``None``, ``''``) or missing
    (NaN / ``pd.NA``) are dropped. NaN is truthy, so a plain truthiness test
    would let it through, and NaN keys make the result of ``sorted`` undefined.
    """
    grouped = defaultdict(list)
    for edge in edges2:
        holding_ratio = edge[2]
        # Check for missing values first: bool(pd.NA) raises, and NaN is truthy.
        if pd.isna(holding_ratio) or not holding_ratio:
            continue
        grouped[edge[0]].append(edge[1:])

    final_edges = []
    for company, subsidiaries in grouped.items():
        # After the slice above, position 1 of each entry is the holding ratio.
        ranked = sorted(subsidiaries, key=lambda sub: sub[1], reverse=True)
        final_edges.extend((company, *sub) for sub in ranked[:n])

    return final_edges

def get_stock_name(data, ticker):
    """Return the ``'Name'`` of the first row whose ``'Symbol'`` equals ``ticker``.

    An ``IndexError`` propagates when no row of ``data`` matches.
    """
    matching_rows = data[data['Symbol'] == ticker]
    names = matching_rows['Name'].values
    return names[0]

def sub_company(df, not_first_layer):
    """Convert one company's holdings rows into edges plus listed descendants.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to convert. Reads the columns ``'Name'``, ``'RalatedParty'``
        (spelled as in the data), ``'DirectHoldingRatio'``,
        ``'IndirectHoldingRatio'``, ``'is_foreign'``, ``'Relationship'``,
        ``'is_subsidiary_listed'`` and ``'Sub_Symbol'``.
    not_first_layer : str or falsy
        When truthy, this value is used as the parent label of every edge;
        otherwise the parent label is taken from each row's ``'Name'``.

    Returns
    -------
    (list of tuple, list of tuple)
        ``edges`` as ``(parent, related_party, direct_ratio_as_float,
        indirect_ratio, is_foreign, relationship, is_subsidiary_listed)`` and
        ``(related_party, sub_symbol)`` pairs for the rows whose
        ``'Sub_Symbol'`` is not missing. Both lists are empty for an empty
        frame.
    """
    row_count = df.shape[0]
    if not row_count:
        return [], []

    # Parent label: the caller-supplied name repeated per row, or the rows' own name.
    if not_first_layer:
        parents = [not_first_layer] * row_count
    else:
        parents = df['Name']

    edges = list(zip(
        parents,
        df['RalatedParty'],
        df['DirectHoldingRatio'].astype(float),
        df['IndirectHoldingRatio'],
        df['is_foreign'],
        df['Relationship'],
        df['is_subsidiary_listed'],
    ))

    # Only rows carrying a symbol of their own can be expanded further.
    descendant = df[df['Sub_Symbol'].notna()]
    listed_pairs = list(zip(descendant['RalatedParty'], descendant['Sub_Symbol']))
    return edges, listed_pairs

def search(data, input_ticker, max_depth=3):
    """Collect holding edges layer by layer, starting from ``input_ticker``.

    Layer 0 is the rows of ``data`` whose ``'Symbol'`` equals ``input_ticker``.
    Each following layer expands the listed subsidiaries returned by
    ``sub_company`` for the previous layer.

    Parameters
    ----------
    data : pandas.DataFrame
        Holdings table; filtered on ``'Symbol'`` and passed to ``sub_company``.
    input_ticker : str
        Symbol of the root company.
    max_depth : int, default 3
        Maximum number of layers to expand (the root counts as one).

    Returns
    -------
    list of tuple
        Edges of every expanded layer, in expansion order.

    Notes
    -----
    Each layer's edges are stored as soon as the layer is expanded. Testing
    "no further listed subsidiaries" before storing would discard the edges of
    the last expanded layer whenever none of its subsidiaries is listed.
    """
    edges = []
    # (parent label, ticker) pairs to expand; the root takes its label from
    # its own rows, which sub_company does when the label is falsy.
    frontier = [(False, input_ticker)]

    for _ in range(max_depth):
        next_frontier = []
        for company, ticker in frontier:
            layer_edges, listed = sub_company(
                data[data['Symbol'] == ticker], not_first_layer=company
            )
            edges.extend(layer_edges)
            next_frontier.extend(listed)
        if not next_frontier:
            break
        frontier = next_frontier

    return edges


In [103]:
# Load the holdings table. dtype=str reads every column as text, so symbols are
# compared as strings (e.g. the ticker '000009' used further down).
data = pd.read_csv('data/data.csv', dtype=str) # pd.read_excel('data/data.xlsx')[2:].reset_index(drop=True)

# NOTE(review): the commented-out code below derives the columns `is_foreign`,
# `Sub_Symbol` and `is_subsidiary_listed`. Presumably it was a one-off
# preprocessing step whose result is already stored in data.csv -- confirm,
# then move it to a script or delete it.

# def has_chinese(s):
#     return bool(re.search('[\u4e00-\u9fff]', s))

# data['is_foreign'] = data['RalatedParty'].apply(lambda x: 0 if has_chinese(x) else 1)

# map = pd.read_excel('map.xlsx', dtype=str)

# map_dict = dict(zip(map['公司名称'], map['证券代码.x']))

# data['Sub_Symbol'] = data['RalatedParty'].map(map_dict)

# data['is_subsidiary_listed'] = 0

# data.loc[~data['Sub_Symbol'].isna(), 'is_subsidiary_listed'] = 1



In [116]:
# Build the edge list for ticker 000009 and keep an edge only when its
# is_subsidiary_listed field (position 6) is '1' or its direct holding ratio
# (position 2) is above 90.
edges = [
    link
    for link in search(data, '000009')
    if link[6] == '1' or link[2] > 90
]
# Split the kept edges by the is_subsidiary_listed flag.
edges1 = list(filter(lambda link: link[6] == '1', edges))
edges2 = list(filter(lambda link: link[6] == '0', edges))

In [141]:
# Keep only the five largest holdings per parent company among the edges in
# edges2. This calls get_top_n_subsidiaries (defined in the first cell)
# instead of repeating its group / sort / flatten steps inline, so the
# intermediate groupings stay local to the helper and the logic lives in one
# place.
final_edges = get_top_n_subsidiaries(edges2, n=5)


In [142]:
# Display the flattened per-company top-five edge list.
final_edges

[('中国宝安', '宝安科技有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('中国宝安', '恒丰国际投资有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('中国宝安', '深圳红莲湖投资有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('中国宝安', '宝安集团产业投资管理有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('中国宝安', '中国宝安集团控股有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('马应龙药业集团股份有限公司', '武汉广为康医疗科技有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('马应龙药业集团股份有限公司', '马应龙肛肠诊疗技术研究院', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('马应龙药业集团股份有限公司', '武汉广为兴科技有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('马应龙药业集团股份有限公司', '武汉天一医药科技投资有限公司', 98.75, '1.25', '0', '上市公司的子公司', '0'),
 ('马应龙药业集团股份有限公司', '马应龙国际医药发展有限公司', 97.96, nan, '0', '上市公司的子公司', '0'),
 ('贝特瑞新材料集团股份有限公司', '惠州市贝特瑞新材料科技有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('贝特瑞新材料集团股份有限公司', '深圳贝特瑞钠电新材料科技有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('贝特瑞新材料集团股份有限公司', '贝特瑞(四川)新材料科技有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('贝特瑞新材料集团股份有限公司', '深圳市贝特瑞新能源技术研究院有限公司', 100.0, nan, '0', '上市公司的子公司', '0'),
 ('贝特瑞新材料集团股份有限公司', '鸡西市贝特瑞新能源科技有限公司', 100.