In [1]:
import numpy as np
import pandas as pd
import streamlit as st
import re
from PIL import Image
from collections import defaultdict

def get_top_n_subsidiaries(edges2, n=5):
    """Keep, for every parent company, only its ``n`` largest holdings.

    Parameters
    ----------
    edges2 : iterable of sequences
        Each edge is ``(company_name, subsidiary, holding_ratio, ...)``; any
        fields after the ratio are carried through unchanged.
    n : int, default 5
        Maximum number of edges kept per company.

    Returns
    -------
    list of tuple
        ``(company_name, subsidiary, holding_ratio, ...)`` tuples. Companies
        appear in first-seen order; within a company the edges are ordered by
        ``holding_ratio`` descending.

    Notes
    -----
    Edges whose ratio is falsy (``None``, ``0``, ``''``) are dropped. NaN is
    truthy, so a NaN ratio passes that filter; because every comparison with
    NaN is false, sorting on the raw ratio would give an order that depends on
    where the NaN happens to sit. NaN ratios are therefore ranked after every
    real ratio, which keeps the result deterministic.
    """

    def _rank_key(sub):
        ratio = sub[1]
        # NaN is the only value that is not equal to itself.
        is_missing = ratio != ratio
        # With reverse=True, (True, ratio) entries come first (largest ratio
        # first) and the (False, 0) placeholders for NaN come last.
        return (not is_missing, 0 if is_missing else ratio)

    grouped = defaultdict(list)
    for edge in edges2:
        if edge[2]:
            # Store everything except the company name; it is the dict key.
            grouped[edge[0]].append(edge[1:])

    final_edges = []
    for company, subsidiaries in grouped.items():
        # sorted() is stable, so equal ratios keep their input order.
        ranked = sorted(subsidiaries, key=_rank_key, reverse=True)
        final_edges.extend((company, *sub) for sub in ranked[:n])

    return final_edges

def get_stock_name(data, ticker):
    """Return the ``Name`` of the first row of ``data`` whose ``Symbol`` equals ``ticker``.

    Raises ``IndexError`` when no row matches, because the first element of an
    empty array is requested.
    """
    matching_names = data.loc[data['Symbol'] == ticker, 'Name']
    return matching_names.values[0]

def sub_company(df, not_first_layer):
    """Turn the rows of ``df`` into edges plus the (company, ticker) pairs of listed subsidiaries.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to convert. Reads the columns 'Name', 'RalatedParty',
        'DirectHoldingRatio', 'IndirectHoldingRatio', 'is_foreign',
        'Relationship', 'is_subsidiary_listed' and 'Sub_Symbol'.
    not_first_layer : str or falsy
        When truthy, it is used as the source of every edge; when falsy,
        each row's own 'Name' is used instead.

    Returns
    -------
    (list, list)
        ``edges`` - one tuple per row: (source, RalatedParty,
        DirectHoldingRatio as float, IndirectHoldingRatio, is_foreign,
        Relationship, is_subsidiary_listed).
        ``pairs`` - (RalatedParty, Sub_Symbol) for the rows whose
        'Sub_Symbol' is not missing.
        Both lists are empty when ``df`` has no rows.
    """
    row_count = df.shape[0]
    if row_count == 0:
        return [], []

    # Edge source: the caller-supplied name repeated per row, or the 'Name' column.
    if not_first_layer:
        sources = [not_first_layer] * row_count
    else:
        sources = df['Name']

    edges = list(zip(
        sources,
        df['RalatedParty'],
        df['DirectHoldingRatio'].astype(float),
        df['IndirectHoldingRatio'],
        df['is_foreign'],
        df['Relationship'],
        df['is_subsidiary_listed'],
    ))

    # Rows that carry a 'Sub_Symbol' are the ones returned as (company, ticker) pairs.
    with_symbol = df[df['Sub_Symbol'].notna()]
    pairs = list(zip(with_symbol['RalatedParty'], with_symbol['Sub_Symbol']))
    return edges, pairs

def search(data, input_ticker, max_depth=3):
    """Collect holding edges starting from ``input_ticker``, layer by layer.

    The first layer is the rows of ``data`` whose 'Symbol' equals
    ``input_ticker``. Each following layer is obtained by looking up, for
    every (company, ticker) pair returned by ``sub_company`` for the previous
    layer, the rows whose 'Symbol' equals that ticker.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a 'Symbol' column plus the columns ``sub_company`` reads.
    input_ticker : str
        Ticker to start from.
    max_depth : int, default 3
        Maximum number of layers included in the result. The first layer is
        always included.

    Returns
    -------
    list of tuple
        Edges of all visited layers, first layer first.

    Notes
    -----
    A fetched layer is appended before the stop condition is evaluated, so
    the edges of the deepest reachable layer are kept even when none of its
    companies has a ticker to expand further. No layer beyond ``max_depth``
    is fetched.
    """
    edges = []
    layer_edges, to_expand = sub_company(data[data['Symbol'] == input_ticker], not_first_layer=False)
    depth = 0
    while True:
        edges.extend(layer_edges)
        depth += 1
        if not to_expand or depth >= max_depth:
            break
        # One (edges, pairs) result per company expanded in this layer.
        results = [
            sub_company(data[data['Symbol'] == ticker], not_first_layer=company)
            for company, ticker in to_expand
        ]
        layer_edges = [edge for found_edges, _ in results for edge in found_edges]
        to_expand = [pair for _, found_pairs in results for pair in found_pairs]

    return edges


In [20]:
# Related-party / subsidiary table. dtype=str reads every column as text, so
# 'Symbol' values such as '000001' keep their leading zeros.
data = pd.read_csv('data/data.csv', dtype=str) # pd.read_excel('data/data.xlsx')[2:].reset_index(drop=True)
# Shareholder table; the file name '十大股东' means "top ten shareholders".
df = pd.read_csv('data/十大股东.csv', dtype=str)
# Disabled preprocessing, kept for reference.
# NOTE(review): presumably this is how the 'is_foreign', 'Sub_Symbol' and
# 'is_subsidiary_listed' columns already present in data.csv were produced;
# confirm before deleting. It also rebinds the builtin name `map`.
# def has_chinese(s):
#     return bool(re.search('[\u4e00-\u9fff]', s))
# data['is_foreign'] = data['RalatedParty'].apply(lambda x: 0 if has_chinese(x) else 1)
# map = pd.read_excel('map.xlsx', dtype=str)
# map_dict = dict(zip(map['公司名称'], map['证券代码.x']))
# data['Sub_Symbol'] = data['RalatedParty'].map(map_dict)
# data['is_subsidiary_listed'] = 0
# data.loc[~data['Sub_Symbol'].isna(), 'is_subsidiary_listed'] = 1

In [21]:
# Display the related-party / subsidiary table (pandas abbreviates the middle rows).
data

Unnamed: 0,Symbol,EndDate,RalatedParty,ISExit,Relationship,RegisterAddress,DirectHoldingRatio,IndirectHoldingRatio,Sub_Symbol,Name,is_foreign,is_subsidiary_listed
0,000001,2022-12-31,平安理财有限责任公司,0,上市公司的子公司,中国深圳,100,,,平安银行,0,0
1,000002,2022-12-31,深圳市万科发展有限公司,0,上市公司的子公司,深圳,95,5,,万科A,0,0
2,000002,2022-12-31,广州万科企业有限公司,0,上市公司的子公司,广州,100,,,万科A,0,0
3,000002,2022-12-31,上海万科企业有限公司,0,上市公司的子公司,上海,,100,,万科A,0,0
4,000002,2022-12-31,北京万科企业有限公司,0,上市公司的子公司,北京,95,5,,万科A,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
158528,873339,2022-12-31,Evertie Lighting (M)Sdn.Bhd.,0,上市公司的子公司,马来西亚,100,,,恒太照明,1,0
158529,873339,2022-12-31,江苏安明产业投资管理有限公司,0,上市公司的子公司,南通,100,,,恒太照明,0,0
158530,873527,2022-12-31,台州市生物医化产业研究院有限公司,0,上市公司的联营企业,浙江省台州市东环大道518号315室-9,5,,,夜光明,0,0
158531,873593,2022-12-31,常州墨新机电有限公司,0,上市公司的子公司,江苏省常州市,100,,,鼎智科技,0,0


In [None]:
def find_shareholder(df, not_first_layer):
    """Turn the rows of ``df`` into edges plus the (company, ticker) pairs to expand next.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to convert. Reads the columns 'Name', 'Shareholder_Name',
        'DirectHoldingRatio', 'IndirectHoldingRatio', 'is_foreign',
        'Relationship', 'is_subsidiary_listed', 'Sub_Symbol' and
        'RalatedParty'.
    not_first_layer : str or falsy
        When truthy, it is used as the source of every edge; when falsy,
        each row's own 'Name' is used instead.

    Returns
    -------
    (list, list)
        ``edges`` - one tuple per row: (source, Shareholder_Name,
        DirectHoldingRatio as float, IndirectHoldingRatio, is_foreign,
        Relationship, is_subsidiary_listed).
        ``pairs`` - (RalatedParty, Sub_Symbol) for the rows whose
        'Sub_Symbol' is not missing.
        Both lists are empty when ``df`` has no rows.

    NOTE(review): the shareholder table displayed in this notebook has
    'Shareholder_Name' and 'Shareholding_Ratio' but none of the holding-ratio,
    'Sub_Symbol' or 'RalatedParty' columns read here, so passing that table
    would raise ``KeyError``. The pairs are also built from 'RalatedParty'
    while the edges use 'Shareholder_Name'. Confirm the intended input schema.
    """
    row_count = df.shape[0]
    if row_count == 0:
        return [], []

    # Edge source: the caller-supplied name repeated per row, or the 'Name' column.
    if not_first_layer:
        sources = [not_first_layer] * row_count
    else:
        sources = df['Name']

    edges = list(zip(
        sources,
        df['Shareholder_Name'],
        df['DirectHoldingRatio'].astype(float),
        df['IndirectHoldingRatio'],
        df['is_foreign'],
        df['Relationship'],
        df['is_subsidiary_listed'],
    ))

    # Rows that carry a 'Sub_Symbol' are the ones returned as (company, ticker) pairs.
    with_symbol = df[df['Sub_Symbol'].notna()]
    pairs = list(zip(with_symbol['RalatedParty'], with_symbol['Sub_Symbol']))
    return edges, pairs

def search_shareholder(data, input_ticker, max_depth=3):
    """Collect shareholder edges starting from ``input_ticker``, layer by layer.

    The first layer is the rows of ``data`` whose 'Symbol' equals
    ``input_ticker``. Each following layer is obtained by looking up, for
    every (company, ticker) pair returned by ``find_shareholder`` for the
    previous layer, the rows whose 'Symbol' equals that ticker.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a 'Symbol' column plus the columns ``find_shareholder`` reads.
    input_ticker : str
        Ticker to start from.
    max_depth : int, default 3
        Maximum number of layers included in the result. The first layer is
        always included.

    Returns
    -------
    list of tuple
        Edges of all visited layers, first layer first.

    Notes
    -----
    The collected edges are returned to the caller. A fetched layer is
    appended before the stop condition is evaluated, so the deepest reachable
    layer is kept even when it has nothing further to expand. No layer beyond
    ``max_depth`` is fetched.
    """
    edges = []
    layer_edges, to_expand = find_shareholder(data[data['Symbol'] == input_ticker], not_first_layer=False)
    depth = 0
    while True:
        edges.extend(layer_edges)
        depth += 1
        if not to_expand or depth >= max_depth:
            break
        # One (edges, pairs) result per company expanded in this layer.
        results = [
            find_shareholder(data[data['Symbol'] == ticker], not_first_layer=company)
            for company, ticker in to_expand
        ]
        layer_edges = [edge for found_edges, _ in results for edge in found_edges]
        to_expand = [pair for _, found_pairs in results for pair in found_pairs]

    return edges

In [22]:
# Display the shareholder table (pandas abbreviates the middle rows).
df

Unnamed: 0,Symbol,EndDate,Shareholder_Name,Shareholding_Ranking,Shares_Number,Shareholding_Ratio,Change_Direction,Change_Percentage,Change_Start_Date,Shareholder_Nature
0,000001,2023-06-30,中国平安保险(集团)股份有限公司-集团本级-自有资金,1,9618540236,49.56,4,0,2023-03-31,其他
1,000001,2023-06-30,中国平安人寿保险股份有限公司-自有资金,2,1186100488,6.11,4,0,2023-03-31,其他
2,000001,2023-06-30,香港中央结算有限公司,3,1145293846,5.9,3,-1.5529,2023-03-31,境外法人
3,000001,2023-06-30,中国平安人寿保险股份有限公司-传统-普通保险产品,4,440478714,2.27,4,0,2023-03-31,其他
4,000001,2023-06-30,中国证券金融股份有限公司,5,429232688,2.21,4,0,2023-03-31,国有法人
...,...,...,...,...,...,...,...,...,...,...
52455,873593,2023-06-30,中国工商银行股份有限公司-广发科技动力股票型证券投资基金,6,468854,0.98,1,,,其他
52456,873593,2023-06-30,深圳粤科鑫泰股权投资基金管理有限公司-东莞粤科鑫泰工控创业投资合伙企业(有限合伙),7,468000,0.97,1,,,其他
52457,873593,2023-06-30,华夏银行股份有限公司-广发北交所精选两年定期开放混合型证券投资基金,8,468000,0.97,1,,,其他
52458,873593,2023-06-30,方正证券投资有限公司,9,468000,0.97,1,,,境内非国有法人


In [19]:
# Write the shareholder table back to its CSV without the DataFrame index.
# NOTE(review): this cell's execution count (19) is lower than that of the cell
# that reads the same file (20), so the notebook was run out of order; confirm
# where this cell is meant to sit.
df.to_csv('data/十大股东.csv', index=False)