In [7]:
import numpy as np
import pandas as pd
import streamlit as st
import re
from PIL import Image
from collections import defaultdict

def has_chinese(s):
    """Report whether ``s`` contains at least one character in U+4E00..U+9FFF.

    Args:
        s: The string to scan.

    Returns:
        True if any character falls in that code-point range (the CJK
        Unified Ideographs block), otherwise False.
    """
    found = re.search('[\u4e00-\u9fff]', s)
    return found is not None

def get_top_n_subsidiaries(edges2, n=5):
    """Keep, for each parent company, only its ``n`` largest holdings.

    Args:
        edges2: Iterable of edge sequences shaped
            ``(company, subsidiary, holding_ratio, *rest)``.
        n: Maximum number of edges kept per company.

    Returns:
        A flat list of edge tuples. Companies appear in first-seen order and,
        within a company, edges are sorted by holding ratio from largest to
        smallest (ties keep their input order).

    Edges without a usable ratio are dropped: falsy values (``None``, ``0``,
    ``''``) and missing values such as NaN.
    """
    grouped = defaultdict(list)
    for edge in edges2:
        holding_ratio = edge[2]
        # NaN is truthy, so a plain truthiness test would let it through and
        # NaN keys make the sort order below arbitrary. Check it explicitly.
        if pd.isnull(holding_ratio) or not holding_ratio:
            continue
        grouped[edge[0]].append(tuple(edge))

    final_edges = []
    for company_edges in grouped.values():
        ranked = sorted(company_edges, key=lambda e: e[2], reverse=True)
        final_edges.extend(ranked[:n])
    return final_edges

def get_stock_name(data, ticker):
    """Look up the ``Name`` of the first row whose ``Symbol`` equals ``ticker``.

    Args:
        data: DataFrame with at least ``Symbol`` and ``Name`` columns.
        ticker: Symbol value to match.

    Returns:
        The ``Name`` value of the first matching row.

    Raises:
        IndexError: If no row matches ``ticker``.
    """
    matching_names = data.loc[data['Symbol'] == ticker, 'Name']
    return matching_names.values[0]

def sub_company(df, not_first_layer):
    """Turn the rows of ``df`` into edges plus the (company, ticker) pairs to expand next.

    Args:
        df: Rows for a single parent company. Must provide the columns
            ``Name``, ``RalatedParty``, ``DirectHoldingRatio``,
            ``IndirectHoldingRatio``, ``is_foreign``, ``Relationship``,
            ``is_subsidiary_listed`` and ``Sub_Symbol``.
        not_first_layer: Falsy for the root layer, in which case the parent
            label comes from ``df['Name']``; otherwise the value to use as
            the parent label of every edge.

    Returns:
        ``(edges, descendants)``. Each edge is the 7-tuple ``(parent,
        related_party, direct_ratio_as_float, indirect_ratio, is_foreign,
        relationship, is_subsidiary_listed)``. ``descendants`` lists
        ``(related_party, sub_symbol)`` for rows whose ``Sub_Symbol`` is not
        null. Both lists are empty when ``df`` has no rows.
    """
    row_count = df.shape[0]
    if not row_count:
        return [], []

    # Deeper layers label every edge with the caller-supplied parent name.
    parents = [not_first_layer] * row_count if not_first_layer else df['Name']
    edges = list(zip(
        parents,
        df['RalatedParty'],
        df['DirectHoldingRatio'].astype(float),
        df['IndirectHoldingRatio'],
        df['is_foreign'],
        df['Relationship'],
        df['is_subsidiary_listed'],
    ))

    with_symbol = df[df['Sub_Symbol'].notna()]
    descendants = list(zip(with_symbol['RalatedParty'], with_symbol['Sub_Symbol']))
    return edges, descendants

def search(data, input_ticker, max_depth=3):
    """Collect subsidiary edges below ``input_ticker``, layer by layer.

    Layer 0 holds the rows of ``data`` whose ``Symbol`` equals
    ``input_ticker``. Each following layer holds the rows of every
    descendant that ``sub_company`` reported with a ticker in the previous
    layer.

    Args:
        data: DataFrame accepted by ``sub_company`` that also has a
            ``Symbol`` column.
        input_ticker: Symbol of the root company.
        max_depth: Number of layers to collect (levels ``0`` to
            ``max_depth - 1``). The default of 3 matches the previous
            hard-coded limit.

    Returns:
        A list of 8-tuples: the 7 fields produced by ``sub_company`` followed
        by the layer index. Only edges whose field 6 equals ``'1'`` or whose
        field 2 (the direct holding ratio) is not null are kept.

    Two fixes relative to the earlier loop: every edge carries its level
    exactly once (deeper layers used to get it appended twice), and a layer
    that has already been fetched is no longer discarded when none of its
    members has a ticker to expand further.
    """
    layer_edges, frontier = sub_company(data[data['Symbol'] == input_ticker], not_first_layer=False)
    edges = []
    for level in range(max_depth):
        edges.extend(edge + (level,) for edge in layer_edges)
        # Stop before fetching a layer that would never be recorded.
        if not frontier or level + 1 == max_depth:
            break
        expansions = [
            sub_company(data[data['Symbol'] == ticker], not_first_layer=company)
            for company, ticker in frontier
        ]
        layer_edges = [edge for child_edges, _ in expansions for edge in child_edges]
        frontier = [pair for _, pairs in expansions for pair in pairs]
    return [edge for edge in edges if edge[6] == '1' or not pd.isnull(edge[2])]

def Shareholder(df, not_first_layer):
    """Turn the rows of ``df`` into shareholder edges plus the (shareholder, ticker) pairs to expand next.

    Args:
        df: Rows for a single company. Must provide the columns ``Name``,
            ``Shareholder_Name``, ``Shareholding_Ratio``, ``Shares_Number``,
            ``is_foreign``, ``Shareholder_Nature``, ``is_subsidiary_listed``
            and ``Sub_Symbol``.
        not_first_layer: Falsy for the root layer, in which case the company
            label comes from ``df['Name']``; otherwise the value to use as
            the company label of every edge.

    Returns:
        ``(edges, descendants)``. Each edge is the 7-tuple ``(company,
        shareholder_name, shareholding_ratio_as_float, shares_number,
        is_foreign, shareholder_nature, is_subsidiary_listed)``.
        ``descendants`` lists ``(shareholder_name, sub_symbol)`` for rows
        whose ``Sub_Symbol`` is not null. Both lists are empty when ``df``
        has no rows.
    """
    row_count = df.shape[0]
    if not row_count:
        return [], []

    # Deeper layers label every edge with the caller-supplied company name.
    companies = [not_first_layer] * row_count if not_first_layer else df['Name']
    edges = list(zip(
        companies,
        df['Shareholder_Name'],
        df['Shareholding_Ratio'].astype(float),
        df['Shares_Number'],
        df['is_foreign'],
        df['Shareholder_Nature'],
        df['is_subsidiary_listed'],
    ))

    with_symbol = df[df['Sub_Symbol'].notna()]
    descendants = list(zip(with_symbol['Shareholder_Name'], with_symbol['Sub_Symbol']))
    return edges, descendants

def search_shareholder(data, input_ticker, max_depth=2):
    """Collect shareholder edges above ``input_ticker``, layer by layer.

    Layer -1 holds the rows of ``data`` whose ``Symbol`` equals
    ``input_ticker``. Each following layer (-2, -3, ...) holds the rows of
    every shareholder that ``Shareholder`` reported with a ticker in the
    previous layer.

    Args:
        data: DataFrame accepted by ``Shareholder`` that also has a
            ``Symbol`` column.
        input_ticker: Symbol of the root company.
        max_depth: Number of layers to collect (levels ``-1`` down to
            ``-max_depth``). The default of 2 matches the previous
            hard-coded stop at level -3.

    Returns:
        A list of 8-tuples: the 7 fields produced by ``Shareholder`` followed
        by the negative layer index. Only edges whose field 6 equals ``'1'``
        or whose field 2 (the shareholding ratio) is not null are kept.

    Two fixes relative to the earlier loop: every edge carries its level
    exactly once (deeper layers used to get it appended twice), and a layer
    that has already been fetched is no longer discarded when none of its
    members has a ticker to expand further.
    """
    layer_edges, frontier = Shareholder(data[data['Symbol'] == input_ticker], not_first_layer=False)
    edges = []
    for depth in range(1, max_depth + 1):
        level = -depth
        edges.extend(edge + (level,) for edge in layer_edges)
        # Stop before fetching a layer that would never be recorded.
        if not frontier or depth == max_depth:
            break
        expansions = [
            Shareholder(data[data['Symbol'] == ticker], not_first_layer=company)
            for company, ticker in frontier
        ]
        layer_edges = [edge for parent_edges, _ in expansions for edge in parent_edges]
        frontier = [pair for _, pairs in expansions for pair in pairs]
    return [edge for edge in edges if edge[6] == '1' or not pd.isnull(edge[2])]


In [8]:
# Load both input tables with every column read as a string (dtype=str).
# `data` is presumably the related-party / subsidiary table consumed by
# sub_company and search -- TODO confirm against the caller.
data = pd.read_csv('data/data.csv', dtype=str) # earlier Excel-based load: pd.read_excel('data/data.xlsx')[2:].reset_index(drop=True)
# `df` is the shareholder table (the file name translates to "top ten shareholders").
df = pd.read_csv('data/十大股东.csv', dtype=str)

# Disabled one-off preprocessing, kept for reference. It derives three columns on `data`:
#   is_foreign           - 0 when RalatedParty contains a Chinese character, else 1
#   Sub_Symbol           - ticker looked up from a company-name -> ticker map in map.xlsx
#   is_subsidiary_listed - 1 when Sub_Symbol is present, else 0
# data['is_foreign'] = data['RalatedParty'].apply(lambda x: 0 if has_chinese(x) else 1)
# map = pd.read_excel('map.xlsx', dtype=str)
# map_dict = dict(zip(map['公司名称'], map['证券代码.x']))
# data['Sub_Symbol'] = data['RalatedParty'].map(map_dict)
# data['is_subsidiary_listed'] = 0
# data.loc[~data['Sub_Symbol'].isna(), 'is_subsidiary_listed'] = 1

In [28]:
# Show the shareholder rows whose nature contains the "overseas natural person" label.
# na=False makes rows with a missing Shareholder_Nature count as non-matches; without it
# the mask contains NaN and boolean indexing raises. regex=False matches the label literally.
df[df['Shareholder_Nature'].str.contains('境外自然人', na=False, regex=False)]

Unnamed: 0,Symbol,EndDate,Shareholder_Name,Shareholding_Ranking,Shares_Number,Shareholding_Ratio,Change_Direction,Change_Percentage,Change_Start_Date,Shareholder_Nature,Name,is_foreign,Sub_Symbol,is_subsidiary_listed
126,000016,2023-06-30,NAM NGAI,7,23000000,0.96,3,-0.0008,2023-03-31,境外自然人,深康佳A,1,,0
155,000020,2023-06-30,李中秋,6,2830000,1,4,0,2023-03-31,境外自然人,深华发A,0,,0
395,000056,2023-06-30,鍾志強,6,18090050,1.49,4,0,2023-03-31,境外自然人,皇庭国际,0,,0
403,000058,2023-06-30,LISHERYNZHANMING,4,5809400,0.47,2,0.0104,2023-03-31,境外自然人,深赛格,1,,0
405,000058,2023-06-30,龚茜华,6,2940000,0.24,4,0,2023-03-31,境外自然人,深赛格,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51081,833429,2023-06-30,陈庆玥,5,4644000,3.73,4,0,2023-03-31,境外自然人,康比特,0,,0
51117,833523,2023-06-30,艾建杰,1,11758260,15.0894,4,0,2023-03-31,境外自然人,德瑞锂电,0,,0
52063,839792,2023-06-30,张庆彬,2,19284940,11.6497,4,0,2023-03-31,境外自然人,东和新材,0,,0
52400,873223,2023-06-30,唐旭文,1,98700000,62.7523,4,0,2023-03-31,境外自然人,荣亿精密,0,,0


In [141]:
# Write the shareholder table back to the same CSV path it was loaded from, overwriting that file.
# index=False leaves the DataFrame's row index out of the written file.
df.to_csv('data/十大股东.csv', index=False)