In [37]:
import tushare as ts
from typing import Any

import numpy as np
import pandas as pd

pro = ts.pro_api('2026c96ef5fa7fc3241c96baafd638c585284c7fefaa00b93ef0a62c')

def code_to_secid(df : pd.DataFrame , code_col = 'ts_code' , retain = False):
    '''switch old symbol into secid'''
    if code_col not in df.columns.values: return df
    replace_dict = {'T00018' : '600018'}
    df['secid'] = df[code_col].astype(str).str.slice(0, 6).replace(replace_dict)
    df['secid'] = df['secid'].where(df['secid'].str.isdigit() , '-1').astype(int)
    if not retain: del df[code_col]
    return df

In [1]:
from src.data.fetcher.tushare.task import main
main()

Calendar Already Updated at 20240702
Description Already Updated at 20240702
SWIndustry Already Updated at 20240702
THSConcept Already Updated at 20240702


In [33]:
# 导入tushare
import tushare as ts
import numpy as np
import pandas as pd

from typing import Any , Literal
from abc import abstractmethod , ABC

from src.data.basic import DB_BY_DATE , DB_BY_NAME , get_target_path , get_source_dates , get_target_dates , save_df

class _TS_Date(ABC):
    UPDATE_FREQ : Literal['d' , 'w' , 'm'] = 'd'
    def __init__(self) -> None:
        assert self.get_db_src_key()[0] in DB_BY_DATE
    
    @abstractmethod
    def get_data(self , date : int , *args , **kwargs) -> pd.DataFrame: ... 

    @abstractmethod
    def get_db_src_key(self) -> tuple[str,str]:
        '''return db_src and db_key'''
        db_src = ''
        db_key = ''
        return db_src , db_key
    
    def target_path(self , date : int , makedir = False):
        return get_target_path(*self.get_db_src_key() , date = date , makedir=makedir)

    def fetch_and_save(self , date : int , *args , **kwargs):
        print('Updating {}/{} at {}'.format(*self.get_db_src_key() , date))
        save_df(self.get_data(date) , self.target_path(date , True))

class TS_Concept(_TS_Date):
    UPDATE_FREQ = 'm'
    
    def get_data(self , date : int):
        df_theme = pd.concat([pro.ths_index(exchange = 'A', type = 'N') , 
                              pro.ths_index(exchange = 'A', type = 'TH')]).reset_index(drop=True)
        dfs = []
        for i , ts_code in enumerate(df_theme['ts_code']):
            # print(i , ts_code)
            df = pro.ths_member(ts_code = ts_code)
            dfs.append(df)
        df_all = pd.concat(dfs).rename(columns={'name':'concept'})
        df_all = df_all.merge(df_theme , on = 'ts_code' , how='left').rename(columns={'ts_code':'index_code'})
        df_all = code_to_secid(df_all , 'code')
        df = df_all.reset_index(drop = True)
        return df
    
    def get_db_src_key(self):
        return 'membership_ts' , 'concept'
    
a = TS_Concept()
a.fetch_and_save(20240702)

In [None]:
# 导入tushare
import tushare as ts
from typing import Any

import numpy as np
import pandas as pd

pro = ts.pro_api('2026c96ef5fa7fc3241c96baafd638c585284c7fefaa00b93ef0a62c')
offset = 0
limit = 5000
dfs = []
while True:
    df = pro.namechange(limit = limit , offset = offset)
    
    if len(df) == 0: break
    dfs.append(df)
    offset += limit
df = pd.concat(dfs).reset_index(drop = True)
print(df)

In [4]:
df = pd.concat(dfs).reset_index(drop = True)
print(df)

         ts_code  name start_date  end_date  ann_date change_reason
0      300799.SZ   左江退   20240708      None  20240628          终止上市
1      600518.SH  康美药业   20240704      None  20240703          撤销ST
2      603350.SH   安乃达   20240703      None      None            其他
3      000007.SZ   全新好   20240702      None  20240701         撤销*ST
4      300742.SZ   越博退   20240701      None  20240621          终止上市
...          ...   ...        ...       ...       ...           ...
18288  600656.SH  凤凰化工   19901219  19961231      None            其他
18289  600654.SH  飞乐股份   19901219  20051212      None            其他
18290  600651.SH  飞乐音响   19901219  20200505      None            其他
18291  000005.SZ  深原野A   19901210  19940102      None            其他
18292  000004.SZ  深安达A   19901201  19990426      None            其他

[18293 rows x 6 columns]


In [11]:
def code_to_secid(df : pd.DataFrame , code_col = 'ts_code' , retain = False):
    '''switch old symbol into secid'''
    if code_col not in df.columns.values: return df
    replace_dict = {'T00018' : '600018'}
    df['secid'] = df[code_col].astype(str).str.slice(0, 6).replace(replace_dict)
    df['secid'] = df['secid'].where(df['secid'].str.isdigit() , '-1').astype(int)
    if not retain: del df[code_col]
    return df

df = code_to_secid(df)
df['start_date'] = df['start_date'].fillna(-1).astype(int)
df['ann_date'] = df['ann_date'].fillna(-1).astype(int)
df['end_date'] = df['end_date'].fillna(99991231).astype(int)
df['st'] = np.where(df['ann_date'] > 0 , np.minimum(df['start_date'] , df['ann_date']) , df['ann_date'])
df['ed'] = df['end_date']

In [12]:
print(df)

       name  start_date  end_date  ann_date change_reason   secid        st  \
0       左江退    20240708  99991231  20240628          终止上市  300799  20240628   
1      康美药业    20240704  99991231  20240703          撤销ST  600518  20240703   
2       安乃达    20240703  99991231        -1            其他  603350        -1   
3       全新好    20240702  99991231  20240701         撤销*ST       7  20240701   
4       越博退    20240701  99991231  20240621          终止上市  300742  20240621   
...     ...         ...       ...       ...           ...     ...       ...   
18288  凤凰化工    19901219  19961231        -1            其他  600656        -1   
18289  飞乐股份    19901219  20051212        -1            其他  600654        -1   
18290  飞乐音响    19901219  20200505        -1            其他  600651        -1   
18291  深原野A    19901210  19940102        -1            其他       5        -1   
18292  深安达A    19901201  19990426        -1            其他       4        -1   

             ed  
0      99991231  
1      99991231

In [7]:
df['change_reason'].unique()

array(['终止上市', '撤销ST', '其他', '撤销*ST', '摘星', '改名', 'ST', '*ST', '摘星改名',
       '恢复上市加N', '恢复上市', '更名', '完成股改', '摘G', '未股改加S', '暂停上市'],
      dtype=object)

In [None]:
['撤销ST', '其他', '撤销*ST', '摘星', '改名', '摘星改名',
'恢复上市加N', '恢复上市', '更名', '完成股改', '摘G', '未股改加S']

In [15]:
from datetime import datetime, timedelta
  
date_start_str = 20221201
date_end_str = 20230105
  
date_start = datetime.strptime(str(date_start_str), '%Y%m%d')  
date_end = datetime.strptime(str(date_end_str), '%Y%m%d')  
  
date_list = []  

current_date = date_start  
while current_date <= date_end:  
    date_list.append(int(current_date.strftime('%Y%m%d')))  
    current_date += timedelta(days=1)  
  
# 打印结果  
print(date_list)

ImportError: cannot import name 'range' from 'datetime' (c:\Users\jinmeng\AppData\Local\Programs\Python\Python311\Lib\datetime.py)

In [10]:
df[df['change_reason'].isin(['终止上市', 'ST', '*ST', '暂停上市'])]

Unnamed: 0,name,start_date,end_date,ann_date,change_reason,secid
0,左江退,20240708,99991231,20240628,终止上市,300799
4,越博退,20240701,99991231,20240621,终止上市,300742
7,三盛退,20240627,99991231,20240620,终止上市,300282
33,太安退,20240614,99991231,20240606,终止上市,2433
51,中期退,20240606,99991231,20240531,终止上市,996
...,...,...,...,...,...,...
17452,ST湘中意,19980429,19990530,-1,ST,548
17453,ST黄河科,19980429,20020328,-1,ST,600831
17454,ST粤海发,19980429,20010610,-1,ST,600647
17455,ST辽房天,19980429,19990525,-1,ST,558
