In [201]:
import os 
import pandas as pd
import numpy as np
import math
import csv
from copy import deepcopy
import datetime

In [393]:
# Reading is slow; it could be switched to multi-threaded loading.
def read_origin_stock_data(origin_stock_path):
    """Load every ``.xlsx`` workbook of a directory into module globals.

    Each workbook is read with its first four rows skipped, every column
    whose label contains ``'Unnamed'`` is dropped, and the resulting frame is
    stored as the module global ``stock_<year>``. ``<year>`` is the file stem
    with leading/trailing ``_`` and ``.`` characters stripped.

    Args:
        origin_stock_path: Directory that holds the workbooks.

    Returns:
        Sorted list of the distinct ``<year>`` keys that were loaded.
    """
    year_list = []
    for entry in os.listdir(origin_stock_path):
        file_name, file_format = os.path.splitext(entry)
        if file_format != '.xlsx':
            continue
        year = file_name.strip('_.')
        year_list.append(year)
        # Open the file under its real name. Rebuilding the path from the
        # stripped stem pointed at a non-existent file whenever stripping
        # actually removed characters (e.g. "_2015.xlsx" -> "2015.xlsx").
        one_year_stock_path = os.path.join(origin_stock_path, entry)
        one_year_stock = pd.read_excel(one_year_stock_path, skiprows=4)
        list_columns = [column for column in one_year_stock.columns if 'Unnamed' in str(column)]
        # Other code in this file looks the frame up by this global name.
        globals()['stock_' + year] = one_year_stock.drop(list_columns, axis=1)
    # Sorted so the result does not depend on set iteration order.
    return sorted(set(year_list))

In [257]:
def read_stock_group_data(stock_group_path):
    """Load every ``.csv`` group file of a directory into module globals.

    Each file is read as UTF-8 with its first column as the index and stored
    as the module global ``group_<category>``. ``<category>`` is the file
    stem with leading/trailing ``_`` and ``.`` characters stripped.

    Args:
        stock_group_path: Directory that holds the group CSV files.

    Returns:
        Sorted list of the distinct ``<category>`` keys that were loaded.
    """
    category_list = []
    for entry in os.listdir(stock_group_path):
        file_name, file_format = os.path.splitext(entry)
        if file_format != '.csv':
            continue
        category = file_name.strip('_.')
        category_list.append(category)
        # Read the file under its real name. Rebuilding the path from the
        # stripped stem broke for names such as "_bank.csv".
        one_stock_group_path = os.path.join(stock_group_path, entry)
        one_stock_group = pd.read_csv(one_stock_group_path, index_col=0, encoding='utf-8')
        # Other code in this file looks the frame up by this global name.
        globals()['group_' + category] = one_stock_group
    # Sorted so the result does not depend on set iteration order.
    return sorted(set(category_list))

In [418]:
def generate_sequence_stat(category_list, year_list, year_category_63_path, year_asset_stat_path):
    """Build, store and summarise the per-(year, category) price tables.

    For every year/category pair the rows of the global ``group_<category>``
    whose '最新调整日期' value starts with ``<year>`` (text before the first
    ``-``) are left-joined on 'Wind代码' with the global ``stock_<year>``.
    The joined frame is:

    * published as the module global ``<year>_<category>_seq``;
    * folded into the per-year summary via ``fill_the_chart``;
    * written to ``<year_category_63_path>/<year>_<category>_seq.csv``;
    * extended with a return-rate row via ``comupute_rate_store``.

    Finally the per-year summary is written by ``asset_store``.

    Args:
        category_list: Category keys; ``group_<category>`` must be a global.
        year_list: Year keys (strings); ``stock_<year>`` must be a global.
        year_category_63_path: Output directory for the joined tables
            (created if missing).
        year_asset_stat_path: Output directory handed to ``asset_store``.

    Raises:
        NameError: If a required ``group_*`` / ``stock_*`` global is missing.
    """
    # One entry per year: a shared 'date' axis plus one value list per category.
    asset_stat = {}
    for year in year_list:
        asset_stat[year] = {'date': []}
        for category in category_list:
            asset_stat[year][category] = []

    # to_csv does not create directories; make sure the target exists.
    if year_category_63_path:
        os.makedirs(year_category_63_path, exist_ok=True)

    module_globals = globals()
    for year in year_list:
        for category in category_list:
            # Plain dictionary lookup instead of eval(): the keys come from
            # file names, so they must never be executed as code, and they
            # need not be valid Python identifiers.
            try:
                the_group = module_globals['group_' + category]
                the_year_stock = module_globals['stock_' + year]
            except KeyError as missing:
                raise NameError('name %s is not defined' % missing) from None

            adjusted_in_year = the_group['最新调整日期'].apply(lambda x: x.split('-')[0]) == year
            year_category_group = the_group[adjusted_in_year]
            # The stock frame's own '最新调整日期' column is dropped so the
            # merge does not produce a duplicated, suffixed column.
            year_category_group_seq = pd.merge(
                year_category_group,
                the_year_stock.drop('最新调整日期', axis=1),
                on='Wind代码',
                how='left',
            )

            seq_name = '_'.join([year, category, 'seq'])
            module_globals[seq_name] = year_category_group_seq
            fill_the_chart(year, category, year_category_group_seq, asset_stat)

            store_path = os.path.join(year_category_63_path, seq_name + '.csv')
            year_category_group_seq.to_csv(store_path, encoding='utf_8_sig', index=False)
            comupute_rate_store(store_path, year_category_group_seq)
    asset_store(asset_stat, year_asset_stat_path)

            
def comupute_rate_store(store_path, year_category_group_seq):
    """Append one row of period-over-period log returns to a stored CSV.

    Every column whose label is a ``datetime.datetime`` is treated as one
    observation date. For each date the weighted amount is the inner product
    of the 'pct' column with that date's column. The appended row holds
    ``log(amount_t) - log(amount_{t-1})`` under each date column except the
    first one; every other cell of the row is an empty string.

    Args:
        store_path: Existing CSV file the row is appended to.
        year_category_group_seq: Frame the CSV was written from; supplies the
            column layout, the 'pct' weights and the per-date values.
    """
    columns = list(year_category_group_seq.columns)
    # (position in the row, column label) for every date column, in order.
    dated = [(position, label) for position, label in enumerate(columns)
             if isinstance(label, datetime.datetime)]

    new_line = [''] * len(columns)
    if len(dated) > 1:
        weights = np.array(year_category_group_seq['pct'])
        # Each date's log-amount is needed for two consecutive differences;
        # compute it once instead of once per difference.
        log_amounts = [
            np.log(np.inner(weights, np.array(year_category_group_seq[label])))
            for _, label in dated
        ]
        for index in range(1, len(dated)):
            position = dated[index][0]
            new_line[position] = log_amounts[index] - log_amounts[index - 1]

    # The row is pure ASCII (numbers / empty cells); the encoding is spelled
    # out only so the append never depends on the platform default.
    with open(store_path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(new_line)
        
        
def fill_the_chart(year, category, year_category_group_seq, asset_stat):
    """Record one category's weighted amount per date in the yearly summary.

    Walks the columns of ``year_category_group_seq`` whose label is a
    ``datetime.datetime``. Each such label is added to
    ``asset_stat[year]['date']`` if it is not already there, and the inner
    product of the 'pct' column with that date's column, rounded to four
    decimals, is appended to ``asset_stat[year][category]``.

    ``asset_stat`` is modified in place; nothing is returned.
    """
    frame = year_category_group_seq
    date_labels = (label for label in frame.columns
                   if isinstance(label, datetime.datetime))
    for label in date_labels:
        seen_dates = asset_stat[year]['date']
        if label not in seen_dates:
            seen_dates.append(label)
        category_series = asset_stat[year][category]
        weights = np.array(frame['pct'])
        values = np.array(frame[label])
        category_series.append(np.around(np.inner(weights, values), 4))

        
def asset_store(asset_stat, year_asset_stat_path):
    """Write one ``<year>_asset.csv`` summary file per year.

    Args:
        asset_stat: Mapping ``year -> {column name: list of values}``; each
            inner mapping becomes one DataFrame / one CSV file.
        year_asset_stat_path: Output directory (created if missing).
    """
    # to_csv does not create directories; make sure the target exists.
    if year_asset_stat_path:
        os.makedirs(year_asset_stat_path, exist_ok=True)
    for year, asset in asset_stat.items():
        year_asset_path = os.path.join(year_asset_stat_path, '_'.join([year, 'asset']) + '.csv')
        # utf_8_sig writes a BOM at the start of the file.
        pd.DataFrame(asset).to_csv(year_asset_path, encoding='utf_8_sig', index=False)

In [420]:
# Input directory scanned for the yearly .xlsx stock workbooks.
origin_stock_path = './Dataset/BankExam/origin_stock'
# Input directory scanned for the per-category .csv group files.
stock_group_path = './Dataset/BankExam/stock_group'
# Output directory for the "<year>_<category>_seq.csv" tables.
year_category_63_path = './Result/BankExam_Result/Sequence'
# Output directory for the "<year>_asset.csv" yearly summaries.
year_asset_stat_path = './Result/BankExam_Result/Stat'

In [415]:
# Load the yearly workbooks (stored as globals named stock_<year>) and keep the year keys.
year_list = read_origin_stock_data(origin_stock_path)

In [416]:
# Load the group files (stored as globals named group_<category>) and keep the category keys.
category_list = read_stock_group_data(stock_group_path)

In [421]:
# Join groups with prices for every (year, category) pair and write the sequence and summary CSVs.
generate_sequence_stat(category_list, year_list, year_category_63_path, year_asset_stat_path)