In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker #调整刻度
from sklearn.linear_model import LinearRegression
from scipy.optimize import nnls
from scipy.stats import zscore #数据标准化
from scipy.stats import pearsonr #计算相关系数
from sklearn.metrics import r2_score #计算决定系数R^2
import iFinDPy
from iFinDPy import *
from sklearn.preprocessing import StandardScaler
# 设置全局字体
plt.rcParams['font.family'] = 'Microsoft YaHei'

C:\Users\sharon\anaconda3\Lib\site-packages\iFinDPy.pth


In [17]:
def extract_history_data(stock_package, key_stock, start_day, end_day, account, password):
    THS_iFinDLogin(account,password)
    data1 = THS_HQ(stock_package,'close','',start_day,end_day)
    data2 = THS_HQ(key_stock,'close','',start_day,end_day)
    THS_iFinDLogout()
    data1 = pd.DataFrame(data1.data)
    data1['time'] = pd.to_datetime(data1['time'])
    data2 = pd.DataFrame(data2.data)
    data2['time'] = pd.to_datetime(data2['time'])
    pivot_data1 = data1.pivot_table(index='time', columns='thscode', values='close')
    pivot_data2 = data2.pivot_table(index='time', columns='thscode', values='close')
    #提取时间
    time_period = np.array(pivot_data2.index)
    time_period = np.datetime_as_string(time_period, unit='D')
    #标准化
    scaler = StandardScaler()
    standard_data1 = scaler.fit_transform(pivot_data1)
    standard_data2 = scaler.fit_transform(pivot_data2)
    return standard_data1.round(4), standard_data2.round(4).flatten(), time_period

def non_negative_linear_regression(X, y, feature_names, granularity=0):
    
    # 将回归问题转化为非负约束的优化问题
    non_negative_coefficients, _ = nnls(X, y)
    
    # 归一化系数
    sum_coefficients = np.sum(non_negative_coefficients)
    vicoefficients = non_negative_coefficients / sum_coefficients

    # 计算回归函数的预测值
    regression_values = np.dot(X, non_negative_coefficients)

    #从高到低排序
    sorted_vicoefficients = np.array(sorted(zip(feature_names, vicoefficients), key=lambda x: x[1], reverse=True))
    
    if granularity:
    # 提取系数并应用颗粒度处理
        sorted_vicoefficients1 = sorted_vicoefficients[:, 1].astype(float)
        sorted_vicoefficients_granular = np.around(sorted_vicoefficients1 / granularity) * granularity 

        # 计算归一化的颗粒度系数
        adjusted_vicoefficients_granular = sorted_vicoefficients_granular / np.sum(sorted_vicoefficients_granular)

        # 打印结果
        for name, coef in zip(sorted_vicoefficients[:, 0], adjusted_vicoefficients_granular):
            if coef != 0:
                print(name, ':', "{:.2f}%".format(float(coef) * 100))
    
    else:
        # 打印结果
        for name, coef in sorted_vicoefficients:
            coef = float(coef)  # 将系数转换为float类型
            if coef != 0:
                print(name, ':', "{:.2f}%".format(float(coef) * 100))
        """
        不做颗粒度处理则仅返回按比例投资的预测值数组，按比例投资系数（而非实际回归系数）
        """
        return regression_values, non_negative_coefficients
    


In [19]:
account = 'hfjj***'
password = '******'

stock_package = '000015.SH,000016.SH,000688.SH,000852.SH,000903.SH,000905.SH,000906.SH,399001.SZ,399005.SZ,399006.SZ,399295.SZ,399296.SZ,399300.SZ,399330.SZ,399673.SZ,399922.SZ,CI005001.CI,CI005002.CI,CI005003.CI,CI005004.CI,CI005005.CI,CI005006.CI,CI005007.CI,CI005008.CI,CI005009.CI,CI005010.CI,CI005011.CI,CI005012.CI,CI005013.CI,CI005014.CI,CI005015.CI,CI005016.CI,CI005017.CI,CI005018.CI,CI005019.CI,CI005020.CI,CI005021.CI,CI005022.CI,CI005023.CI,CI005024.CI,CI005025.CI,CI005026.CI,CI005027.CI,CI005028.CI,CI005029.CI,CI005030.CI'
bunch_labels = np.array(['红利指数', '上证50', '科创50', '中证1000', '中证100',
                       '中证500', '中证800', '深证成指', '中小100','创业板指', 
                       '创价值','创成长','沪深300','深证100','创业板50',
                        '石油石化指数', '煤炭指数', '有色金属指数', '电力及公用事业指数', '钢铁指数', '基础化工指数',
       '建筑指数', '建材指数', '轻工制造指数', '机械指数', '电力设备及新能源指数', '国防军工指数', '汽车指数',
       '商贸零售指数', '消费者服务指数', '家电指数', '纺织服装指数', '医药指数', '食品饮料指数', '农林牧渔指数',
       '银行指数', '非银行金融指数', '房地产指数', '交通运输指数', '电子指数', '通信指数', '计算机指数', '传媒指数',
       '综合指数', '综合金融指数'])

key_stock = '399372.SZ' #大盘成长
#399373.SZ 大盘价值组
start_day = '2022-07-01'
end_day = '2022-12-31'

bunch, stock1, time_period = extract_history_data(stock_package, key_stock, start_day, end_day, account, password)
regression_values1 = non_negative_linear_regression(bunch, stock1, bunch_labels,0.01)

深证100 : 56.00%
中小100 : 16.00%
创成长 : 14.00%
创业板50 : 6.00%
医药指数 : 5.00%
上证50 : 3.00%


In [20]:
bunch, stock1, time_period = extract_history_data(stock_package, key_stock, start_day, end_day, account, password)
regression_values1 = non_negative_linear_regression(bunch, stock1, bunch_labels,0.05)

深证100 : 55.00%
中小100 : 15.00%
创成长 : 15.00%
创业板50 : 5.00%
医药指数 : 5.00%
上证50 : 5.00%
