# In [None]:
import os
import pandas as pd
from collections import defaultdict

def _load_word_list(dictionary_path, sheet_name):
    """Return the stripped, non-null entries of column ``sheet_name`` on the sheet of the same name."""
    sheet = pd.read_excel(dictionary_path, sheet_name=sheet_name)
    # astype(str) keeps numeric cells usable as search terms; without it
    # ``.str.strip()`` would turn them into NaN and ``str.count`` would fail.
    return sheet[sheet_name].dropna().astype(str).str.strip().tolist()


def process_excel_files(folder_path, dictionary_path):
    """Count sentiment-dictionary words in the news workbooks of a folder.

    The dictionary workbook is read from the sheets "Positive Word" and
    "Negative Word", each of which must hold a column with the same name.
    Every file in ``folder_path`` whose name starts with ``infobank_news_``
    and ends with ``.xlsx`` is loaded; the part of the file name after the
    last underscore (without the extension) is used as the row key. Files
    without a "正文" column are skipped, and files that cannot be read are
    reported on stdout and skipped.

    Counts are non-overlapping substring occurrences (``str.count``) in the
    space-joined text of the "正文" column. Files sharing the same key are
    accumulated into one row.

    Args:
        folder_path: Directory containing the news workbooks.
        dictionary_path: Path of the dictionary workbook.

    Returns:
        A DataFrame with a "日期" column followed by one integer column per
        dictionary entry: ``p1..pN`` for positive words and ``n1..nM`` for
        negative words, numbered by position in the dictionary. Rows are
        sorted by the "日期" key in ascending order.

    Side effects:
        Writes the table to ``output.csv`` in the current working directory
        and prints a confirmation message.
    """
    positive_words = _load_word_list(dictionary_path, "Positive Word")
    negative_words = _load_word_list(dictionary_path, "Negative Word")

    # Pair every word with its column label once. Looking the position up
    # with list.index() inside the counting loop is quadratic and sends a
    # duplicated word's count to the first occurrence's column.
    labelled_words = [(f'p{pos}', word) for pos, word in enumerate(positive_words, start=1)]
    labelled_words += [(f'n{pos}', word) for pos, word in enumerate(negative_words, start=1)]
    labels = [label for label, _ in labelled_words]

    result = defaultdict(lambda: defaultdict(int))

    for file_name in os.listdir(folder_path):
        if not (file_name.endswith('.xlsx') and file_name.startswith('infobank_news_')):
            continue

        file_path = os.path.join(folder_path, file_name)
        date = file_name.split('_')[-1].split('.')[0]  # text after the last underscore

        # Only the read can fail for reasons outside our control, so only the
        # read is guarded; a broken workbook is reported and skipped.
        try:
            df = pd.read_excel(file_path)
        except Exception as e:
            print(f"Error processing file {file_name}: {e}")
            continue

        if '正文' not in df.columns:
            continue  # nothing to count in this workbook

        text = ' '.join(df['正文'].dropna().astype(str))
        for label, word in labelled_words:
            # ''.count('') style matches would report len(text) + 1, so an
            # empty dictionary entry keeps its column but contributes 0.
            result[date][label] += text.count(word) if word else 0

    result_df = pd.DataFrame(result).T.fillna(0).astype(int)
    # Fix the column order to the dictionary order and sort the rows by key:
    # os.listdir() returns names in arbitrary order, while callers slice the
    # tail of the table as "the most recent days".
    # NOTE(review): assumes the key sorts chronologically as text
    # (e.g. YYYYMMDD) - confirm against the real file names.
    result_df = result_df.reindex(columns=labels, fill_value=0).sort_index()
    result_df.index.name = '日期'
    result_df = result_df.reset_index()

    # Persist a copy; utf-8-sig keeps the Chinese header readable in Excel.
    result_df.to_csv('output.csv', index=False, encoding='utf-8-sig')
    print("结果已保存到 output.csv")
    return result_df

# Ask the user where the news workbooks and the dictionary workbook live.
folder_path = input("请输入包含新闻Excel文件的文件夹路径：")
dictionary_path = input("请输入词典Excel文件的路径：")

# Build the per-date word-count table and show it.
result_df = process_excel_files(
    folder_path=folder_path,
    dictionary_path=dictionary_path,
)
print(result_df)

# In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

def calculate_time_windows(data, time_windows=(7, 15, 30)):
    """Sum every column of ``data`` over its trailing rows, once per window.

    Args:
        data: DataFrame whose last rows are the ones to aggregate.
            NOTE(review): presumably one row per day in chronological
            order - the function itself only looks at row position.
        time_windows: Iterable of window sizes, counted in rows. Defaults to
            ``(7, 15, 30)``. A window larger than the table uses all rows; a
            window of 0 uses no rows.

    Returns:
        A dict mapping ``'input_<window>'`` to a Series of column sums over
        the last ``window`` rows, in the order the windows were given.
    """
    # tail(n) equals iloc[-n:] for every n except 0, where iloc[-0:] would
    # select the whole table instead of nothing.
    return {
        f'input_{window}': data.tail(window).sum(axis=0)
        for window in time_windows
    }

def load_trained_model(model_path):
    """Load a saved model from ``model_path`` and return it.

    This is a thin wrapper around ``load_model`` as imported from
    ``tensorflow.keras.models``.
    """
    return load_model(model_path)

def make_predictions(models, inputs):
    """Run each model on its paired input and collect one value per pair.

    ``models`` and ``inputs`` are walked in lockstep; pairing stops at the
    shorter of the two. Each input must expose ``.values`` with a
    ``reshape`` method (for example a pandas Series).

    Returns:
        A list holding, for every pair, element ``[0][0]`` of the model's
        ``predict`` output.
    """
    def _predict_single(net, window_totals):
        # Present the values as one sample in a 3D array of shape
        # (1, number of values, 1) before handing them to the model.
        batch = window_totals.values.reshape(1, -1, 1)
        output = net.predict(batch)
        return output[0][0]

    return [_predict_single(net, totals) for net, totals in zip(models, inputs)]

def investment_advice(predictions):
    """Turn model predictions into a piece of investment advice.

    A prediction counts as bullish when it is strictly greater than 0.5.
    Exactly 3, 2 or 1 bullish predictions each map to their own message;
    any other count yields the strongly pessimistic message.
    """
    advice_by_bullish_count = {
        3: "强乐观，推荐做多",
        2: "偏乐观，推荐观望或做多",
        1: "偏悲观，推荐观望或做空",
    }
    bullish_count = sum(1 for pred in predictions if pred > 0.5)
    return advice_by_bullish_count.get(bullish_count, "强悲观，推荐做空")

# result_df is expected to be the DataFrame produced by the previous cell.
# Sum each indicator column over the default trailing windows (7, 15, 30 rows).
time_window_results = calculate_time_windows(result_df.set_index('日期'))

# Locations of the trained models, one per window.
model_7_path = 'C:\\Users\\mjy\\Desktop\\final_project\\src\\scripts\\model_7.h5'
model_15_path = 'C:\\Users\\mjy\\Desktop\\final_project\\src\\scripts\\model_15.h5'
model_30_path = 'C:\\Users\\mjy\\Desktop\\final_project\\src\\scripts\\model_30.h5'

# Load the models in window order.
model_7 = load_trained_model(model_7_path)
model_15 = load_trained_model(model_15_path)
model_30 = load_trained_model(model_30_path)

# Pick the matching window totals for each model.
input_7 = time_window_results['input_7']
input_15 = time_window_results['input_15']
input_30 = time_window_results['input_30']

# One prediction per (model, window) pair.
predictions = make_predictions(
    [model_7, model_15, model_30],
    [input_7, input_15, input_30],
)

# Translate the predictions into advice and show it.
advice = investment_advice(predictions)
print(f"投资建议：{advice}")