In [None]:
import networkx as nx

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

def read_csv(file_path):
    """Load the header-less sequence library CSV into a DataFrame.

    The file is parsed with ``header=None`` (the first line is data) and
    its columns are labelled ``name``, ``sequence``, ``parent``,
    ``generation`` and ``direction``.

    :param file_path: passed straight through to ``pandas.read_csv``.
    :return: the parsed ``pandas.DataFrame``.
    """
    column_names = ['name', 'sequence', 'parent', 'generation', 'direction']
    return pd.read_csv(file_path, names=column_names, header=None)

def create_graph(df):
    """Build a directed parent -> child graph from the sequence table.

    Every row becomes a node keyed by its ``name`` and carrying the row's
    ``sequence`` and ``generation`` as node attributes. An edge
    ``parent -> name`` is added unless the row has no parent (missing/NaN)
    or names itself as its own parent.

    :param df: DataFrame with ``name``, ``sequence``, ``parent`` and
        ``generation`` columns.
    :return: the populated ``networkx.DiGraph``.
    """
    G = nx.DiGraph()
    for _, row in df.iterrows():
        name, parent = row['name'], row['parent']
        G.add_node(name, sequence=row['sequence'], generation=row['generation'])
        # A missing parent compares unequal to every name, so without the
        # notna() guard it would be inserted as a bogus node plus edge.
        if pd.notna(parent) and parent != name:
            G.add_edge(parent, name)
    return G

def draw_graph(G, seed=42):
    """Draw the graph with a force-directed layout and display the figure.

    Node names are drawn by ``nx.draw`` and the ``generation`` attribute of
    each node is drawn on top of them as a second label layer.

    :param G: graph to draw; nodes are expected to carry a ``generation``
        attribute.
    :param seed: seed handed to ``nx.spring_layout``. The layout starts from
        random positions, so a fixed seed keeps the figure identical between
        runs. Pass ``None`` to get a fresh random layout each time.
    """
    pos = nx.spring_layout(G, scale=2, seed=seed)
    nx.draw(
        G,
        pos,
        with_labels=True,
        node_size=300,
        node_color="skyblue",
        font_size=5,
        font_weight="bold",
        arrows=True,
    )
    generation_labels = nx.get_node_attributes(G, 'generation')
    nx.draw_networkx_labels(G, pos, generation_labels, font_size=10)
    plt.axis('equal')
    plt.show()

def main(file_path):
    """Read the CSV at ``file_path``, turn it into a graph and draw it."""
    draw_graph(create_graph(read_csv(file_path)))


# Run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    file_path = "FastaSequenceLibrary.csv"
    main(file_path)

In [None]:
import pandas as pd  
import matplotlib.pyplot as plt  
from collections import defaultdict  
  
def read_csv(file_path):
    """Load a header-less CSV and label its columns ``Species`` / ``Ancestor``.

    :param file_path: passed straight through to ``pandas.read_csv``.
    :return: the parsed ``pandas.DataFrame``.
    """
    # The first line is treated as data rather than a header, so the column
    # names are supplied explicitly.
    return pd.read_csv(file_path, names=['Species', 'Ancestor'], header=None)
  
def build_evolution_tree(df):
    """Turn a Species/Ancestor table into a root and a parent -> children map.

    :param df: DataFrame with ``Species`` (child) and ``Ancestor`` (parent)
        columns.
    :return: ``(root, tree)`` where ``tree`` is a plain dict mapping each
        ancestor to the list of its species, in row order.
    :raises ValueError: if the number of ancestors that never appear as a
        species is not exactly one.
    """
    children_of = {}
    for _, record in df.iterrows():
        species, ancestor = record['Species'], record['Ancestor']
        children_of.setdefault(ancestor, []).append(species)

    # The root is the one ancestor that is never listed as a species itself.
    root_candidates = []
    for ancestor in df['Ancestor'].unique():
        if ancestor in df['Species'].values:
            continue
        root_candidates.append(ancestor)

    if len(root_candidates) != 1:
        raise ValueError("无法确定唯一的根节点，或者CSV文件结构不符合预期")

    return root_candidates[0], children_of
  
# ... (the rest of the code is unchanged; plot_evolution_tree is defined in a later cell)
  
def main(file_path):
    """Read the CSV at ``file_path``, build the tree and plot it."""
    root, tree = build_evolution_tree(read_csv(file_path))
    # NOTE: plot_evolution_tree is not defined in this cell; it has to exist
    # in the kernel namespace already when main() is called.
    plot_evolution_tree(root, tree)


# Run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    file_path = "FastaSequenceLibrary.csv"
    main(file_path)

In [None]:
import pandas as pd  
import matplotlib.pyplot as plt  
from collections import defaultdict  
  
def read_csv(file_path):
    """Load the CSV, skipping its first line.

    :param file_path: passed straight through to ``pandas.read_csv``.
    :return: the parsed ``pandas.DataFrame``.
    """
    # The first line is assumed to be a comment and is skipped; pandas then
    # takes the next line as the header row (its default behaviour).
    # The original call was missing the comma between the two arguments,
    # which made the whole cell a SyntaxError.
    df = pd.read_csv(file_path, skiprows=1)
    return df
  
def build_evolution_tree(df):
    """Turn a name/parent table into a root and a parent -> children map.

    A row marks its node as a root when its ``parent`` is missing (NaN/None)
    or equal to its own ``name``; such rows add no edge. In addition, any
    parent that never appears in the ``name`` column is treated as a root,
    because a root need not have a row of its own.

    :param df: DataFrame with ``name`` (child) and ``parent`` columns.
    :return: ``(root, tree)`` where ``tree`` is a plain dict mapping each
        parent to the list of its children, in row order.
    :raises ValueError: if the table does not describe exactly one root.
    """
    tree = defaultdict(list)
    explicit_roots = []
    for child, parent in zip(df['name'], df['parent']):
        if pd.isna(parent) or parent == child:
            # Row describes a root: remember it, but do not create a NaN key
            # or a self-loop in the tree.
            if child not in explicit_roots:
                explicit_roots.append(child)
            continue
        tree[parent].append(child)

    # Parents that have no row of their own are implicit roots. A set makes
    # each membership check O(1) instead of a scan over the whole column.
    known_names = set(df['name'])
    implicit_roots = [parent for parent in tree if parent not in known_names]

    roots = explicit_roots + implicit_roots
    if len(roots) != 1:
        raise ValueError("无法确定唯一的根节点，或者CSV文件结构不符合预期")

    return roots[0], dict(tree)
  
def plot_evolution_tree(root, tree, x_offset=100, y_offset=50):
    """Draw the tree top-down with matplotlib and display the figure.

    Leaves are spread left to right ``x_offset`` apart, every parent is
    centred above its children, and each level sits ``y_offset`` below the
    previous one. The root is placed at ``(0, 0)``. Axis limits are derived
    from the computed positions so trees of any size stay inside the figure.

    :param root: name of the root node.
    :param tree: dict mapping a node to the list of its children.
    :param x_offset: horizontal distance between neighbouring leaves.
    :param y_offset: vertical distance between consecutive levels.
    """
    positions = {}
    edges = []
    leaves_placed = [0]  # one-element list so the nested function can update it

    def place(node, depth):
        """Assign a position to ``node`` and its subtree; return the node's x."""
        # Mark the node before descending so that a cycle or a child shared
        # by two parents is only ever placed once.
        positions[node] = None
        child_xs = []
        for child in tree.get(node, []):
            if child in positions:
                continue
            edges.append((node, child))
            child_xs.append(place(child, depth + 1))
        if child_xs:
            x = (child_xs[0] + child_xs[-1]) / 2
        else:
            x = leaves_placed[0] * x_offset
            leaves_placed[0] += 1
        positions[node] = (x, -depth * y_offset)
        return x

    place(root, 0)

    # Shift everything horizontally so the root sits at x = 0.
    shift = positions[root][0]
    positions = {node: (x - shift, y) for node, (x, y) in positions.items()}

    plt.figure(figsize=(10, 6))
    for parent, child in edges:
        (parent_x, parent_y), (child_x, child_y) = positions[parent], positions[child]
        plt.plot([parent_x, child_x], [parent_y, child_y], 'k-')
    for node, (x, y) in positions.items():
        plt.text(x, y, node, fontsize=10, ha='center')

    # Pad the limits by one step so the labels are not cut off at the border.
    xs = [x for x, _ in positions.values()]
    ys = [y for _, y in positions.values()]
    x_pad = abs(x_offset) or 1
    y_pad = abs(y_offset) or 1
    plt.xlim(min(xs) - x_pad, max(xs) + x_pad)
    plt.ylim(min(ys) - y_pad, max(ys) + y_pad)
    plt.axis('off')
    plt.show()
  
def main(file_path):
    """Read the CSV at ``file_path``, build the tree and plot it."""
    root, tree = build_evolution_tree(read_csv(file_path))
    plot_evolution_tree(root, tree)


# Run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    file_path = "FastaSequenceLibrary.csv"  # replace with the path to your CSV file
    main(file_path)

In [None]:
import pandas as pd  
import networkx as nx  
import matplotlib.pyplot as plt  
  
def read_csv(file_path):
    """Parse the header-less CSV and name its five columns.

    :param file_path: passed straight through to ``pandas.read_csv``.
    :return: DataFrame with columns ``name``, ``sequence``, ``parent``,
        ``generation`` and ``direction``.
    """
    return pd.read_csv(
        file_path,
        names=['name', 'sequence', 'parent', 'generation', 'direction'],
        header=None,
    )
  
def create_graph(df):
    """Build a directed parent -> child graph from the sequence table.

    Each row adds a node keyed by ``name`` with ``sequence`` and
    ``generation`` attributes. An edge ``parent -> name`` is added unless the
    parent is missing (NaN) or equal to the node itself.

    :param df: DataFrame with ``name``, ``sequence``, ``parent`` and
        ``generation`` columns.
    :return: the populated ``networkx.DiGraph``.
    """
    G = nx.DiGraph()
    for _, record in df.iterrows():
        node = record['name']
        G.add_node(node, sequence=record['sequence'], generation=record['generation'])
        parent = record['parent']
        # Rows without a usable parent contribute a node but no edge.
        if pd.isna(parent) or parent == node:
            continue
        G.add_edge(parent, node)
    return G
  
def draw_graph(G):
    """Draw the graph as concentric rings around its root and display it.

    The root is the first node in topological order, which assumes the graph
    is acyclic and has a single root. Nodes are grouped by their distance
    from the root: the root sits in the centre, its children on the first
    ring, grandchildren on the second, and so on. Nodes that cannot be
    reached from the root are put on one extra outermost ring so that every
    node still gets a position.

    :param G: directed graph to draw; nodes are expected to carry a
        ``generation`` attribute, which is drawn in red over the node name.
    """
    root = next(iter(nx.topological_sort(G)))

    # NetworkX does not provide a ``radial_tree_layout`` function, so the
    # radial arrangement is built from shell_layout with one shell per depth.
    depth_of = dict(nx.single_source_shortest_path_length(G, root))
    shells_by_depth = {}
    for node, depth in depth_of.items():
        shells_by_depth.setdefault(depth, []).append(node)
    shells = [shells_by_depth[depth] for depth in sorted(shells_by_depth)]
    unreachable = [node for node in G if node not in depth_of]
    if unreachable:
        shells.append(unreachable)
    pos = nx.shell_layout(G, nlist=shells)

    # Nodes, edges and node names.
    nx.draw(G, pos, with_labels=True, node_size=3000, node_color="skyblue", font_size=10, font_weight="bold", arrows=True)

    # Extra per-node information (generation), in a different colour.
    labels = nx.get_node_attributes(G, 'generation')
    nx.draw_networkx_labels(G, pos, labels, font_color='red', font_size=10)

    plt.axis('equal')  # keep the aspect ratio so the rings stay circular
    plt.show()
  
def main(file_path):
    """Load the CSV at ``file_path``, build its graph and render it."""
    graph = create_graph(read_csv(file_path))
    draw_graph(graph)


# Run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    file_path = "FastaSequenceLibrary.csv"
    main(file_path)

In [None]:
import pandas as pd  
import networkx as nx  
from networkx.drawing.nx_agraph import graphviz_layout  # 如果你有pygraphviz  
# 或者如果你使用pydot  
# from networkx.drawing.nx_pydot import graphviz_layout  
  
import matplotlib.pyplot as plt  
  
def read_csv(file_path):
    """Read the sequence library CSV, which has no header row.

    :param file_path: passed straight through to ``pandas.read_csv``.
    :return: DataFrame whose columns are named ``name``, ``sequence``,
        ``parent``, ``generation`` and ``direction``.
    """
    columns = ['name', 'sequence', 'parent', 'generation', 'direction']
    table = pd.read_csv(file_path, header=None, names=columns)
    return table
  
def create_graph(df):
    """Convert the sequence table into a directed graph of parent -> child.

    Every row yields one node (keyed by ``name``, annotated with ``sequence``
    and ``generation``). The row also yields an edge from its ``parent`` when
    that parent is present (not NaN) and differs from the node itself.

    :param df: DataFrame with ``name``, ``sequence``, ``parent`` and
        ``generation`` columns.
    :return: the populated ``networkx.DiGraph``.
    """
    G = nx.DiGraph()
    for _, entry in df.iterrows():
        child = entry['name']
        G.add_node(child, sequence=entry['sequence'], generation=entry['generation'])
        parent = entry['parent']
        has_real_parent = pd.notna(parent) and parent != child
        if has_real_parent:
            G.add_edge(parent, child)
    return G
  
def draw_graph(G):
    """Draw the graph with a Graphviz ``dot`` layout and display the figure.

    Positions come from ``graphviz_layout`` (which needs pygraphviz or pydot,
    depending on which import the notebook uses). Node names are drawn by
    ``nx.draw``; the ``generation`` attribute is drawn over them in red.

    :param G: graph to draw; nodes are expected to carry a ``generation``
        attribute.
    """
    layout = graphviz_layout(G, prog='dot')  # 'dot' is one of Graphviz's layout engines

    # Nodes, edges and node names.
    node_style = dict(
        with_labels=True,
        node_size=3000,
        node_color="skyblue",
        font_size=10,
        font_weight="bold",
        arrows=True,
    )
    nx.draw(G, layout, **node_style)

    # Extra per-node information (generation).
    generation_labels = nx.get_node_attributes(G, 'generation')
    nx.draw_networkx_labels(G, layout, generation_labels, font_color='red', font_size=10)

    plt.axis('off')  # hide the axes
    plt.show()
  
def main(file_path):
    """Load the CSV at ``file_path``, build its graph and render it."""
    table = read_csv(file_path)
    draw_graph(create_graph(table))


# Run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    file_path = "FastaSequenceLibrary.csv"
    main(file_path)

In [None]:
import networkx as nx  
import matplotlib.pyplot as plt  
  
def create_tree(root, children_dict):
    """Build a directed graph from a root and a node -> children mapping.

    Nodes and edges are added depth-first, children in list order. Each node
    is expanded at most once, so a mapping that contains a cycle or a child
    shared by several parents terminates instead of recursing forever; the
    edge to an already-expanded node is still added.

    :param root: name of the root node.
    :param children_dict: dict mapping a node name to the list of its
        children. Nodes without an entry are treated as leaves.
    :return: the populated ``networkx.DiGraph``.
    """
    G = nx.DiGraph()
    G.add_node(root)
    expanded = set()

    def add_nodes_and_edges(parent):
        # Record the node before descending so that a cycle back to it is
        # detected on the way down.
        expanded.add(parent)
        for child in children_dict.get(parent, ()):
            G.add_node(child)
            G.add_edge(parent, child)
            if child not in expanded:
                add_nodes_and_edges(child)

    add_nodes_and_edges(root)
    return G
  
def draw_tree(G, root, pos=None, vertical_spacing=1.0, horizontal_spacing=2.0):
    """Draw a tree-shaped directed graph top-down with matplotlib.

    When ``pos`` is not supplied, positions are computed as follows: leaves
    are spread left to right ``horizontal_spacing`` apart, every parent is
    centred above its children, and each level sits ``vertical_spacing``
    below the previous one. Nodes that cannot be reached from ``root`` are
    laid out as additional top-level trees to the right so that every node
    of ``G`` has a position.

    :param G: NetworkX ``DiGraph`` with edges pointing parent -> child.
    :param root: name of the root node.
    :param pos: optional dict of node -> (x, y); used as-is when given.
    :param vertical_spacing: distance between consecutive levels.
    :param horizontal_spacing: distance between neighbouring leaves.
    """
    if pos is None:
        pos = {}
        next_leaf = 0

        def place(node, depth):
            """Position ``node`` and its subtree; return the node's x."""
            nonlocal next_leaf
            # Mark the node before descending so it is placed only once even
            # if the graph contains a cycle or a node with two parents.
            pos[node] = None
            child_xs = [
                place(child, depth + 1)
                for child in G.successors(node)
                if child not in pos
            ]
            if child_xs:
                x = (child_xs[0] + child_xs[-1]) / 2
            else:
                x = horizontal_spacing * next_leaf
                next_leaf += 1
            pos[node] = (x, -vertical_spacing * depth)
            return x

        place(root, 0)
        for node in G.nodes():
            if node not in pos:
                place(node, 0)

    # Nodes and edges.
    nx.draw(G, pos, with_labels=True, arrows=True, node_size=2000, node_color='lightblue')

    # Cosmetics.
    plt.axis('equal')  # same scale on both axes
    plt.title('Tree Diagram')
    plt.show()
  
# Example usage
if __name__ == "__main__":
    # Tree definition: every key maps to the ordered list of its children.
    root = 'A'
    children_dict = {
        'A': ['B', 'C'],
        'B': ['D', 'E'],
        'C': ['F'],
        'D': [], 'E': ['G'], 'F': [], 'G': [],
    }

    # Build the directed graph, then render it.
    G = create_tree(root, children_dict)
    draw_tree(G, root)

In [None]:
from wordcloud import WordCloud  
import matplotlib.pyplot as plt  
  
# Extracted keywords; each one appears exactly once in the generated text.
keywords = [
    '设计生成网络', '强化学习', '打分函数', '深度学习', '多肽空间',
    '特征理解', '泛化能力', '序列基序', '物理方法', '多肽任务',
]

# Configure the word cloud (adjust as needed). font_path points at a font
# that is used to display Chinese text, e.g. 'C:/Windows/Fonts/simhei.ttf'.
wordcloud = WordCloud(
    font_path='C:/Windows/Fonts/simhei.ttf',
    background_color='white',
    mode='RGBA',
    width=400,
    height=400,
)
# generate() fills the same WordCloud object in place, so calling it as a
# separate statement leaves `wordcloud` bound to the finished cloud.
wordcloud.generate(' '.join(keywords))

# Show the word cloud with matplotlib.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes
plt.show()

In [None]:
from wordcloud import WordCloud  
import matplotlib.pyplot as plt  
  
# Extracted keywords with their weights; a larger weight gives a larger word.
keywords_with_weights = {
    '设计生成网络': 10,
    '强化学习': 800,
    '打分函数': 6,
    '深度学习': 90,
    '多肽空间': 7,
    '特征理解': 5,
    '泛化能力': 40,
    '序列基序': 3,
    '物理方法': 2,
    '多肽任务': 1,
}

# Configure the word cloud. font_path points at a font that is used to
# display Chinese text, e.g. 'C:/Windows/Fonts/simhei.ttf'.
wordcloud = WordCloud(
    font_path='C:/Windows/Fonts/simhei.ttf',
    background_color='white',
    mode='RGBA',
    width=800,
    height=400,
)
# generate_from_frequencies() fills the same WordCloud object in place, so
# calling it as a separate statement leaves `wordcloud` bound to the result.
wordcloud.generate_from_frequencies(keywords_with_weights)

# Show the word cloud with matplotlib.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes
plt.show()

In [None]:
from wordcloud import WordCloud  
import matplotlib.pyplot as plt  
  
# Extracted keywords with their frequencies; a higher frequency gives a
# larger word.
keywords_with_frequencies = {
    '生成网络': 20,
    '强化学习': 25,
    '分子动力学': 10,
    '打分函数': 10,
    '深度学习': 25,
    '多肽空间': 20,
    '特征理解': 8,
    '泛化能力': 12,
    '序列基序': 7,
    '物理方法': 5,
    '多肽任务': 3,
}

# Configure the word cloud. font_path points at a font that is used to
# display Chinese text, e.g. 'C:/Windows/Fonts/simhei.ttf'.
# Optional tweaks that are currently switched off:
#   min_font_size=50            -> raise the smallest font size
#   colormap='cool' (or 'prism', 'gnuplot', 'viridis', 'plasma', 'magma',
#   'inferno', 'cividis')       -> change the colour scheme
wordcloud = WordCloud(
    font_path='C:/Windows/Fonts/simhei.ttf',
    background_color='white',
    mode='RGBA',
    width=800,
    height=400,
    max_font_size=100,  # upper bound for the font size
)
# generate_from_frequencies() fills the same WordCloud object in place, so
# calling it as a separate statement leaves `wordcloud` bound to the result.
wordcloud.generate_from_frequencies(keywords_with_frequencies)

# Show the word cloud with matplotlib.
plt.figure(figsize=(5, 5))  # canvas size
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes
plt.show()