In [None]:
import csv
import re
from collections import Counter, defaultdict

def read_csv_and_analyze(file_path):
    """Tally doping elements and positions listed in a CSV file and print a summary.

    The first cell of every row is scanned for ``(<digits>, '<word>')`` pairs;
    each pair is treated as one (position, element) doping record.  Three
    reports are printed to stdout:

    * how often each element occurs,
    * how often each position occurs,
    * for every position, its (up to) three most frequent elements together
      with their count and their percentage share at that position.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        None.  All results are printed.

    Raises:
        OSError: If the file cannot be opened.
    """
    pair_pattern = re.compile(r"\((\d+), '(\w+)'\)")

    # Flat lists of every element / position seen, in file order.
    doping_elements = []
    doping_positions = []

    # Elements seen at each position.
    position_doping = defaultdict(list)

    # newline='' is what the csv module expects, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        for row in csv.reader(file):
            # csv.reader yields an empty list for a blank line; indexing
            # row[0] on it would raise IndexError.
            if not row:
                continue
            for pos_text, elem in pair_pattern.findall(row[0]):
                pos = int(pos_text)
                doping_elements.append(elem)
                doping_positions.append(pos)
                position_doping[pos].append(elem)

    # Element occurrence counts.
    element_counts = Counter(doping_elements)
    print("Element Occurrence Counts:")
    print(element_counts)

    # Position occurrence counts.
    position_counts = Counter(doping_positions)
    print("\nDoping Position Counts:")
    print(position_counts)

    # Top three elements for each position, with their share in percent.
    print("\nTop 3 Doping Elements for Each Position:")
    for pos, elems_at_pos in position_doping.items():
        tally = Counter(elems_at_pos)  # built once, reused for both figures
        total = sum(tally.values())
        top_elements_percent = [
            (elem, count, count/total*100)
            for elem, count in tally.most_common(3)
        ]
        print(f"Position {pos}: {top_elements_percent}")

# Run the analysis on the CSV file named below.
file_path = 'Whole Element 50percent search.csv'  
read_csv_and_analyze(file_path)


In [None]:
%matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from matplotlib.cm import Reds, Greens
import numpy as np

# Occurrence count of each doping element.
element_counts = {'Zr': 688, 'Nb': 278, 'Ta': 204, 'Hf': 52, 'Mo': 50, 'Re': 10, 'V': 7, 'Tc': 6, 'Ce': 5, 'Ti': 4, 'Eu': 4, 'Gd': 3, 'Os': 3, 'Nd': 3, 'Y': 3, 'Ir': 2, 'Ho': 2, 'Dy': 2, 'Cr': 1, 'Sc': 1, 'Fe': 1, 'Tm': 1, 'Lu': 1, 'Pr': 1, 'Sm': 1, 'Yb': 1, 'Er': 1, 'Bi': 1}

# Occurrence count of each doping position.
position_counts = {30: 84, 28: 84, 31: 84, 27: 83, 24: 83, 25: 82, 26: 82, 29: 82, 22: 57, 23: 56, 21: 52, 20: 52, 8: 42, 9: 39, 1: 39, 0: 37, 10: 26, 13: 26, 3: 25, 11: 22, 4: 22, 15: 20, 12: 20, 2: 19, 16: 17, 5: 17, 17: 16, 14: 16, 18: 15, 19: 15, 6: 14, 7: 8}

# ---- Figure 1: element occurrence bar chart ----
# Bars keep the dictionary's insertion order; each bar is shaded on the Reds
# colormap according to its count, scaled between the smallest and largest count.
elements = tuple(element_counts.keys())
counts = tuple(element_counts.values())
norm = Normalize(vmin=min(counts), vmax=max(counts))
colors = [Reds(shade) for shade in map(norm, counts)]

element_ax = plt.figure(figsize=(10, 6)).add_subplot(111)
element_ax.bar(elements, counts, color=colors)
element_ax.set_xlabel('Element')
element_ax.set_ylabel('Occurrence Count')
element_ax.set_title('Element Occurrence in Doping')
plt.xticks(rotation=45)

# Show the first figure.
plt.show()

# ---- Figure 2: doping position bar chart ----
# Positions are ordered from most to least frequent.
sorted_positions = sorted(position_counts.items(), key=lambda item: item[1], reverse=True)
positions = tuple(position for position, _ in sorted_positions)
pos_counts = tuple(occurrences for _, occurrences in sorted_positions)

norm = Normalize(vmin=min(pos_counts), vmax=max(pos_counts))
colors = [Greens(shade) for shade in map(norm, pos_counts)]

# Bars are placed at consecutive integer slots; the position ids become the
# tick labels, rotated by 45 degrees.
bar_slots = range(len(positions))
position_ax = plt.figure(figsize=(12, 6)).add_subplot(111)
position_ax.bar(bar_slots, pos_counts, color=colors)
plt.xticks(bar_slots, positions, rotation=45)
position_ax.set_xlabel('Doping Position')
position_ax.set_ylabel('Occurrence Count')
position_ax.set_title('Doping Position Occurrence')

# Show the second figure.
plt.show()


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.colors import Normalize
from matplotlib.lines import Line2D  # 用于创建自定义图例
from ase.io import read

# Load the crystal structure.
crystal = read("RuO2_110_std.cif")

# Doping counts, keyed by atom index.
doping_counts = {30: 84, 28: 84, 31: 84, 27: 83, 24: 83, 25: 82, 26: 82, 29: 82, 22: 57, 23: 56, 21: 52, 20: 52, 8: 42, 9: 39, 1: 39, 0: 37, 10: 26, 13: 26, 3: 25, 11: 22, 4: 22, 15: 20, 12: 20, 2: 19, 16: 17, 5: 17, 17: 16, 14: 16, 18: 15, 19: 15, 6: 14, 7: 8}

# The 12 positions with the highest doping counts.
top_12_positions = sorted(doping_counts, key=doping_counts.get, reverse=True)[:12]


def _marker_area(count):
    """Return the scatter marker area (points^2) used for a site doped ``count`` times."""
    return 20 + count * 5


# Create the 3D figure.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Colour mapping: counts are scaled between the smallest and largest count.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9.
norm = Normalize(vmin=min(doping_counts.values()), vmax=max(doping_counts.values()))
cmap = plt.get_cmap('bwr')

# Draw every atom; atoms without an entry in doping_counts are drawn with count 0.
for atom in crystal:
    x, y, z = atom.position
    count = doping_counts.get(atom.index, 0)
    color = cmap(norm(count))
    size = _marker_area(count)
    ax.scatter(x, y, z, color=color, s=size, alpha=0.5)

    # Label the atom with its index if it is one of the top 12 positions.
    if atom.index in top_12_positions:
        ax.text(x, y, z, f'{atom.index}', color='black')

# Hide the tick labels and the grid.
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_zticklabels([])
ax.grid(False)

# Add the colour bar.
sm = cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = plt.colorbar(sm, ax=ax, pad=0.1)
cbar.set_label('Doping Counts')

# Size legend. Scatter's ``s`` is an area in points^2 whereas Line2D's
# ``markersize`` is a length in points, hence the square root. ``** 0.5`` is
# used instead of np.sqrt so this cell does not depend on numpy having been
# imported by an earlier cell.
legend_elements = [Line2D([0], [0], marker='o', color='w', label=str(count),
                          markerfacecolor='grey', markersize=_marker_area(count) ** 0.5, alpha=0.5)
                   for count in [20, 40, 60, 80]]  # representative doping counts
ax.legend(handles=legend_elements, title="Doping Counts", bbox_to_anchor=(1.05, 1), loc='lower left')

# Set the figure title.
ax.set_title('Crystal Structure with Doping Counts')

# Show the figure.
plt.show()