In [None]:
# ChatGPT-4
import itertools
import os

import matplotlib.pyplot as plt
import pandas as pd

# Make sure the output directory for the figures exists.
os.makedirs('fig', exist_ok=True)

# Load the raw data.
data = pd.read_csv('data.csv')

# Keep only the first row for each 'TOA' value, then drop every row that
# still has a missing value in any column.
data = data.drop_duplicates(subset='TOA').dropna()

# Per-category summary statistics (max, mean, min, median, variance).
# Only numeric columns are aggregated: 'mean'/'var' raise a TypeError on
# non-numeric columns in pandas >= 2.0, which would abort the whole call.
# The grouping key itself is excluded because it is not a data column.
numeric_columns = [
    column
    for column in data.select_dtypes(include='number').columns
    if column != 'category'
]
grouped = data.groupby('category')
stats = grouped[numeric_columns].agg(['max', 'mean', 'min', 'median', 'var'])

print(stats)

# Features that are plotted against each other.
features = ['CF', 'PW', 'PA', 'IMP']

# One scatter plot per unordered feature pair, one colour per category.
for x_feature, y_feature in itertools.combinations(features, 2):
    fig, ax = plt.subplots(figsize=(8, 6))
    # Iterating the groupby yields each category's rows once (in sorted key
    # order) instead of re-filtering the full frame for every category.
    for category, subset in grouped:
        ax.scatter(subset[x_feature], subset[y_feature],
                   label=f'Category {category}')
    ax.set_xlabel(x_feature)
    ax.set_ylabel(y_feature)
    ax.set_title(f'{x_feature} vs {y_feature}')
    ax.legend()
    fig.savefig(f'fig/{x_feature}_vs_{y_feature}.png')
    # Release the figure so open figures do not pile up across iterations.
    plt.close(fig)

# The statistics are only printed above, not written to disk, so the message
# says exactly that (the previous text claimed they had been saved).
print("处理完成，统计信息已打印，图形已保存到fig目录。")

In [None]:
# ChatGPT-3.5
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the raw data.
df = pd.read_csv('data.csv')

# Keep only the first row for each 'TOA' value, then drop every row that
# still has a missing value. Done without inplace=True so re-running the
# cell always starts from the freshly loaded frame.
df = df.drop_duplicates(subset=['TOA']).dropna()

# The four features that are summarised and plotted.
features = ['CF', 'PW', 'PA', 'IMP']

# Per-category max / mean / min / median / variance of every feature.
# Selecting the feature columns once yields the same (feature, statistic)
# column MultiIndex as spelling the statistic list out per feature.
grouped = df.groupby('category')
result = grouped[features].agg(['max', 'mean', 'min', 'median', 'var'])

# savefig does not create missing directories; without this the first
# savefig call raises FileNotFoundError when 'fig' does not exist yet.
os.makedirs('fig', exist_ok=True)

# One scatter plot per unordered feature pair.
for i in range(len(features)):
    for j in range(i + 1, len(features)):
        feature1 = features[i]
        feature2 = features[j]
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.scatter(df[feature1], df[feature2])
        ax.set_xlabel(feature1)
        ax.set_ylabel(feature2)
        ax.set_title(f'Scatter Plot of {feature1} vs {feature2}')
        fig.savefig(f'fig/{feature1}_{feature2}_scatter.png')
        # Release the figure so open figures do not pile up.
        plt.close(fig)

# Persist the statistics table.
result.to_csv('statistical_results.csv')

In [None]:
# Github Copilot
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the raw data.
df = pd.read_csv('data.csv')

# Keep only the first row for each 'TOA' value.
df = df.drop_duplicates(subset='TOA')

# Drop every row that has a missing value in any column.
df = df.dropna()

# Per-category max / mean / min / median / variance.
# Only numeric columns are aggregated: 'mean'/'var' raise a TypeError on
# non-numeric columns in pandas >= 2.0. The grouping key is excluded because
# it is not a data column.
numeric_columns = [
    column
    for column in df.select_dtypes(include='number').columns
    if column != 'category'
]
grouped = df.groupby('category')
statistics = grouped[numeric_columns].agg(['max', 'mean', 'min', 'median', 'var'])

print(statistics)

# Pairwise scatter plots of the four features (CF, PW, PA, IMP).
features = ['CF', 'PW', 'PA', 'IMP']

# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory in between.
os.makedirs('fig', exist_ok=True)

# Map every category label to an integer code so the points can be coloured
# through the colormap even when the labels are strings (passing string
# labels directly as c= makes matplotlib try to parse them as colour names).
category_codes, _ = pd.factorize(df['category'])

for i in range(len(features)):
    for j in range(i + 1, len(features)):
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.scatter(df[features[i]], df[features[j]], c=category_codes)
        ax.set_xlabel(features[i])
        ax.set_ylabel(features[j])
        ax.set_title(f'{features[i]} vs {features[j]}')
        fig.savefig(f'fig/{features[i]}_{features[j]}.png')
        # Release the figure; otherwise every figure stays open in memory.
        plt.close(fig)

In [None]:
# 文心一言
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the raw data.
df = pd.read_csv('data.csv')

# Keep only the first row for each 'TOA' value.
df = df.drop_duplicates(subset='TOA', keep='first')

# Drop every row that has a missing value in any column.
df = df.dropna()

# Group by category.
grouped = df.groupby('category')

# Per-category max / mean / min / median / variance of the 'CF' column.
stats = grouped.agg(max_value=('CF', 'max'),
                    avg_value=('CF', 'mean'),
                    min_value=('CF', 'min'),
                    median_value=('CF', 'median'),
                    variance=('CF', 'var'))

# Rows of `stats` for categories whose CF variance is exactly zero.
# The previous code indexed the `grouped.groups` dict with a boolean Series
# (TypeError: unhashable) and then called DataFrame.append, which was removed
# in pandas 2.0. These rows are already part of `stats`, so nothing is
# appended; they are only selected here.
# Note: a category with a single row has NaN variance and is not selected.
variance_zero = stats[stats['variance'] == 0]

# savefig does not create missing directories, so make sure 'fig' exists.
os.makedirs('fig', exist_ok=True)

# One figure per unordered feature pair. The previous code passed the pair
# tuple as the subplot index of a 2x2 grid, which is invalid (and six pairs
# do not fit into four cells), and saved the shared figure on every pass.
for x_feature, y_feature in itertools.combinations(['CF', 'PW', 'PA', 'IMP'], 2):
    fig, ax = plt.subplots()
    ax.scatter(df[x_feature], df[y_feature])
    ax.set_xlabel(x_feature)
    ax.set_ylabel(y_feature)
    fig.savefig(f'fig/scatter_{x_feature}_{y_feature}.png')

# Display all figures once, then release them.
plt.show()
plt.close('all')