In [1]:
import pandas as pd

# Open the workbook once; the resulting handle is reused when parsing sheets
workbook_path = 'data_fin.xlsx'
excel_file = pd.ExcelFile(workbook_path)

# List the worksheet names; the bare expression on the last line displays them
sheet_names = excel_file.sheet_names
sheet_names

['Sheet1']

In [2]:
# Load the 'Sheet1' worksheet into a DataFrame
df = excel_file.parse('Sheet1')

# Summarise the columns: dtypes and non-null counts
print('数据基本信息：')
df.info()

# Row and column counts decide how much of the table gets printed
rows, columns = df.shape
is_small_table = rows < 100 and columns < 20

# Small tables (fewer than 100 rows and fewer than 20 columns) are printed in
# full; anything larger is limited to the first rows returned by head()
if is_small_table:
    heading, preview = '数据全部内容信息：', df
else:
    heading, preview = '数据前几行内容信息：', df.head()

# Tab-separated dump with missing values rendered as 'nan'
print(heading)
print(preview.to_csv(sep='\t', na_rep='nan'))

数据基本信息：
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 247 entries, 0 to 246
Data columns (total 33 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Fe               246 non-null    float64
 1   Co               235 non-null    float64
 2   Mn               154 non-null    float64
 3   Al               178 non-null    float64
 4   Ni               223 non-null    float64
 5   B                111 non-null    float64
 6   Hf               7 non-null      float64
 7   Si               152 non-null    float64
 8   Cu               139 non-null    float64
 9   P                101 non-null    float64
 10  Mo               100 non-null    float64
 11  Cr               123 non-null    float64
 12  La               3 non-null      float64
 13  Ti               16 non-null     float64
 14  Sn               7 non-null      float64
 15  V                4 non-null      float64
 16  Ga               8 non-null      float64
 17  Nb      

In [3]:
# Replace every missing value in the table with 0
# NOTE(review): this applies to all columns alike, so any gaps in the trailing
# Ms / Hc columns would also become 0 -- confirm that is intended
df = df.fillna(value=0)



In [4]:

# Show the first five rows to inspect the table after filling missing values
df.head(n=5)

Unnamed: 0,Fe,Co,Mn,Al,Ni,B,Hf,Si,Cu,P,...,DO3,BCC,FCC,HCP,Orthorhombic,hexagonal,Tetragonal,Primitive Cubic,Ms,Hc
0,44.143049,39.897917,15.959034,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,170.0,636.0
1,40.314556,39.046899,15.103242,5.535303,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,159.0,769.0
2,39.298592,38.359388,15.818767,6.523253,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,134.0,716.0
3,38.926863,35.510768,16.142792,9.419576,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,191.0,159.0
4,35.412374,32.85036,15.535554,16.201711,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,181.0,2387.0


In [5]:
# Labels of the first and last column in the contiguous run to convert
start_col = 'DO3'
end_col = 'Primitive Cubic'

# Label-based slicing with .loc includes both endpoints, so this selects every
# column from start_col through end_col
columns_to_convert = df.loc[:, start_col:end_col].columns

# astype(int) truncates toward zero, so a stray value such as 0.5 would silently
# become 0. Verify first that every value is a finite whole number; NaN and inf
# fail the same check because `x % 1` evaluates to NaN for them.
has_non_integer = (df[columns_to_convert] % 1 != 0).any()
bad_columns = has_non_integer[has_non_integer].index.tolist()
if bad_columns:
    raise ValueError(
        f'Cannot convert to int without losing data; non-integer values in: {bad_columns}'
    )

# Cast the validated columns to an integer dtype
df[columns_to_convert] = df[columns_to_convert].astype(int)

# Print the resulting dtypes to verify the conversion
print(df[columns_to_convert].dtypes)

DO3                int32
BCC                int32
FCC                int32
HCP                int32
Orthorhombic       int32
hexagonal          int32
Tetragonal         int32
Primitive Cubic    int32
dtype: object


In [6]:
# Show the first five rows to inspect the table after the integer conversion
df.head(n=5)

Unnamed: 0,Fe,Co,Mn,Al,Ni,B,Hf,Si,Cu,P,...,DO3,BCC,FCC,HCP,Orthorhombic,hexagonal,Tetragonal,Primitive Cubic,Ms,Hc
0,44.143049,39.897917,15.959034,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,1,0,0,0,0,0,170.0,636.0
1,40.314556,39.046899,15.103242,5.535303,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,1,0,0,0,0,0,159.0,769.0
2,39.298592,38.359388,15.818767,6.523253,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,1,0,0,0,0,0,134.0,716.0
3,38.926863,35.510768,16.142792,9.419576,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,191.0,159.0
4,35.412374,32.85036,15.535554,16.201711,0.0,0.0,0.0,0.0,0.0,0.0,...,0,1,0,0,0,0,0,0,181.0,2387.0


In [7]:
# Write the table to a CSV file; index=False leaves the row index out of the file
csv_path = 'data_fin.csv'
df.to_csv(path_or_buf=csv_path, index=False)