# Pandas Merge Example 合併分數例子

在這個例子中，假設我們有幾個 Excel 檔案，記錄的是同一班同學的資料，但每個檔案分別儲存了不同科目的成績。現在我們想把這班同學的各科成績整合成一張表：我們會使用 Python 的 Pandas 套件，以「學生編號」作為對應欄位，配合當中的 Merge 功能來達成。

In [1]:
# Load the libraries
import numpy as np
import pandas as pd

In [2]:
# Subject names; each subject's scores live in "<subject>.xlsx".
files = ["中文", "英文", "數學", "物理", "化學"]

In [3]:
# Load the first subject's workbook and label its total-score column
# with the subject name, then display the whole frame.
df = pd.read_excel(f"{files[0]}.xlsx").rename(columns={"總成績": files[0]})
df

Unnamed: 0,學生編號,第一段平時,第一段考試,第二段平時,第二段考試,第三段平時,第三段考試,中文
0,100001.0,74.0,60.0,92.0,95.0,55.0,68.0,74.0
1,100002.0,75.0,69.0,87.0,73.0,85.0,63.0,75.333333
2,100003.0,68.0,66.0,94.0,94.0,100.0,74.0,82.666667
3,100004.0,86.0,60.0,80.0,66.0,95.0,53.0,73.333333
4,100005.0,54.0,60.0,54.0,92.0,92.0,79.0,71.833333
5,100006.0,77.0,84.0,83.0,52.0,54.0,64.0,69.0
6,100007.0,83.0,81.0,77.0,87.0,65.0,69.0,77.0
7,100008.0,67.0,98.0,84.0,85.0,78.0,86.0,83.0
8,100009.0,81.0,78.0,69.0,63.0,98.0,63.0,75.333333
9,100010.0,57.0,50.0,99.0,77.0,50.0,72.0,67.5


In [4]:
# Load one file to check it.
# The total-score column is renamed to the subject name; otherwise every
# subject's column would be called "總成績". Only the student ID and that
# score column are kept.
df = (
    pd.read_excel(f"{files[0]}.xlsx")
    .rename(columns={"總成績": files[0]})
    .loc[:, ["學生編號", files[0]]]
)
df

Unnamed: 0,學生編號,中文
0,100001.0,74.0
1,100002.0,75.333333
2,100003.0,82.666667
3,100004.0,73.333333
4,100005.0,71.833333
5,100006.0,69.0
6,100007.0,77.0
7,100008.0,83.0
8,100009.0,75.333333
9,100010.0,67.5


In [5]:
# Load the second file the same way: rename the total-score column to the
# subject name and keep only the student ID and that score.
df2 = (
    pd.read_excel(f"{files[1]}.xlsx")
    .rename(columns={"總成績": files[1]})
    .loc[:, ["學生編號", files[1]]]
)
df2

Unnamed: 0,學生編號,英文
0,100001.0,68.333333
1,100002.0,80.166667
2,100003.0,69.0
3,100004.0,79.5
4,100005.0,81.333333
5,100006.0,57.833333
6,100007.0,75.666667
7,100008.0,72.666667
8,100009.0,71.5
9,100010.0,66.166667


In [6]:
# Print the built-in documentation of DataFrame.merge to review its parameters.
help(df.merge)

Help on method merge in module pandas.core.frame:

merge(right: 'FrameOrSeriesUnion', how: 'str' = 'inner', on: 'IndexLabel | None' = None, left_on: 'IndexLabel | None' = None, right_on: 'IndexLabel | None' = None, left_index: 'bool' = False, right_index: 'bool' = False, sort: 'bool' = False, suffixes: 'Suffixes' = ('_x', '_y'), copy: 'bool' = True, indicator: 'bool' = False, validate: 'str | None' = None) -> 'DataFrame' method of pandas.core.frame.DataFrame instance
    Merge DataFrame or named Series objects with a database-style join.
    
    A named Series object is treated as a DataFrame with a single named column.
    
    The join is done on columns or indexes. If joining columns on
    columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes
    on indexes or indexes on a column or columns, the index will be passed on.
    When performing a cross merge, no column specifications to merge on are
    allowed.
    
    Parameters
    ----------
    right : Da

In [7]:
# Combine the two subjects' frames with a merge keyed on the student ID.
df_result = pd.merge(df, df2, on="學生編號")
df_result

Unnamed: 0,學生編號,中文,英文
0,100001.0,74.0,68.333333
1,100002.0,75.333333,80.166667
2,100003.0,82.666667,69.0
3,100004.0,73.333333,79.5
4,100005.0,71.833333,81.333333
5,100006.0,69.0,57.833333
6,100007.0,77.0,75.666667
7,100008.0,83.0,72.666667
8,100009.0,75.333333,71.5
9,100010.0,67.5,66.166667


In [8]:
# Round the merged scores to whole numbers and export them.
# The export uses `df_result` (the merged frame); `df` only holds the first
# subject. A new name is used so `df` is not overwritten and this cell can be
# re-run safely.
df_result_rounded = df_result.round().astype(int)

# The default integer index is written as the first column of the sheet.
df_result_rounded.to_excel("final result.xlsx")

In [9]:

# Merge every remaining subject into the first subject's frame.
# The result is accumulated under a new name so that `df` (first subject only)
# is left untouched; re-running this cell therefore gives the same result
# instead of merging the same subjects a second time.
df_all = df
for topic in files[1:]:
    # Rename the total-score column to the subject name and keep only the
    # student ID and that score before joining.
    df_topic = (
        pd.read_excel(topic + ".xlsx")
        .rename(columns={"總成績": topic})
        .loc[:, ["學生編號", topic]]
    )
    df_all = df_all.merge(df_topic, on="學生編號")

# Round to whole numbers, cast every column to int, and use the student ID
# as the row index.
df_all = df_all.round().astype(int).set_index("學生編號")
df_all

Unnamed: 0_level_0,中文,英文,數學,物理,化學
學生編號,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
100001,74,68,77,78,75
100002,75,80,72,79,74
100003,83,69,82,81,69
100004,73,80,73,66,79
100005,72,81,67,83,67
100006,69,58,81,68,82
100007,77,76,80,75,63
100008,83,73,71,70,86
100009,75,72,67,84,80
100010,68,66,65,70,62


## 完整代碼

Full Code

In [10]:
import numpy as np
import pandas as pd

# Subject names; each subject's scores are read from "<subject>.xlsx".
files = [
    "中文",
    "英文",
    "數學",
    "物理",
    "化學",
]

ID_COLUMN = "學生編號"      # key column shared by every workbook
TOTAL_COLUMN = "總成績"     # per-subject total score column in each workbook


def load_subject_scores(topic):
    """Read ``<topic>.xlsx`` and return its student ID and total score columns.

    The total score column is renamed to ``topic`` so each subject keeps a
    distinct column name after merging.

    Parameters
    ----------
    topic : str
        Subject name; also the workbook's file name without the extension.

    Returns
    -------
    pandas.DataFrame
        Two columns: the student ID column and a column named ``topic``.
    """
    scores = pd.read_excel(topic + ".xlsx").rename(columns={TOTAL_COLUMN: topic})
    return scores[[ID_COLUMN, topic]]


# Start from the first subject, then join every other subject on the student ID.
df = load_subject_scores(files[0])
for topic in files[1:]:
    # validate="one_to_one" makes pandas raise if a student ID is duplicated
    # on either side, instead of silently multiplying rows.
    df = df.merge(load_subject_scores(topic), on=ID_COLUMN, validate="one_to_one")

# Round the scores, store the student ID as an integer, and use it as the index
# so it becomes the first column of the exported sheet.
df = df.round()
df[ID_COLUMN] = df[ID_COLUMN].astype(int)
df = df.set_index(ID_COLUMN)
df.to_excel("final_results.xlsx")