# 数据合并排序

In [10]:
import numpy as np
import pandas as pd

# Build the demo columns; every column is wrapped in a pandas Series so the
# resulting frame is assembled from Series objects sharing the default
# 0..11 integer index.
d = {
    label: pd.Series(values)
    for label, values in {
        "Name": [
            "Tom", "James", "Ricky", "Vin", "Steve", "Minsu",
            "Jack", "Lee", "David", "Gasper", "Betina", "Andres",
        ],
        "Age": [25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 46],
        "Rating": [
            4.23, 3.24, 3.98, 2.56, 3.20, 4.6,
            3.8, 3.78, 2.98, 4.80, 4.10, 3.65,
        ],
    }.items()
}

# Frame used by the sorting examples; the bare name on the last line
# displays it as the cell output.
unsorted_df = pd.DataFrame(d)
unsorted_df

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,James,26,3.24
2,Ricky,25,3.98
3,Vin,23,2.56
4,Steve,30,3.2
5,Minsu,29,4.6
6,Jack,23,3.8
7,Lee,34,3.78
8,David,40,2.98
9,Gasper,30,4.8


## 排序

### 按照标签排序

In [11]:
# `axis` chooses whether row labels or column labels are sorted (0 = rows here).
# Sorting is ascending by default; ascending=False gives descending order.
unsorted_df.sort_index(ascending=False, axis=0)

Unnamed: 0,Name,Age,Rating
11,Andres,46,3.65
10,Betina,51,4.1
9,Gasper,30,4.8
8,David,40,2.98
7,Lee,34,3.78
6,Jack,23,3.8
5,Minsu,29,4.6
4,Steve,30,3.2
3,Vin,23,2.56
2,Ricky,25,3.98


### 按照实际值排序

In [12]:
# Sort by Age in ascending order first; rows sharing the same Age are then
# ordered by Rating in descending order.
unsorted_df.sort_values(
    by=["Age", "Rating"],
    ascending=[True, False],
)

Unnamed: 0,Name,Age,Rating
6,Jack,23,3.8
3,Vin,23,2.56
0,Tom,25,4.23
2,Ricky,25,3.98
1,James,26,3.24
5,Minsu,29,4.6
9,Gasper,30,4.8
4,Steve,30,3.2
7,Lee,34,3.78
8,David,40,2.98


## 合并

### pd.merge()

In [13]:
# Student table used as the left-hand side of the merge example.
# class_id is the column shared with the `right` (class) table.
left = pd.DataFrame(
    {
        "student_id": list(range(1, 21)),
        "student_name": [
            "Alex", "Amy", "Allen", "Alice", "Ayoung",
            "Billy", "Brian", "Bran", "Bryce", "Betty",
            "Emma", "Marry", "Allen", "Jean", "Rose",
            "David", "Tom", "Jack", "Daniel", "Andrew",
        ],
        "class_id": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3, 2],
        "gender": [
            "M", "M", "F", "F", "M", "M", "F", "F", "M", "M",
            "F", "F", "M", "M", "F", "F", "M", "M", "F", "F",
        ],
        "age": [20, 21, 22, 20, 21, 22, 23, 20, 21, 22, 20, 21, 22, 23, 20, 21, 22, 20, 21, 22],
        "score": [98, 74, 67, 38, 65, 29, 32, 34, 85, 64, 52, 38, 26, 89, 68, 46, 32, 78, 79, 87],
    }
)
left

Unnamed: 0,student_id,student_name,class_id,gender,age,score
0,1,Alex,1,M,20,98
1,2,Amy,1,M,21,74
2,3,Allen,1,F,22,67
3,4,Alice,2,F,20,38
4,5,Ayoung,2,M,21,65
5,6,Billy,2,M,22,29
6,7,Brian,3,F,23,32
7,8,Bran,3,F,20,34
8,9,Bryce,3,M,21,85
9,10,Betty,4,M,22,64


In [14]:
# Class lookup table used as the right-hand side of the merge example.
right = pd.DataFrame(
    {
        "class_id": [1, 2, 3, 5],
        "class_name": ["ClassA", "ClassB", "ClassC", "ClassE"],
    }
)
right

Unnamed: 0,class_id,class_name
0,1,ClassA
1,2,ClassB
2,3,ClassC
3,5,ClassE


In [15]:

# Inner join on class_id: only rows whose class_id occurs in both frames are kept.
left.merge(right, on="class_id", how="inner")

Unnamed: 0,student_id,student_name,class_id,gender,age,score,class_name
0,1,Alex,1,M,20,98,ClassA
1,2,Amy,1,M,21,74,ClassA
2,3,Allen,1,F,22,67,ClassA
3,11,Emma,1,F,20,52,ClassA
4,12,Marry,1,F,21,38,ClassA
5,13,Allen,1,M,22,26,ClassA
6,4,Alice,2,F,20,38,ClassB
7,5,Ayoung,2,M,21,65,ClassB
8,6,Billy,2,M,22,29,ClassB
9,14,Jean,2,M,23,89,ClassB


### pd.concat()

In [17]:
# Create two DataFrame objects with the same columns A-D.
# The first holds the cells "<col>0".."<col>3", the second "<col>4".."<col>7".
data1 = {col: [f"{col}{row}" for row in range(0, 4)] for col in "ABCD"}
df1 = pd.DataFrame(data1)

data2 = {col: [f"{col}{row}" for row in range(4, 8)] for col in "ABCD"}
df2 = pd.DataFrame(data2)

# Stack the two frames vertically with pd.concat; each frame keeps its own
# 0..3 row labels, so the labels repeat in the result.
pd.concat([df1, df2])


Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7
