## <font color='red'>什么是Series？</font>

In [1]:
import numpy as np
import pandas as pd

In [4]:
s = pd.Series(data = np.random.randint(10,20,size = 3),
          index = list('ABC'),name='age',dtype = np.float32)
s

A    18.0
B    10.0
C    14.0
Name: age, dtype: float32

## <font color='red'>Series如何索引？</font>

In [5]:
l = [1,2,5,7]
l

[1, 2, 5, 7]

In [6]:
l[3]

7

In [7]:
l[-1]

7

### 位置索引

In [8]:
data= [10,20,30,40,50]
index = list('ABCDE')
series = pd.Series(data = data,index= index)
series

A    10
B    20
C    30
D    40
E    50
dtype: int64

In [18]:
series[0]

10

In [10]:
series.iloc[2]

30

### 标签索引

In [11]:
series['E']

50

In [12]:
series.loc['B']

20

## <font color='red'>Series如何切片？</font>

In [20]:
# https://blog.csdn.net/Soft_Po/article/details/106037982
series

A    10
B    20
C    30
D    40
E    50
dtype: int64

### 位置索引

In [21]:
series.iloc[1:3]

B    20
C    30
dtype: int64

In [22]:
series[1:3]

B    20
C    30
dtype: int64

### 标签索引

In [23]:
series['B':'D']

B    20
C    30
D    40
dtype: int64

In [24]:
series.loc['B':'D']

B    20
C    30
D    40
dtype: int64

## <font color='red'>Series有哪些属性？</font>

In [30]:
import numpy as np
import pandas as pd
s = pd.Series(np.random.randint(0,151,size = 2000),name = 'Python')
s

0        76
1       137
2        92
3        92
4       150
       ... 
1995     98
1996     72
1997     55
1998    134
1999     94
Name: Python, Length: 2000, dtype: int32

In [31]:
s.name

'Python'

In [32]:
s.dtype

dtype('int32')

In [33]:
s.ndim

1

In [34]:
s.shape

(2000,)

In [35]:
s.size

2000

In [37]:
list(s.index)

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [38]:
s.values

array([ 76, 137,  92, ...,  55, 134,  94])

## <font color='red'>Series如何转变为DataFrame？</font>

In [39]:
import pandas as pd
data = [10, 20, 30, 40, 50]
index = ['A', 'B', 'C', 'D', 'E']
series = pd.Series(data, index=index)
series

A    10
B    20
C    30
D    40
E    50
dtype: int64

### 方式一

In [40]:
df = pd.DataFrame(series,columns=['Python'])
df

Unnamed: 0,Python
A,10
B,20
C,30
D,40
E,50


In [41]:
df.shape

(5, 1)

In [42]:
series.shape

(5,)

In [43]:
series.ndim

1

In [44]:
df.ndim

2

In [45]:
series.values

array([10, 20, 30, 40, 50], dtype=int64)

In [46]:
df.values

array([[10],
       [20],
       [30],
       [40],
       [50]], dtype=int64)

### 方式二

In [48]:
series.to_frame(name = 'Python')

Unnamed: 0,Python
A,10
B,20
C,30
D,40
E,50


## <font color='red'>如何创建DataFrame？</font>

### 方式一字典

In [49]:
import pandas as pd

data = {
    'Name': ['Alice', 'Bob', 'Charlie'],
    'Age': [25, 30, 35],
    'City': ['New York', 'London', 'Paris']
}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


### 方式二列表嵌套元组或列表

In [51]:
import pandas as pd

data = [['Alice', 25, 'New York'],
        ['Bob', 30, 'London'],
        ['Charlie', 35, 'Paris']]

df = pd.DataFrame(data, columns=['Name', 'Age', 'City'])
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


### 方式三NumPy数组创建

In [52]:
df = pd.DataFrame(data = np.random.randint(0,151,size =(500,3)),
                  columns=['Python','Math','En'],dtype=np.int64)
df

Unnamed: 0,Python,Math,En
0,146,85,105
1,35,126,54
2,135,83,108
3,75,62,11
4,113,9,59
...,...,...,...
495,117,35,4
496,120,119,10
497,54,21,3
498,146,135,31


## <font color='red'>Pandas如何读写文件？</font>

### 文件写入

In [62]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(0,151,size = (100,3)), 
                  columns=['Python', 'Math', 'En'])

# csv 文本文件，结构化的数据
df.to_csv('output.csv', index=False,sep = '-')  # index=False 表示不写入行索引

In [63]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(0,151,size = (100,3)), 
                  columns=['Python', 'Math', 'En'])

# csv 文本文件，结构化的数据
df.to_excel('output.xlsx', index=False)  # index=False 表示不写入行索引

In [65]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(0,151,size = (100,3)), 
                  columns=['Python', 'Math', 'En'])

# csv 文本文件，结构化的数据
df.to_json('output.json', orient='split')  # index=False 表示不写入行索引

### 文件读取

In [67]:
pd.read_csv('./output.csv',sep = '-')

Unnamed: 0,Python,Math,En
0,126,63,92
1,62,61,62
2,142,1,69
3,84,135,56
4,7,116,94
...,...,...,...
95,68,125,106
96,125,77,22
97,2,79,138
98,74,113,136


In [68]:
pd.read_excel('./output.xlsx')

Unnamed: 0,Python,Math,En
0,112,3,101
1,46,12,145
2,77,125,144
3,140,54,148
4,61,15,50
...,...,...,...
95,100,61,76
96,24,122,109
97,55,140,18
98,124,62,7


In [70]:
pd.read_json('./output.json',orient='split')

Unnamed: 0,Python,Math,En
0,82,145,41
1,36,80,68
2,1,64,39
3,84,46,100
4,94,105,85
...,...,...,...
95,70,133,82
96,109,144,27
97,67,114,120
98,93,16,136


## <font color='red'>Pandas向Excel中写入多个表单？</font>

In [76]:
import pandas as pd

# 假设有两个 DataFrame
data1 = {'Name': ['Alice', 'Bob', 'Charlie'],
         'Age': [25, 30, 35],
         'City': ['New York', 'London', 'Paris']}
df1 = pd.DataFrame(data1)

data2 = {'Product': ['A', 'B', 'C'],
         'Price': [10, 20, 30],
         'Quantity': [100, 200, 300]}
df2 = pd.DataFrame(data2)
display(df1,df2)

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,35,Paris


Unnamed: 0,Product,Price,Quantity
0,A,10,100
1,B,20,200
2,C,30,300


In [77]:
# pip install openpyxl
with pd.ExcelWriter('./多表.xlsx',engine='openpyxl') as ew:
    df1.to_excel(ew,sheet_name='sheet1',index=False)
    df2.to_excel(ew,sheet_name='表二',index=False)

## <font color='red'>如何创建多层索引DataFrame？</font>

### 方式一（元组）

In [17]:
import pandas as pd

# 创建一个元组列表作为多层索引的标签
index_labels = [('张三', '期中'), ('张三', '期末'), ('李四', '期中'), ('李四', '期末')]

# 使用pd.MultiIndex.from_tuples创建多层索引
multi_index = pd.MultiIndex.from_tuples(index_labels, 
                                        names=['Name', 'Exam'])

# 创建DataFrame
data = {
    'Python': [98, 149, 130, 147],
    'Math': [88, 136, 117, 100]}

df = pd.DataFrame(data,index=multi_index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,98,88
张三,期末,149,136
李四,期中,130,117
李四,期末,147,100


### 方式二（数组）

In [18]:
import pandas as pd

# 创建多维数组作为多层索引的标签
index_labels = [['张三', '张三', '李四', '李四'], ['期中', '期末', '期中', '期末']]

# 使用pd.MultiIndex.from_arrays创建多层索引
multi_index = pd.MultiIndex.from_arrays(index_labels, names=['Name', 'Exam'])

# 创建DataFrame
data = {
    'Python': [98, 149, 130, 147],
    'Math': [88, 136, 117, 100]}

pd.DataFrame(data,index=multi_index)

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,98,88
张三,期末,149,136
李四,期中,130,117
李四,期末,147,100


### 方式三（笛卡尔积）

In [19]:
import pandas as pd
import numpy as np

# 创建多层索引的标签的笛卡尔积
index_labels = pd.MultiIndex.from_product([['张三', '李四','王五','赵六'], 
                                           ['期中', '期末']],
                                          names=['Name','Exam'])

pd.DataFrame(np.random.randint(80,151,size = (8,2)),
             index = index_labels,# 行索引
             columns=['Python','Math']) # 列索引

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,126,137
张三,期末,146,133
李四,期中,139,83
李四,期末,105,105
王五,期中,117,87
王五,期末,106,96
赵六,期中,132,81
赵六,期末,101,123


In [20]:
pd.DataFrame(np.random.randint(80,151,size = (2,4)),
             index=['张三','李四'],
             columns=pd.MultiIndex.from_product([['Python','Math'],
                                                 ['期中','期末']]))

Unnamed: 0_level_0,Python,Python,Math,Math
Unnamed: 0_level_1,期中,期末,期中,期末
张三,107,91,111,112
李四,123,134,102,141


## <font color='red'>如何读写多层索引DataFrame？</font>

In [23]:
import pandas as pd
import numpy as np

# 创建多层索引的标签的笛卡尔积
index_labels = pd.MultiIndex.from_product([['张三', '李四','王五','赵六'], 
                                           ['期中', '期末']],
                                          names=['Name','Exam'])

df = pd.DataFrame(np.random.randint(80,151,size = (8,2)),
             index = index_labels,# 行索引
             columns=['Python','Math']) # 列索引
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,94,135
张三,期末,124,126
李四,期中,129,134
李四,期末,80,130
王五,期中,82,86
王五,期末,100,83
赵六,期中,99,140
赵六,期末,109,104


### 保存

In [24]:
df.to_csv('./multi_index.csv')

In [25]:
df.to_excel('./multi_index.xlsx')

### 读取

In [27]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,94,135
张三,期末,124,126
李四,期中,129,134
李四,期末,80,130
王五,期中,82,86
王五,期末,100,83
赵六,期中,99,140
赵六,期末,109,104


In [30]:
pd.read_csv('./multi_index.csv',index_col=[0,1])

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,94,135
张三,期末,124,126
李四,期中,129,134
李四,期末,80,130
王五,期中,82,86
王五,期末,100,83
赵六,期中,99,140
赵六,期末,109,104


In [33]:
pd.read_csv('./multi_index.csv',index_col=['Name','Exam'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,94,135
张三,期末,124,126
李四,期中,129,134
李四,期末,80,130
王五,期中,82,86
王五,期末,100,83
赵六,期中,99,140
赵六,期末,109,104


In [35]:
pd.read_excel('./multi_index.xlsx',index_col=[0,1])

Unnamed: 0_level_0,Unnamed: 1_level_0,Python,Math
Name,Exam,Unnamed: 2_level_1,Unnamed: 3_level_1
张三,期中,94,135
张三,期末,124,126
李四,期中,129,134
李四,期末,80,130
王五,期中,82,86
王五,期末,100,83
赵六,期中,99,140
赵六,期末,109,104


In [36]:
df2 = pd.DataFrame(np.random.randint(80,151,size = (2,4)),
             index=['张三','李四'],
             columns=pd.MultiIndex.from_product([['Python','Math'],
                                                 ['期中','期末']]))
df2

Unnamed: 0_level_0,Python,Python,Math,Math
Unnamed: 0_level_1,期中,期末,期中,期末
张三,136,93,122,112
李四,135,123,86,146


In [45]:
df2.to_csv('./multi_index2.csv',index=True)

In [47]:
pd.read_csv('multi_index2.csv',header=[0,1],index_col=0)

Unnamed: 0_level_0,Python,Python,Math,Math
Unnamed: 0_level_1,期中,期末,期中,期末
张三,136,93,122,112
李四,135,123,86,146


In [42]:
df2.to_excel('multi_index2.xlsx')

In [44]:
pd.read_excel('multi_index2.xlsx',header=[0,1],index_col=0)

Unnamed: 0_level_0,Python,Python,Math,Math
Unnamed: 0_level_1,期中,期末,期中,期末
张三,136,93,122,112
李四,135,123,86,146
