In [None]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML,Markdown

def display_content(content, title=None, title_style='bold'):
    """Render ``content`` in the notebook, optionally under a Markdown caption.

    Args:
        content: Any object that IPython's ``display`` can render.
        title: Optional caption (Markdown / inline HTML) shown above
            ``content``. Nothing is shown when it is ``None``.
        title_style: ``'bold'`` (the default) wraps the caption in ``**``;
            any other value renders the caption unchanged.
    """
    if title is not None:
        caption = str(title)
        # Several callers already pass captions wrapped in ``**``; wrapping
        # them again would emit ``****...****`` instead of plain bold text.
        already_bold = (
            len(caption) > 4
            and caption.startswith('**')
            and caption.endswith('**')
        )
        if title_style == 'bold' and not already_bold:
            caption = f'**{caption}**'
        display(Markdown(caption))
    display(content)


# 基本属性
--------------

* 值序列
* 索引
* 类似字典

## 值序列

In [None]:
# .values exposes the data of the Series without its index.
s11 = pd.Series(data=range(5))
display_content(content=s11.values, title='值序列')

## 索引

In [None]:
# .index exposes the labels attached to the values; here they are 'a'..'e'.
s12 = pd.Series(data=range(5), index=['a', 'b', 'c', 'd', 'e'])
display_content(content=s12.index, title='索引')

## 类似字典

In [None]:
# Built from a dict: the keys become index labels, and a label can then be
# used for lookup just like a dict key.
s13 = pd.Series(dict(a=1, b=2, c=3))
display_content(content=s13, title='s13初始值')
display_content(
    content=s13['a'],
    title="<font color=green>s13['a']</font> 取index为a的值",
)


# 创建

## 值序列 + 索引

In [None]:
# Explicit construction: one list for the values, one for the labels.
s21 = pd.Series(data=[1, 2, 3], index=list('abc'))
display_content(content=s21, title='值序列+索引的创建例子')

## 通过字典创建

In [None]:
# Dict construction: keys become the index labels, values become the data.
s22 = pd.Series({'a': 1, 'b': 2, 'c': 3})
# The caption shows the exact constructor call, including its closing ")".
display_content(s22, "通过字典创建的例子：<font color=green>pd.Series({'a': 1, 'b': 2, 'c': 3})</font>")

# 访问

## 整数索引

In [None]:
s31 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': 78})
display_content(s31, '**初始值**')

# Positional access goes through .iloc. On a string-labelled Series, plain
# s31[1] relies on a positional fallback that pandas 2.x deprecates (integer
# keys are slated to be treated as labels).
s31_1 = s31.iloc[1]  # a single position returns a scalar
display_content(s31_1, '**<font color=green>s31.iloc[1]</font> 通过单个整数索引返回单个元素**')

s31_2 = s31.iloc[1:3]  # a slice returns a Series
display_content(s31_2, '**<font color=green>s31.iloc[1:3]</font> 通过切片选择元素**')

s31_3 = s31.iloc[1:2]  # still a Series, even though it holds one element
display_content(s31_3, '**<font color=green>s31.iloc[1:2]</font> 通过切片选择元素，即使是仅返回一个元素**')

## 布尔索引

In [None]:
# Boolean indexing: a comparison yields a boolean Series, which is then used
# as a mask to keep only the matching entries.
s32 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': 78})
display_content(content=s32, title='初始值 <font color=green>s32</font>:')

display_content(
    content=s32 > 90,
    title='**<font color=green>s32>90</font> 返回布尔Series:**',
)

s321 = s32[s32 > 90]
display_content(
    content=s321,
    title='**<font color=green>s32[s32>90]</font> 通过布尔索引选择元素:**',
)

## 标签索引

In [None]:
# Label-based selection: a single label, a list of labels, and a label slice.
s33 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': 78})

s33_1 = s33['Helen']
s33_2 = s33[['Helen', 'Jack']]
s33_3 = s33['Helen':'May']

display_content(content=s33, title='初始值 <font color=green>s33</font>')
display_content(
    content=s33_1,
    title="<font color=green>s33['Helen']</font> 通过标签选择单个元素",
)
display_content(
    content=s33_2,
    title="<font color=green>s33[['Helen', 'Jack']]</font> 通过标签数组选择多个元素",
)
display_content(
    content=s33_3,
    title="<font color=green>s33['Helen':'May']</font> 通过标签切片选择多个元素",
)

# 修改

In [None]:
# Score table keyed by student name; this cell mutates it in place, so the
# order of the statements below matters.
s4 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': 78})

display_content(s4, "初始值")


# Assigning to a label that already exists overwrites that entry.
s4['May'] = 80
display_content(s4, "修改了某个值 <font color=green>s4['May'] = 80</font>")

# Assigning to a label that is not yet in the index adds a new entry.
s4['Eason'] = 95
display_content(s4, "增加了一条记录 <font color=green>s4['Eason']</font>")



# 删除

In [None]:
# Score table keyed by student name, used to contrast the two ways of
# removing entries.
s5 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': 78})

display_content(s5, "初始值")


# drop() returns a new Series without the given labels; s5 is left unchanged.
s5_1 = s5.drop(['Helen', 'Allen'])
display_content(s5_1, '删除记录后，原数据不变')

# del removes the entry from s5 itself (in-place mutation).
del s5['May']
display_content(s5, "在原数据上删除一条记录")



# 计算

## 数学计算

### 基础计算（Built-in）

In [None]:
# Built-in arithmetic, comparison and membership operators on a Series.
# Everything is computed first and then shown in the same order as before.
s611 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': 78})
s611_b = pd.Series({'Joe': 90, 'Helen': 92, 'May': 80})

# Series <op> scalar: the scalar is applied to every element.
s611_1 = s611 + 10
s611_2 = s611 ** 2
s611_3 = s611 > 90

# Series <op> Series: operands are aligned on their index labels first.
s611_4 = s611 + s611_b

# `in` on the Series tests the index labels; test .values for the data.
s611_5 = 'Joe' in s611
s611_6 = 100 in s611.values

# Element-wise negation of the boolean Series computed above.
s611_7 = ~s611_3

for shown, caption in [
    (s611, '初始值 <font color=green>s611</font>'),
    (s611_b, '初始值b <font color=green>s611_b</font>'),
    (s611_1, '基础标量计算: <font color=green>s611 + 10</font>  *二元 + 一元 = 二元*'),
    (s611_2, '标量计算(平方): <font color=green>s611 ** 2</font>  *二元 + 一元 = 二元*'),
    (s611_3, '标量计算(比较): <font color=green>s611 > 90</font>  *二元 + 一元 = 二元*'),
    (s611_4, '基础向量计算（自动对齐）: <font color=green>s611 + s611_b</font>  *二元 + 二元 = 二元*'),
    (s611_5, "in 符号计算: <font color=green>'Joe' in s611</font>, 判断key是否存在，类似dict"),
    (s611_6, "in 符号计算: <font color=green>100 in s611.values</font>, 判断值是否存在，类似dict"),
    (s611_7, '标量计算(取反): <font color=green>~(s611 > 90)</font>  *二元 + 一元 = 二元*'),
]:
    display_content(shown, caption)

### 通用函数计算

In [None]:
# Applying NumPy, pandas and Series-level functions to a Series that
# contains one missing entry ('May').
s612 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': None})
display_content(content=s612, title='初始值 <font color=green>s612</font>')

s612_2 = np.sqrt(s612)
display_content(
    content=s612_2,
    title='np函数计算:<font color=green> np.sqrt(s612)</font>',
)

s612_3 = pd.isnull(s612)
display_content(
    content=s612_3,
    title='pd函数计算:<font color=green> pd.isnull(s612)</font>',
)

s612_4 = s612.count()
display_content(
    content=s612_4,
    title='series函数计算:<font color=green> s612.count()</font>',
)




## 统计计算

### Series 自带统计函数

In [None]:
# Built-in Series statistics on a score table with one missing entry ('May').
s621 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': None})

display_content(s621, '初始值 <font color=green>s621</font>')

s621_1 = s621.max()
display_content(s621_1, '最大值 <font color=green>s621.max()</font>')

s621_2 = s621.min()
display_content(s621_2, '最小值 <font color=green>s621.min()</font>')

# Each result is displayed under the same name it was assigned to; the
# previous s62_3 / s62_4 references were undefined and raised NameError.
s621_3 = s621.mean()
display_content(s621_3, '平均值 <font color=green>s621.mean()</font>')

s621_4 = s621.std()
display_content(s621_4, '标准差 <font color=green>s621.std()</font>')

s621_5 = s621.sum()
display_content(s621_5, '和 <font color=green>s621.sum()</font>')

s621_6 = s621.prod()
display_content(s621_6, '积 <font color=green>s621.prod()</font>')

s621_7 = s621.cumsum()
display_content(s621_7, '累计计算和 <font color=green>s621.cumsum()</font>')

# Second score table with the same labels, used for the pairwise statistics.
s621_8 = pd.Series({'Joe': 97, 'Helen': 84, 'Jack': 79, 'Allen': 90, 'May': 20})

s621_9 = s621.corr(s621_8)
display_content(s621_9, '相关系数')

s621_10 = s621.cov(s621_8)
display_content(s621_10, '协方差')


### np统计函数

In [None]:
# The same reductions as above, called through NumPy's function interface.
s622 = pd.Series({'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': None})

s622_1 = np.sum(s622)
s622_2 = np.prod(s622)
s622_3 = np.cumsum(s622)

for shown, caption in [
    (s622, '初始值 <font color=green>s622</font>'),
    (s622_1, '和 <font color=green>np.sum(s622)</font>'),
    (s622_2, '积 <font color=green>np.prod(s622)</font>'),
    (s622_3, '累积计算 <font color=green>np.cumsum(s622)</font>'),
]:
    display_content(shown, caption)



### agg聚合函数

In [None]:
# Several statistics from one call; the trailing expression is the cell's
# rendered output.
s623 = pd.Series(
    {'Joe': 100, 'Helen': 98, 'Jack': 87, 'Allen': 90, 'May': None}
)
s623.aggregate(['mean', 'std', 'count'])

# 索引

## 基本操作

In [None]:
# Reindexing a Series whose labels (0, 2, 4) are a subset of the new index.
s71 = pd.Series([100, 98, 87], index=[0, 2, 4])

display_content(s71, '初始值')

# Without a fill rule, labels absent from the original index get NaN.
s71_1 = s71.reindex(range(6))
display_content(s71_1, '重建索引')

# ffill is forward fill: each new label takes the last preceding valid value,
# so the caption says 向前填充 (向后填充 would describe bfill).
s71_2 = s71.reindex(range(6), method='ffill')
display_content(s71_2, '重建索引:向前填充(ffill)')

# fill_value supplies one constant for every newly introduced label.
s71_3 = s71.reindex(range(6), fill_value=60)
display_content(s71_3, '重建索引:默认填充')


## 多层索引

In [229]:
# Two-level index: the outer level is the student, the inner level the subject.
s72 = pd.Series(
    [100, 98, 97, 78, 67, 89, 59, 85, 64],
    index=[
        ['Joe', 'Joe', 'Joe', 'Bob', 'Bob', 'Bob', 'May', 'May', 'May'],
        ['math', 'english', 'history'] * 3,
    ],
)
s72.index.names = ['name', 'subject']

s72_1 = s72.index              # the MultiIndex itself
s72_2 = s72['Joe']             # selecting an outer label drops that level
s72_3 = s72.unstack()          # inner level becomes the columns of a DataFrame
s72_4 = s72_3.unstack()        # back to a Series with a two-level index
s72_5 = s72.swaplevel()        # swap the order of the two levels
s72_6 = s72.sort_index(level=1)  # sort by the second level (subject)

for shown, caption in [
    (s72, '初始值'),
    (s72_1, '多层索引'),
    (s72_2, '降维成一层索引'),
    (s72_3, '变为二维dataframe'),
    (s72_4, '变为多层索引的Series'),
    (s72_5, '交换索引层级'),
    (s72_6, '分层排序'),
]:
    display_content(shown, caption)




**初始值**

name  subject
Joe   math       100
      english     98
      history     97
Bob   math        78
      english     67
      history     89
May   math        59
      english     85
      history     64
dtype: int64

**多层索引**

MultiIndex([('Joe',    'math'),
            ('Joe', 'english'),
            ('Joe', 'history'),
            ('Bob',    'math'),
            ('Bob', 'english'),
            ('Bob', 'history'),
            ('May',    'math'),
            ('May', 'english'),
            ('May', 'history')],
           names=['name', 'subject'])

**降维成一层索引**

subject
math       100
english     98
history     97
dtype: int64

**变为二维dataframe**

subject  english  history  math
name                           
Bob           67       89    78
Joe           98       97   100
May           85       64    59


**变为多层索引的Series**

subject  name
english  Bob      67
         Joe      98
         May      85
history  Bob      89
         Joe      97
         May      64
math     Bob      78
         Joe     100
         May      59
dtype: int64

**交换索引层级**

subject  name
math     Joe     100
english  Joe      98
history  Joe      97
math     Bob      78
english  Bob      67
history  Bob      89
math     May      59
english  May      85
history  May      64
dtype: int64

**分层排序**

name  subject
Bob   english     67
Joe   english     98
May   english     85
Bob   history     89
Joe   history     97
May   history     64
Bob   math        78
Joe   math       100
May   math        59
dtype: int64

# 数据处理专题

## 排序与排名

In [None]:
# Sorting by label, sorting by value (both directions) and ranking by value.
s81 = pd.Series({'Joe': 100, 'Alice': 98, 'Bob': 87})

s81_1 = s81.sort_index()
s81_2 = s81.sort_values()
s81_3 = s81.sort_values(ascending=False)
s81_4 = s81.rank()

for shown, caption in [
    (s81, '初始值'),
    (s81_1, '按index排序'),
    (s81_2, '按value排序'),
    (s81_3, '按value倒序'),
    (s81_4, '按value排名'),
]:
    display_content(shown, caption)




## 空值处理

In [None]:
# Missing-value handling on a Series whose 'May' entry is None.
s82 = pd.Series({'Joe': 100, 'Alice': 98, 'Bob': 87, 'May': None})
display_content(content=s82, title='初始值')

# Drop the missing entries.
s82_1 = s82.dropna()
display_content(content=s82_1, title='过滤掉空值')

# Replace the missing entries with a constant.
s82_2 = s82.fillna(60)
display_content(content=s82_2, title='填充空值')

# Boolean masks marking missing / present entries.
s82_3 = s82.isnull()
display_content(content=s82_3, title='判断是否为null, 返回布尔数组')

s82_4 = s82.notnull()
display_content(content=s82_4, title='判断是否为非空, 返回布尔数组')


## 字符串处理

In [None]:
# Vectorised string methods are reached through the .str accessor.
s83 = pd.Series(data=['Joe', 'Helen', 'Alice'])
display_content(content=s83, title='初始值')

s83_1 = s83.str.lower()
display_content(content=s83_1, title='转为小写')

s83_2 = s83.str.contains('l')
display_content(content=s83_2, title='是否包含')

## 自定义映射函数

In [None]:
# Scores that will be translated into grade labels.
s84 = pd.Series(dict(Joe=100, Alice=78, Bob=87, May=58))
display_content(content=s84, title='初始值')

def grade(x):
    """Return the grade label for a numeric score.

    Thresholds are checked from highest to lowest: >= 90, >= 80, >= 60.
    Anything that matches none of them gets the failing label.
    """
    for floor, label in ((90, '优秀'), (80, '良好'), (60, '及格')):
        if x >= floor:
            return label
    return '不及格'
        
# Apply grade() to every score to obtain a Series of labels.
s84_1 = s84.map(grade)
display_content(content=s84_1, title='映射为grade')


## 其他常用函数

In [None]:
# Assorted helpers: distinct values, membership tests and frequency counts.
s85 = pd.Series({'Joe':100, 'Alice': 78, 'Bob': 87, 'May':78})
display_content(s85, '初始值')

s85_1 = s85.unique()
display_content(s85_1, 'unique值')

s85_2 = s85.isin([100, 87])
display_content(s85_2, '是否在values中')

s85_3 = s85.isin(s85)
display_content(s85_3, '是否在values中,values可以是Series')

s85_4 = s85.value_counts()
display_content(s85_4, 'value_counts')

# all() / any() are demonstrated on the boolean Series. Calling them on the
# integer scores above would be trivially True for both.
s85_5 = pd.Series([True, False, True, False, True])
display_content(s85_5, '布尔Series <font color=green>s85_5</font>')

s85_6 = s85_5.all()
display_content(s85_6, '是否均为true')

s85_7 = s85_5.any()
display_content(s85_7, '是否存在true')