In [None]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML,Markdown

def display_content(content, title=None, title_style='bold'):
    """Show an optional Markdown heading followed by ``content``.

    Args:
        content: Any object accepted by ``IPython.display.display``.
        title: Heading text rendered as Markdown above the content.
            When ``None`` no heading is emitted.
        title_style: ``'bold'`` wraps the title in ``**...**``; any other
            value renders the title unchanged.
    """
    if title is not None:
        # Only the 'bold' style decorates the heading; everything else passes through.
        heading = f'**{title}**' if title_style == 'bold' else title
        display(Markdown(f"{heading}"))
    display(content)


# 基础属性

* 排序的列集合
* 行列索引
* 共享索引的多个series
* 多层索引 高维 
* 列元素的类型是一样的，但是行不是，行列不是完全对称的

In [None]:
# Build a small two-column frame, then inspect its three basic attributes.
df1 = pd.DataFrame({'name': ['Joe', 'Helen'], 'score': [98, 80]})
display_content(df1, '初始值')

# .values: the frame's data as a NumPy array, without row/column labels.
df1_1 = df1.values
display_content(df1_1, 'df1的值')

# .columns: the column labels ('name', 'score').
df1_2 = df1.columns
display_content(df1_2, 'df1的columns')


# .index: the row labels; none were passed above, so this is the default integer index.
df1_3 = df1.index
display_content(df1_3, 'df1的index')


# 创建

## 列集合 字典

In [None]:
# Column-oriented construction: every dict key becomes a column label.

# {col: list} - each list holds that column's values, one entry per row.
df21_1 = pd.DataFrame({'name': ['Joe', 'Helen'], 'score': [98, 80]})
display_content(df21_1, "通过 {col: [col_value, col_value2]} 创建：<font color=green>pd.DataFrame({'name': ['Joe', 'Helen'], 'score': [98, 80]}) </font>")

# {col: tuple} - tuples are accepted the same way as lists.
df21_2 = pd.DataFrame({'name': ('Joe', 'Helen'), 'score': (98, 80)})
display_content(df21_2, "通过 {col: (col_value, col_value2)} 创建：<font color=green>pd.DataFrame({'name': ('Joe', 'Helen'), 'score': (98, 80)}) </font>")

# {col: {row_label: value}} - the inner keys become row labels. The two inner
# dicts do not share all keys ('Helen' vs 'May'), so the row index is their
# union and the cells with no value are filled with NaN.
# The caption previously showed "{row_index, col_value}" (set syntax); the
# structure being demonstrated is a mapping, "{row_index: col_value}".
df21_3 = pd.DataFrame({'math_score': {'Joe':98, 'Helen': 80}, 'english_score': {'Joe':97, 'May': 70}})
display_content(df21_3, "通过 {col: {row_index: col_value}} 创建：<font color=green>pd.DataFrame({'math_score': {'Joe':98, 'Helen': 80}, 'english_score': {'Joe':97, 'May': 70}}) </font>")

# {col: Series} - the Series' own index supplies the row labels.
s21_4 = pd.Series([90, 80], index=['Joe', 'Helen'])
df21_4 = pd.DataFrame({'score': s21_4})
display_content(df21_4, "通过 {col: series} 创建：<font color=green>pd.DataFrame({'score': pd.Series([90, 80], index=['Joe', 'Helen'])})</font>")


## 行集合 列表

In [None]:
# Row-oriented construction: each inner list is one row; `columns` names the fields.
s22_1 = pd.DataFrame([['Joe', 98], ['Helen', 80]], columns=['name', 'score'])
display_content(s22_1, "通过 2d array 创建：<font color=green>pd.DataFrame([['Joe', 98], ['Helen', 80]], columns=['name', 'score'])</font>")

# Same idea with explicit row labels passed through `index`.
# The caption now echoes the call actually made here; it previously repeated
# the code of the example above (and misspelled "index" as "hindex").
s22_2 = pd.DataFrame([[98, 99], [80, 70]], columns=['math', 'english'], index=['Joe', 'Helen'])
display_content(s22_2, "通过 2d array with index 创建：<font color=green>pd.DataFrame([[98, 99], [80, 70]], columns=['math', 'english'], index=['Joe', 'Helen'])</font>")


## 通过已有数据创建

In [None]:
# Source frame that the next statement passes back into the constructor.
s23 = pd.DataFrame([['Joe', 98], ['Helen', 80]], columns=['name', 'score'])

# An existing DataFrame can itself be used as the `data` argument.
s23_1 = pd.DataFrame(s23)
display_content(s23_1, "通过已有数据创建<font color=green>pd.DataFrame(s23)</font>")

# 访问

## 普通访问

In [None]:
# Four students with two scores each; rows get the default integer index 0..3.
df31 = pd.DataFrame([['Joe', 98, 100], ['Helen', 80, 89], ['Allen',90, 98], ['Jack', 87, 78]], columns=['name', 'math', 'english'])

display_content(df31, '初始值')



# A single column label inside [] selects that one column.
df31_1 = df31['name']
display_content(df31_1, '选择单列')


# A list of labels inside [] selects those columns, in the order listed.
df31_2 = df31[['name','math']]
display_content(df31_2, '选择指定列')

# A slice inside [] selects rows, not columns; 1:2 keeps only the row at position 1.
df31_3 = df31[1:2]            
display_content(df31_3, '切片选择行')

# A boolean mask inside [] keeps the rows where the condition holds (math > 90).
df31_4 = df31[df31['math'] > 90]
display_content(df31_4, '布尔选择行')


## [numpy 风格](https://wizardforcel.gitbooks.io/pyda-2e/content/img/7178691-64354f2ab777bd8c.png)

### loc 标签索引

In [None]:
# Label-based selection with .loc. The 'name' column is promoted to the row
# index so that rows can be addressed by student name.
df321 = pd.DataFrame([['Joe', 98, 100], ['Helen', 80, 89], ['Allen',90, 98], ['Jack', 87, 78]], columns=['name', 'math', 'english'])
df321_1 = df321.set_index('name')
display_content(df321_1, '初始值')

# One row label -> that single row.
# Fix: display the selected row; the original displayed df321_1 (the whole
# frame) here, so the single-row result was never shown.
df321_2 = df321_1.loc['Joe']
display_content(df321_2, '选择单行')

# List of row labels -> those rows.
df321_2 = df321_1.loc[['Joe', 'Helen']]
display_content(df321_2, '选择多行')

# Label slice -> rows from 'Joe' through 'Helen'; label slices include both endpoints.
df321_3 = df321_1.loc['Joe':'Helen']
display_content(df321_3, '切片选择多行')

# All rows, one column label.
df321_4 = df321_1.loc[:, 'math']
display_content(df321_4, '选择单列')

# All rows, list of column labels.
df321_5 = df321_1.loc[:, ['math','english']]
display_content(df321_5, '选择多列')

# All rows, column label slice (both endpoints included).
df321_6 = df321_1.loc[:, 'math':'english']
display_content(df321_6, '切片选择多列')

# Row slice combined with a single column label.
df321_7 = df321_1.loc['Joe':'Helen','math']
display_content(df321_7, '混合选择')


### iloc 整数索引

In [None]:
# Position-based selection with .iloc. Unlike .loc label slices, integer
# slices exclude the stop position.
df322 = pd.DataFrame([['Joe', 98, 100], ['Helen', 80, 89], ['Allen',90, 98], ['Jack', 87, 78]], columns=['name', 'math', 'english'])
display_content(df322, '初始值')

# One integer -> the row at that position.
df322_1 = df322.iloc[0]
display_content(df322_1, '选择单行')

# List of integers -> the rows at those positions.
df322_2 = df322.iloc[[0,1]]
display_content(df322_2, '选择多行')

# Integer slice -> rows at positions 0 and 1 (stop is exclusive).
df322_3 = df322.iloc[0:2]
display_content(df322_3, '切片选择多行')

# All rows, the column at position 1 ('math').
df322_4 = df322.iloc[:,1]
display_content(df322_4, '选择单列')

# All rows, the columns at positions 0 and 1.
df322_5 = df322.iloc[:, [0,1]]
display_content(df322_5, '选择多列')

# All rows, column slice. Fix: the original used 0:1, which (stop being
# exclusive) selects only one column although the caption says "multiple
# columns"; 0:2 selects the columns at positions 0 and 1.
df322_6 = df322.iloc[:, 0:2]
display_content(df322_6, '切片选择多列')

# Row position combined with a column slice; 1:2 covers only position 1,
# so the result holds the single 'math' value of row 0.
df322_7 = df322.iloc[0,1:2]
display_content(df322_7, '混合选择')


## 其他索引

In [132]:
# Scalar access with .at (by label) and .iat (by position).
df33 = pd.DataFrame([['Joe', 98, 100], ['Helen', 80, 89], ['Allen',90, 98], ['Jack', 87, 78]], columns=['name', 'math', 'english'])
display_content(df33, '初始值')

# The original second call re-displayed df33 under an empty title, which
# renders as a stray "****" and demonstrates no indexing at all.
# NOTE(review): at/iat were chosen because the cell's saved output is the
# scalar 'Joe'; confirm this is the access style the section intended.
df33_1 = df33.at[0, 'name']
display_content(df33_1, 'at 按标签访问单个值')

df33_2 = df33.iat[0, 0]
display_content(df33_2, 'iat 按位置访问单个值')

**初始值**

Unnamed: 0,name,math,english
0,Joe,98,100
1,Helen,80,89
2,Allen,90,98
3,Jack,87,78


'Joe'

# 修改

In [None]:
# Starting frame with columns 'name' and 'score'.
df4 = pd.DataFrame([['Joe', 98], ['Helen', 80]], columns=['name', 'score'])
display_content(df4, "初始值")


# rename() with a columns mapping; the result is bound to a new name, df4_1.
df4_1 = df4.rename(columns={'score': 'math'})
display_content(df4_1, "将score修改为math")


# Assigning to a new column label adds the 'english' column to df4_1 itself,
# one value per row.
df4_1['english'] = [90, 88]

display_content(df4_1, "增加english列")


# 删除

In [None]:
# Four-row frame used to contrast non-mutating drop() with the in-place `del`.
df5 = pd.DataFrame([['Joe', 98], ['Helen', 80], ['Allen',90], ['Jack', 87]], columns=['name', 'score'])
display_content(df5, '初始值')

# drop() with a list of row labels; the result is bound to df5_1.
df5_1 = df5.drop([2,3])
display_content(df5_1, '删除行,原数据不变')

# drop(columns=...) removes columns instead of rows; the result is bound to df5_2.
df5_2 = df5.drop(columns=['score'])
display_content(df5_2, '删除列,原数据不变')

# `del` removes the column from df5 itself, so from here on df5 has no 'score'.
del df5['score']
display_content(df5, '在原数据上删除score列')


# 计算

# 索引

## 基本操作

In [None]:
# Frame whose row labels are student names; the index itself is then named 'name'.
df71 = pd.DataFrame([[100, 98], [90, 80]], columns=['math', 'english'], index=['Joe', 'Helen'])
df71.index.name = 'name'
# NOTE(review): reindex() is called without any target labels and its result is
# only shown as the cell output, not assigned; df71 is not rebound here.
# Presumably a new index was meant to be passed - confirm the intended example.
df71.reindex()

In [None]:
# Show df71 as this cell's output (bare last expression).
df71