# Pandas 基础 & 进阶

## 1、创建 DataFrame

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a DataFrame from a list of Series, each Series created from a dict.
# Every Series becomes one row; the dict keys become the column labels.
country1 = pd.Series({
    'Name': '中国',
    'Language': 'Chinese',
    'Area': '9.597M km2',
    'Happiness Rank': 79,
})

country2 = pd.Series({
    'Name': '美国',
    'Language': 'English (US)',
    'Area': '9.834M km2',
    'Happiness Rank': 14,
})

country3 = pd.Series({
    'Name': '澳大利亚',
    'Language': 'English (AU)',
    'Area': '7.692M km2',
    'Happiness Rank': 9,
})

# One row label per Series, in the same order as the list
df = pd.DataFrame(data=[country1, country2, country3],
                  index=['CH', 'US', 'AU'])
df

Unnamed: 0,Area,Happiness Rank,Language,Name
CH,9.597M km2,79,Chinese,中国
US,9.834M km2,14,English (US),美国
AU,7.692M km2,9,English (AU),澳大利亚


In [3]:
# Add columns.
# A scalar is broadcast to every row. A list must supply exactly one value per
# row; a list that is too short or too long makes pandas raise an error.
df['Location'] = '地球'
df['Region'] = ['亚洲', '北美洲', '大洋洲']
df

Unnamed: 0,Area,Happiness Rank,Language,Name,Location,Region
CH,9.597M km2,79,Chinese,中国,地球,亚洲
US,9.834M km2,14,English (US),美国,地球,北美洲
AU,7.692M km2,9,English (AU),澳大利亚,地球,大洋洲


## 2、Pandas 索引

In [4]:
# Row selection.
# .loc looks a row up by its index *label*; .iloc by its integer *position*.
# (The first heading used to call .loc a positional index, which is what
# .iloc is -- the label below now says "label index".)
print('loc（标签索引）:')
print(df.loc['CH'])

print()

print('iloc（整型位置索引）:')
print(df.iloc[0])

loc(位置索引):
Area              9.597M km2
Happiness Rank            79
Language             Chinese
Name                      中国
Location                  地球
Region                    亚洲
Name: CH, dtype: object

iloc（整型位置索引）:
Area              9.597M km2
Happiness Rank            79
Language             Chinese
Name                      中国
Location                  地球
Region                    亚洲
Name: CH, dtype: object


In [5]:
# Column selection: a single label inside [] gives back that column as a Series
area_column = df['Area']
area_column

CH    9.597M km2
US    9.834M km2
AU    7.692M km2
Name: Area, dtype: object

In [6]:
# Select several (not necessarily adjacent) columns by passing a list of labels
wanted_columns = ['Name', 'Area']
df[wanted_columns]

Unnamed: 0,Name,Area
CH,中国,9.597M km2
US,美国,9.834M km2
AU,澳大利亚,7.692M km2


In [7]:
# Mixed selection: the two lookups can be chained in either order
print('先取列，再取行：')
area_column = df['Area']  # column first ...
print(area_column.loc[['CH', 'US']], end='\n\n')  # ... then rows; blank line after

print('先取行，再取列：')
ch_row = df.loc['CH']  # row first ...
print(ch_row[['Area', 'Name']])  # ... then columns

先取列，再取行：
CH    9.597M km2
US    9.834M km2
Name: Area, dtype: object

先取行，再取列：
Area    9.597M km2
Name            中国
Name: CH, dtype: object


In [8]:
# Transpose: rows become columns and columns become rows
df.transpose()

Unnamed: 0,CH,US,AU
Area,9.597M km2,9.834M km2,7.692M km2
Happiness Rank,79,14,9
Language,Chinese,English (US),English (AU)
Name,中国,美国,澳大利亚
Location,地球,地球,地球
Region,亚洲,北美洲,大洋洲


## 3、删除数据

In [9]:
# Drop a row without touching df: drop() hands back a new frame
df2 = df.drop(index='CH')
df2

Unnamed: 0,Area,Happiness Rank,Language,Name,Location,Region
US,9.834M km2,14,English (US),美国,地球,北美洲
AU,7.692M km2,9,English (AU),澳大利亚,地球,大洋洲


In [10]:
# Drop a row in place: df itself loses the 'CH' row
df.drop(index='CH', inplace=True)
df

Unnamed: 0,Area,Happiness Rank,Language,Name,Location,Region
US,9.834M km2,14,English (US),美国,地球,北美洲
AU,7.692M km2,9,English (AU),澳大利亚,地球,大洋洲


In [11]:
# Drop a column (in place)
df.drop(columns='Area', inplace=True)
df

Unnamed: 0,Happiness Rank,Language,Name,Location,Region
US,14,English (US),美国,地球,北美洲
AU,9,English (AU),澳大利亚,地球,大洋洲


In [12]:
# A column can also be removed with the `del` statement; this modifies df itself
del df['Name']
df

Unnamed: 0,Happiness Rank,Language,Location,Region
US,14,English (US),地球,北美洲
AU,9,English (AU),地球,大洋洲


## 4、DataFrame 操作与加载

注意：留意各个函数的 `inplace` 参数：为 `True` 时直接修改原对象，否则返回新对象、原对象保持不变。

In [13]:
# Caution: `ranks` is the column taken straight from df (no copy is made), so
# the in-place `+=` below also shows up in df -- see the displayed frame.
# NOTE(review): this relies on pandas handing back a view here; with
# Copy-on-Write enabled (the default from pandas 3.0) df would presumably stay
# unchanged -- confirm against the pandas version this notebook targets.
ranks = df['Happiness Rank']
ranks += 2
df

Unnamed: 0,Happiness Rank,Language,Location,Region
US,16,English (US),地球,北美洲
AU,11,English (AU),地球,大洋洲


In [14]:
# The safe way: work on an explicit copy() so the original DataFrame is untouched
ranks = df['Happiness Rank'].copy()
ranks += 2
print(ranks, end='\n\n')  # the copy carries the change, followed by a blank line
df  # the original frame still shows the old values

US    18
AU    13
Name: Happiness Rank, dtype: int64



Unnamed: 0,Happiness Rank,Language,Location,Region
US,16,English (US),地球,北美洲
AU,11,English (AU),地球,大洋洲


In [15]:
# Load the CSV file into a DataFrame
reprot_2015_df = pd.read_csv(filepath_or_buffer='./2015.csv')
reprot_2015_df.head(n=5)  # preview the first rows

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176


In [16]:
# Inspect the frame's metadata
reprot_2015_df.info() # shows non-null counts (i.e. whether data is missing) and each column's dtype

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
Country                          158 non-null object
Region                           158 non-null object
Happiness Rank                   158 non-null int64
Happiness Score                  158 non-null float64
Standard Error                   158 non-null float64
Economy (GDP per Capita)         158 non-null float64
Family                           158 non-null float64
Health (Life Expectancy)         158 non-null float64
Freedom                          158 non-null float64
Trust (Government Corruption)    158 non-null float64
Generosity                       158 non-null float64
Dystopia Residual                158 non-null float64
dtypes: float64(9), int64(1), object(2)
memory usage: 14.9+ KB


In [17]:
# index_col names the column to use as the row index;
# usecols restricts which columns are read from the file
columns_to_read = ['Country', 'Happiness Rank', 'Happiness Score', 'Region']
reprot_2015_df = pd.read_csv('./2015.csv',
                             usecols=columns_to_read,
                             index_col='Country')
reprot_2015_df.head()

Unnamed: 0_level_0,Region,Happiness Rank,Happiness Score
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Switzerland,Western Europe,1,7.587
Iceland,Western Europe,2,7.561
Denmark,Western Europe,3,7.527
Norway,Western Europe,4,7.522
Canada,North America,5,7.427


In [18]:
# Show the column labels, a blank line, then the row labels
print('列名：', reprot_2015_df.columns, end='\n\n')
print('行名：', reprot_2015_df.index)

列名： Index(['Region', 'Happiness Rank', 'Happiness Score'], dtype='object')

行名： Index(['Switzerland', 'Iceland', 'Denmark', 'Norway', 'Canada', 'Finland',
       'Netherlands', 'Sweden', 'New Zealand', 'Australia',
       ...
       'Chad', 'Guinea', 'Ivory Coast', 'Burkina Faso', 'Afghanistan',
       'Rwanda', 'Benin', 'Syria', 'Burundi', 'Togo'],
      dtype='object', name='Country', length=158)


In [24]:
# reset_index() restores the default integer index and turns the old index
# back into an ordinary column. Entries of an existing index cannot be
# modified directly. The method also accepts inplace=...; without it, as
# here, the result is a new frame.
default_indexed = reprot_2015_df.reset_index()
default_indexed.head()

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score
0,Switzerland,Western Europe,1,7.587
1,Iceland,Western Europe,2,7.561
2,Denmark,Western Europe,3,7.527
3,Norway,Western Europe,4,7.522
4,Canada,North America,5,7.427


In [31]:
# Rename columns in place using an old-name -> new-name mapping
new_column_names = {
    'Region': '地区',
    'Happiness Rank': '排名',
    'Happiness Score': '幸福指数',
}
reprot_2015_df.rename(columns=new_column_names, inplace=True)
reprot_2015_df.head()

Unnamed: 0_level_0,地区,排名,幸福指数
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Switzerland,Western Europe,1,7.587
Iceland,Western Europe,2,7.561
Denmark,Western Europe,3,7.527
Norway,Western Europe,4,7.522
Canada,North America,5,7.427


In [33]:
# reindex() reorders (or sets) the index / columns. Labels that did not exist
# before -- '新加列' here -- come back filled with missing values.
column_order = ['排名', '幸福指数', '地区', '新加列']
reprot_2015_df.reindex(columns=column_order).head()

Unnamed: 0_level_0,排名,幸福指数,地区,新加列
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Switzerland,1,7.587,Western Europe,
Iceland,2,7.561,Western Europe,
Denmark,3,7.527,Western Europe,
Norway,4,7.522,Western Europe,
Canada,5,7.427,North America,


## 5、Boolean Mask（布尔遮罩）

In [36]:
# Keep only the Western Europe countries
only_westren_europe = reprot_2015_df['地区'].eq('Western Europe')  # the filter condition, i.e. the Boolean mask
reprot_2015_df.loc[only_westren_europe]  # rows for which the mask is True

Unnamed: 0_level_0,地区,排名,幸福指数
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Switzerland,Western Europe,1,7.587
Iceland,Western Europe,2,7.561
Denmark,Western Europe,3,7.527
Norway,Western Europe,4,7.522
Finland,Western Europe,6,7.406
Netherlands,Western Europe,7,7.378
Sweden,Western Europe,8,7.364
Austria,Western Europe,13,7.2
Luxembourg,Western Europe,17,6.946
Ireland,Western Europe,18,6.94


In [39]:
# Western Europe countries ranked outside the top 10.
# Rank 10 is itself still inside the top 10, so the comparison must be strict
# (`> 10`); the previous `>= 10` would also have let a rank-10 country through.
only_westren_europe_10 = (reprot_2015_df['地区'] == 'Western Europe') & (reprot_2015_df['排名'] > 10)
reprot_2015_df[only_westren_europe_10]

Unnamed: 0_level_0,地区,排名,幸福指数
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austria,Western Europe,13,7.2
Luxembourg,Western Europe,17,6.946
Ireland,Western Europe,18,6.94
Belgium,Western Europe,19,6.937
United Kingdom,Western Europe,21,6.867
Germany,Western Europe,26,6.75
France,Western Europe,29,6.575
Spain,Western Europe,36,6.329
Malta,Western Europe,37,6.302
Italy,Western Europe,50,5.948


In [40]:
# With practice the mask can be written inline.
# "Outside the top 10" means rank strictly greater than 10, hence `> 10`
# rather than `>= 10` (which would include rank 10).
reprot_2015_df[(reprot_2015_df['地区'] == 'Western Europe') & (reprot_2015_df['排名'] > 10)]

Unnamed: 0_level_0,地区,排名,幸福指数
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austria,Western Europe,13,7.2
Luxembourg,Western Europe,17,6.946
Ireland,Western Europe,18,6.94
Belgium,Western Europe,19,6.937
United Kingdom,Western Europe,21,6.867
Germany,Western Europe,26,6.75
France,Western Europe,29,6.575
Spain,Western Europe,36,6.329
Malta,Western Europe,37,6.302
Italy,Western Europe,50,5.948


## 6、层级索引

In [42]:
# Reload the full CSV: earlier cells replaced this frame with a column subset
# indexed by Country and renamed its columns
reprot_2015_df = pd.read_csv(filepath_or_buffer='./2015.csv')
reprot_2015_df.head(n=5)

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176


In [45]:
# Build a hierarchical (multi-level) index: Region is level 0, Country is level 1
reprot_2015_df2 = reprot_2015_df.set_index(keys=['Region', 'Country'])
reprot_2015_df2.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
Region,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Western Europe,Switzerland,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
Western Europe,Iceland,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
Western Europe,Denmark,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
Western Europe,Norway,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
North America,Canada,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
Western Europe,Finland,6,7.406,0.0314,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351,2.61955
Western Europe,Netherlands,7,7.378,0.02799,1.32944,1.28017,0.89284,0.61576,0.31814,0.4761,2.4657
Western Europe,Sweden,8,7.364,0.03157,1.33171,1.28907,0.91087,0.6598,0.43844,0.36262,2.37119
Australia and New Zealand,New Zealand,9,7.286,0.03371,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501,2.26425
Australia and New Zealand,Australia,10,7.284,0.04083,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562,2.26646


In [51]:
# Selecting by a level-0 label returns the rows filed under that region
western_europe = reprot_2015_df2.loc['Western Europe']
western_europe

Unnamed: 0_level_0,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Switzerland,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
Iceland,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
Denmark,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
Norway,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
Finland,6,7.406,0.0314,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351,2.61955
Netherlands,7,7.378,0.02799,1.32944,1.28017,0.89284,0.61576,0.31814,0.4761,2.4657
Sweden,8,7.364,0.03157,1.33171,1.28907,0.91087,0.6598,0.43844,0.36262,2.37119
Austria,13,7.2,0.03751,1.33723,1.29704,0.89042,0.62433,0.18676,0.33088,2.5332
Luxembourg,17,6.946,0.03499,1.56391,1.21963,0.91894,0.61583,0.37798,0.28034,1.96961
Ireland,18,6.94,0.03676,1.33596,1.36948,0.89533,0.61777,0.28703,0.45901,1.9757


In [52]:
# Giving a label for both index levels pins down one specific row
reprot_2015_df2.loc[('Western Europe', 'Switzerland')]

Happiness Rank                   1.00000
Happiness Score                  7.58700
Standard Error                   0.03411
Economy (GDP per Capita)         1.39651
Family                           1.34951
Health (Life Expectancy)         0.94143
Freedom                          0.66557
Trust (Government Corruption)    0.41978
Generosity                       0.29678
Dystopia Residual                2.51738
Name: (Western Europe, Switzerland), dtype: float64

In [55]:
# Swap the order of the index levels (the two innermost levels, spelled out
# explicitly: i=-2 and j=-1 are the method's defaults)
reprot_2015_df2.swaplevel(i=-2, j=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
Country,Region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.43630,2.70201
Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
Norway,Western Europe,4,7.522,0.03880,1.45900,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176
Finland,Western Europe,6,7.406,0.03140,1.29025,1.31826,0.88911,0.64169,0.41372,0.23351,2.61955
Netherlands,Western Europe,7,7.378,0.02799,1.32944,1.28017,0.89284,0.61576,0.31814,0.47610,2.46570
Sweden,Western Europe,8,7.364,0.03157,1.33171,1.28907,0.91087,0.65980,0.43844,0.36262,2.37119
New Zealand,Australia and New Zealand,9,7.286,0.03371,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501,2.26425
Australia,Australia and New Zealand,10,7.284,0.04083,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562,2.26646


In [58]:
# Sort the rows by the hierarchical index, starting from level 0
sorted_by_level0 = reprot_2015_df2.sort_index(level=0)
sorted_by_level0

Unnamed: 0_level_0,Unnamed: 1_level_0,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
Region,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Australia and New Zealand,Australia,10,7.284,0.04083,1.33358,1.30923,0.93156,0.65124,0.35637,0.43562,2.26646
Australia and New Zealand,New Zealand,9,7.286,0.03371,1.25018,1.31967,0.90837,0.63938,0.42922,0.47501,2.26425
Central and Eastern Europe,Albania,95,4.959,0.05013,0.87867,0.80434,0.81325,0.35733,0.06413,0.14272,1.89894
Central and Eastern Europe,Armenia,127,4.350,0.04763,0.76821,0.77711,0.72990,0.19847,0.03900,0.07855,1.75873
Central and Eastern Europe,Azerbaijan,80,5.212,0.03363,1.02389,0.93793,0.64045,0.37030,0.16065,0.07799,2.00073
Central and Eastern Europe,Belarus,59,5.813,0.03938,1.03192,1.23289,0.73608,0.37938,0.19090,0.11046,2.13090
Central and Eastern Europe,Bosnia and Herzegovina,96,4.949,0.06913,0.83223,0.91916,0.79081,0.09245,0.00227,0.24808,2.06367
Central and Eastern Europe,Bulgaria,134,4.218,0.04828,1.01216,1.10614,0.76649,0.30587,0.00872,0.11921,0.89991
Central and Eastern Europe,Croatia,62,5.759,0.04394,1.08254,0.79624,0.78805,0.25883,0.02430,0.05444,2.75414
Central and Eastern Europe,Czech Republic,31,6.505,0.04168,1.17898,1.20643,0.84483,0.46364,0.02652,0.10686,2.67782


## 7、数据清洗

In [88]:
# Load the playback log used for the data-cleaning examples and preview it
log_data = pd.read_csv(filepath_or_buffer='./log.csv')
log_data.head(n=5)

Unnamed: 0,time,user,video,playback position,paused,volume
0,1469974424,cheryl,intro.html,5,False,10.0
1,1469974454,cheryl,intro.html,6,,
2,1469974544,cheryl,intro.html,9,,
3,1469974574,cheryl,intro.html,10,,
4,1469977514,bob,intro.html,1,,


In [89]:
# Summarise the log: the non-null counts reveal which columns have missing values
log_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 6 columns):
time                 33 non-null int64
user                 33 non-null object
video                33 non-null object
playback position    33 non-null int64
paused               3 non-null object
volume               4 non-null float64
dtypes: float64(1), int64(2), object(3)
memory usage: 1.6+ KB


In [90]:
# Index the log by (time, user) and order it by that index -- both in place
index_levels = ['time', 'user']
log_data.set_index(keys=index_levels, inplace=True)  # build the hierarchical index
log_data.sort_index(level=0, inplace=True)  # sort the index levels
log_data.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,,
1469974454,sue,advanced.html,24,,
1469974484,cheryl,intro.html,7,,
1469974514,cheryl,intro.html,8,,
1469974524,sue,advanced.html,25,,
1469974544,cheryl,intro.html,9,,
1469974554,sue,advanced.html,26,,
1469974574,cheryl,intro.html,10,,


In [93]:
# Fill every missing value with a constant (returns a new frame)
log_data.fillna(value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,0,0.0
1469974454,sue,advanced.html,24,0,0.0
1469974484,cheryl,intro.html,7,0,0.0
1469974514,cheryl,intro.html,8,0,0.0
1469974524,sue,advanced.html,25,0,0.0
1469974544,cheryl,intro.html,9,0,0.0
1469974554,sue,advanced.html,26,0,0.0
1469974574,cheryl,intro.html,10,0,0.0


In [94]:
# Drop the rows that contain missing values
complete_rows = log_data.dropna()
complete_rows

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469977424,bob,intro.html,1,True,10.0


In [95]:
# Forward fill: each missing value takes the previous value in its column
forward_filled = log_data.ffill()
forward_filled

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,False,10.0
1469974454,sue,advanced.html,24,False,10.0
1469974484,cheryl,intro.html,7,False,10.0
1469974514,cheryl,intro.html,8,False,10.0
1469974524,sue,advanced.html,25,False,10.0
1469974544,cheryl,intro.html,9,False,10.0
1469974554,sue,advanced.html,26,False,10.0
1469974574,cheryl,intro.html,10,False,10.0


In [96]:
# Backward fill: each missing value takes the next value in its column
backward_filled = log_data.bfill()
backward_filled

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,True,5.0
1469974454,sue,advanced.html,24,True,5.0
1469974484,cheryl,intro.html,7,True,5.0
1469974514,cheryl,intro.html,8,True,5.0
1469974524,sue,advanced.html,25,True,5.0
1469974544,cheryl,intro.html,9,True,5.0
1469974554,sue,advanced.html,26,True,5.0
1469974574,cheryl,intro.html,10,True,5.0


In [98]:
# Pitfall: this searches for the literal string 'NaN'. The missing cells are
# not stored as that string, so nothing is replaced -- the displayed result
# still has the same empty cells.
log_data.replace(to_replace='NaN', value='777')

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,,
1469974454,sue,advanced.html,24,,
1469974484,cheryl,intro.html,7,,
1469974514,cheryl,intro.html,8,,
1469974524,sue,advanced.html,25,,
1469974544,cheryl,intro.html,9,,
1469974554,sue,advanced.html,26,,
1469974574,cheryl,intro.html,10,,


In [103]:
# Finally, replace() can fill missing values too -- match np.nan itself rather
# than the string 'NaN'. (numpy is imported in the notebook's import cell at
# the top, not here, so that all dependencies are declared in one place.)
log_data.replace(np.nan, 7)

Unnamed: 0_level_0,Unnamed: 1_level_0,video,playback position,paused,volume
time,user,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1469974424,cheryl,intro.html,5,False,10.0
1469974424,sue,advanced.html,23,False,10.0
1469974454,cheryl,intro.html,6,7,7.0
1469974454,sue,advanced.html,24,7,7.0
1469974484,cheryl,intro.html,7,7,7.0
1469974514,cheryl,intro.html,8,7,7.0
1469974524,sue,advanced.html,25,7,7.0
1469974544,cheryl,intro.html,9,7,7.0
1469974554,sue,advanced.html,26,7,7.0
1469974574,cheryl,intro.html,10,7,7.0
