このノートブックは、[pandasのドキュメント](https://pandas.pydata.org/pandas-docs/stable/10min.html)を基にしています。

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Object Creation

In [2]:
# Build a Series from a plain list. The np.nan entry marks a missing value
# and makes the whole Series float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive daily timestamps starting 2013-01-01; reused below as the
# row index of the example frames.
dates = pd.date_range(start='20130101', periods=6)

dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# 6x4 frame of standard-normal samples, indexed by `dates`, columns A-D.
# NOTE(review): no random seed is set in the visible cells, so these values
# (and every output derived from them) change on each fresh run.
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))

df

Unnamed: 0,A,B,C,D
2013-01-01,0.618253,0.502765,-0.067995,-0.06627
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286
2013-01-05,-0.046063,-0.592812,-0.544709,-0.744876
2013-01-06,-0.033579,-0.937799,-2.322357,0.766281


In [5]:
# Build a frame from a dict of column specs: the scalars ('A', 'B', 'F') are
# broadcast to the length of the array-like columns ('C', 'D', 'E'), and each
# column keeps its own dtype.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [6]:
# Each column of df2 carries its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing Data

In [7]:
# First rows of the frame (head() shows five rows when called without n).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.618253,0.502765,-0.067995,-0.06627
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286
2013-01-05,-0.046063,-0.592812,-0.544709,-0.744876


In [8]:
# Last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286
2013-01-05,-0.046063,-0.592812,-0.544709,-0.744876
2013-01-06,-0.033579,-0.937799,-2.322357,0.766281


In [9]:
# Row labels of the frame.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [11]:
# Underlying data as a NumPy array.
# NOTE(review): recent pandas documentation recommends DataFrame.to_numpy()
# over .values — consider switching if the installed pandas supports it.
df.values

array([[ 0.61825274,  0.50276504, -0.06799545, -0.06627048],
       [-0.99512073,  0.91932031, -1.22151646, -1.01605336],
       [ 1.37969906, -0.93675579, -1.27619681, -0.02015276],
       [ 0.00769677, -0.08826275, -0.98737373, -0.18828636],
       [-0.04606315, -0.5928116 , -0.54470861, -0.74487555],
       [-0.03357903, -0.93779933, -2.32235739,  0.76628084]])

In [12]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.155148,-0.188924,-1.070025,-0.21156
std,0.791828,0.774592,0.764283,0.623874
min,-0.995121,-0.937799,-2.322357,-1.016053
25%,-0.042942,-0.85077,-1.262527,-0.605728
50%,-0.012941,-0.340537,-1.104445,-0.127278
75%,0.465614,0.355008,-0.655375,-0.031682
max,1.379699,0.91932,-0.067995,0.766281


In [13]:
# Transpose: the dates become column labels and A-D become the rows.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.618253,-0.995121,1.379699,0.007697,-0.046063,-0.033579
B,0.502765,0.91932,-0.936756,-0.088263,-0.592812,-0.937799
C,-0.067995,-1.221516,-1.276197,-0.987374,-0.544709,-2.322357
D,-0.06627,-1.016053,-0.020153,-0.188286,-0.744876,0.766281


In [14]:
# Sort the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-06,-0.033579,-0.937799,-2.322357,0.766281
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153
2013-01-05,-0.046063,-0.592812,-0.544709,-0.744876
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286
2013-01-01,0.618253,0.502765,-0.067995,-0.06627
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053


## Selection

### Getting

In [15]:
# Select a single column; the result is a Series.
df['A']

2013-01-01    0.618253
2013-01-02   -0.995121
2013-01-03    1.379699
2013-01-04    0.007697
2013-01-05   -0.046063
2013-01-06   -0.033579
Freq: D, Name: A, dtype: float64

In [16]:
# Slicing with [] selects rows (here the first three), not columns.
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.618253,0.502765,-0.067995,-0.06627
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153


### Selection by Label

In [17]:
# Cross-section: the row labelled by the first date, returned as a Series.
df.loc[dates[0]]

A    0.618253
B    0.502765
C   -0.067995
D   -0.066270
Name: 2013-01-01 00:00:00, dtype: float64

In [18]:
# All rows, columns A and B selected by label.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,0.618253,0.502765
2013-01-02,-0.995121,0.91932
2013-01-03,1.379699,-0.936756
2013-01-04,0.007697,-0.088263
2013-01-05,-0.046063,-0.592812
2013-01-06,-0.033579,-0.937799


In [19]:
# Label slices include both endpoints: rows 2013-01-02 through 2013-01-04.
df.loc['20130102': '20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,-0.995121,0.91932
2013-01-03,1.379699,-0.936756
2013-01-04,0.007697,-0.088263


In [20]:
# A single row label with a column list reduces the result to a Series.
df.loc['20130102', ['A', 'B']]

A   -0.995121
B    0.919320
Name: 2013-01-02 00:00:00, dtype: float64

In [21]:
# Scalar lookup by row label and column label.
df.loc[dates[0], 'A']

0.61825274455811385

In [22]:
# Same scalar via .at, the accessor for single-value label lookups.
df.at[dates[0], 'A']

0.61825274455811385

### Selection by Position

In [23]:
# Fourth row, selected by integer position.
df.iloc[3]

A    0.007697
B   -0.088263
C   -0.987374
D   -0.188286
Name: 2013-01-04 00:00:00, dtype: float64

In [24]:
# Positional slices exclude the end: rows 3-4 and columns 0-1.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.007697,-0.088263
2013-01-05,-0.046063,-0.592812


In [25]:
# Lists of integer positions pick arbitrary rows and columns.
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,-0.995121,-1.221516
2013-01-03,1.379699,-1.276197
2013-01-05,-0.046063,-0.544709


In [26]:
# Rows 1-2 by position, every column.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153


In [27]:
# Every row, columns 1-2 by position.
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,0.502765,-0.067995
2013-01-02,0.91932,-1.221516
2013-01-03,-0.936756,-1.276197
2013-01-04,-0.088263,-0.987374
2013-01-05,-0.592812,-0.544709
2013-01-06,-0.937799,-2.322357


In [28]:
# Scalar lookup by integer position.
df.iloc[1, 1]

0.91932030913998264

In [29]:
# Same scalar via .iat, the accessor for single-value positional lookups.
df.iat[1, 1]

0.91932030913998264

### Boolean Indexing

In [30]:
# Keep only the rows where column A is positive.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.618253,0.502765,-0.067995,-0.06627
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286


In [31]:
# Element-wise mask: entries that fail the condition are shown as missing.
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.618253,0.502765,,
2013-01-02,,0.91932,,
2013-01-03,1.379699,,,
2013-01-04,0.007697,,,
2013-01-05,,,,
2013-01-06,,,,0.766281


In [32]:
# Work on a copy so `df` itself is left untouched, then add a string column E.
# Note: this rebinds `df2`, replacing the mixed-dtype frame built earlier.
df2 = df.copy()

df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']

df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.618253,0.502765,-0.067995,-0.06627,one
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053,one
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153,two
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286,three
2013-01-05,-0.046063,-0.592812,-0.544709,-0.744876,four
2013-01-06,-0.033579,-0.937799,-2.322357,0.766281,three


In [33]:
# isin() builds a boolean mask, keeping rows whose E is 'two' or 'four'.
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153,two
2013-01-05,-0.046063,-0.592812,-0.544709,-0.744876,four


### Setting

## Missing Data

In [34]:
# Restrict to the first four dates and add a new column E; reindex fills the
# labels that did not exist before with NaN.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])

# Set E for the first two dates only, leaving the other two rows missing.
df1.loc[dates[0]: dates[1], 'E'] = 1

df1

Unnamed: 0,A,B,C,D,E
2013-01-01,0.618253,0.502765,-0.067995,-0.06627,1.0
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053,1.0
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153,
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286,


In [44]:
# Drop every row that contains at least one missing value.
df1.dropna(how='any')

Unnamed: 0,A,B,C,D,E
2013-01-01,0.618253,0.502765,-0.067995,-0.06627,1.0
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053,1.0


In [45]:
# Replace the missing values with 5.
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,E
2013-01-01,0.618253,0.502765,-0.067995,-0.06627,1.0
2013-01-02,-0.995121,0.91932,-1.221516,-1.016053,1.0
2013-01-03,1.379699,-0.936756,-1.276197,-0.020153,5.0
2013-01-04,0.007697,-0.088263,-0.987374,-0.188286,5.0


In [46]:
# Boolean mask that is True where a value is missing.
pd.isnull(df1)

Unnamed: 0,A,B,C,D,E
2013-01-01,False,False,False,False,False
2013-01-02,False,False,False,False,False
2013-01-03,False,False,False,False,True
2013-01-04,False,False,False,False,True


## Operations

### Stats

In [47]:
# Mean of each column.
# NOTE(review): the execution counts jump here (In [34] -> In [44]..), and the
# saved outputs from this cell through the Apply section show a 2000-2002
# index, i.e. they were produced after `df` had been rebound by a later cell.
# Restart the kernel and run all cells to refresh them.
df.mean()

A    -2.646609
B     8.000440
C    14.582645
D    15.084920
dtype: float64

In [48]:
# Mean across the columns, giving one value per row.
df.mean(axis=1)

2000-01-01     0.005063
2000-01-02    -0.043278
2000-01-03     0.056917
2000-01-04     0.299292
2000-01-05     0.584717
2000-01-06    -0.060725
2000-01-07    -0.276199
2000-01-08    -0.134982
2000-01-09     0.157134
2000-01-10    -0.550964
2000-01-11    -0.039723
2000-01-12    -0.083385
2000-01-13    -0.785855
2000-01-14    -0.649820
2000-01-15    -0.844099
2000-01-16    -1.414139
2000-01-17    -0.704719
2000-01-18    -0.672511
2000-01-19     0.218788
2000-01-20     1.159569
2000-01-21     1.100745
2000-01-22     1.213534
2000-01-23     1.410487
2000-01-24     2.531551
2000-01-25     2.960139
2000-01-26     3.262218
2000-01-27     3.322619
2000-01-28     3.291990
2000-01-29     3.562352
2000-01-30     3.752605
                ...    
2002-08-28    28.105632
2002-08-29    28.663800
2002-08-30    29.268673
2002-08-31    29.475474
2002-09-01    30.079525
2002-09-02    30.271997
2002-09-03    30.510502
2002-09-04    30.214186
2002-09-05    30.829085
2002-09-06    30.984539
2002-09-07    31

In [49]:
# Series on the same date index, shifted down by two rows; the first two
# slots become NaN and the last two original values fall off the end.
s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)

s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [50]:
# Subtract s from every column, aligning on the row index.
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D
2000-01-01,,,,
2000-01-02,,,,
2000-01-03,,,,
2000-01-04,,,,
2000-01-05,,,,
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,


### Apply

In [51]:
# Apply np.cumsum to each column: running totals down the rows.
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D
2000-01-01,-0.832751,0.868059,-0.301560,0.286505
2000-01-02,-0.901755,1.216812,-1.244315,0.776400
2000-01-03,0.092781,1.200232,-2.613368,1.395166
2000-01-04,1.668834,1.266200,-3.896847,2.233795
2000-01-05,3.619437,1.374661,-4.296806,2.913557
2000-01-06,4.394162,1.436736,-4.906988,2.444037
2000-01-07,5.469487,0.550584,-5.727608,1.970690
2000-01-08,7.317802,-1.023033,-8.385486,3.813942
2000-01-09,9.121496,-2.423652,-11.713516,7.367431
2000-01-10,11.493081,-3.722708,-16.267937,8.645467


In [52]:
# Range of each column: maximum minus minimum.
df.apply(lambda x: x.max() - x.min())

A    28.918161
B    58.627017
C    57.080590
D    49.180040
dtype: float64

### Histogramming

In [53]:
# Ten random integers from 0 to 6 (the upper bound 7 is exclusive).
s = pd.Series(np.random.randint(0, 7, size=10))

s

0    4
1    5
2    4
3    3
4    1
5    0
6    4
7    5
8    1
9    6
dtype: int64

In [54]:
# How often each distinct value occurs.
s.value_counts()

4    3
5    2
1    2
6    1
3    1
0    1
dtype: int64

### String Methods

In [55]:
# Vectorized string methods live under .str; the NaN entry stays NaN.
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])

s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

## Merge

### Concat

In [56]:
# 10x4 frame of random values with default integer row and column labels.
df = pd.DataFrame(np.random.randn(10, 4))

df

Unnamed: 0,0,1,2,3
0,0.321902,1.20507,0.330259,0.693629
1,0.597168,0.261625,1.299816,0.657233
2,0.633787,2.421194,0.397054,-0.137348
3,-0.398237,1.582425,-1.448677,-1.714734
4,-0.183996,1.236898,-0.153148,-0.172407
5,1.39201,-1.034959,0.346614,-0.370872
6,-0.292955,-1.016325,-0.215982,-0.576879
7,-0.408416,0.04812,0.231508,0.110683
8,-0.893417,1.454152,-0.874285,-0.681164
9,0.533998,1.208251,-1.159619,0.159926


In [58]:
# Split the frame into three consecutive row chunks...
pieces = [df[:3], df[3:7], df[7:]]

# ...and glue them back together in order.
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,0.321902,1.20507,0.330259,0.693629
1,0.597168,0.261625,1.299816,0.657233
2,0.633787,2.421194,0.397054,-0.137348
3,-0.398237,1.582425,-1.448677,-1.714734
4,-0.183996,1.236898,-0.153148,-0.172407
5,1.39201,-1.034959,0.346614,-0.370872
6,-0.292955,-1.016325,-0.215982,-0.576879
7,-0.408416,0.04812,0.231508,0.110683
8,-0.893417,1.454152,-0.874285,-0.681164
9,0.533998,1.208251,-1.159619,0.159926


### Join

In [59]:
# Two small frames in which the join key 'foo' appears twice on each side.
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})

right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})

left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [60]:
# Show the right-hand frame.
right

Unnamed: 0,key,rval
0,foo,4
1,foo,5


In [61]:
# Duplicate keys on both sides yield every pairing: 2 x 2 = 4 rows.
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [62]:
# Same example again, this time with unique keys on both sides.
left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})

right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})

left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [63]:
# Show the right-hand frame.
right

Unnamed: 0,key,rval
0,foo,4
1,bar,5


In [64]:
# Unique keys give a one-to-one match: one output row per key.
pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


### Append

In [65]:
# 8x4 frame of random values with columns A-D and default integer row labels.
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])

df

Unnamed: 0,A,B,C,D
0,-0.487518,1.274625,-0.485657,-0.016763
1,-0.44147,-2.051882,-0.632473,-1.595755
2,-0.029285,0.353873,-0.376998,0.842352
3,0.027088,0.507215,-1.145043,-0.639576
4,0.948615,0.321521,-0.30654,-0.837198
5,-0.523604,-0.135446,1.478583,-0.150725
6,2.32693,-1.06917,1.302856,2.774401
7,0.492611,0.554848,0.013007,0.100322


In [66]:
# Take the fourth row as a Series and add it again as a new last row.
s = df.iloc[3]

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. Turn the
# row into a one-row frame (to_frame().T) and concatenate it instead;
# ignore_index=True renumbers the result 0..n, as the append call did.
pd.concat([df, s.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,-0.487518,1.274625,-0.485657,-0.016763
1,-0.44147,-2.051882,-0.632473,-1.595755
2,-0.029285,0.353873,-0.376998,0.842352
3,0.027088,0.507215,-1.145043,-0.639576
4,0.948615,0.321521,-0.30654,-0.837198
5,-0.523604,-0.135446,1.478583,-0.150725
6,2.32693,-1.06917,1.302856,2.774401
7,0.492611,0.554848,0.013007,0.100322
8,0.027088,0.507215,-1.145043,-0.639576


## Grouping

In [67]:
# Sample data for grouping: two string key columns (A, B) and two random
# numeric columns (C, D).
df = pd.DataFrame({ 'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                    'C': np.random.randn(8),
                    'D': np.random.randn(8) })

df

Unnamed: 0,A,B,C,D
0,foo,one,-0.651992,0.106746
1,bar,one,-0.552285,-0.392568
2,foo,two,-1.254844,2.8e-05
3,bar,three,-1.115552,0.566673
4,foo,two,-0.746,-0.923376
5,bar,two,-0.502222,1.426331
6,foo,one,0.420041,0.86667
7,foo,three,0.67506,-0.423116


In [68]:
# Sum the numeric columns within each group of A. C and D are selected
# explicitly: since pandas 2.0, sum() no longer silently drops the string
# column B and would concatenate its values instead.
df.groupby('A')[['C', 'D']].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,-2.170058,1.600436
foo,-1.557735,-0.373048


In [69]:
# Grouping by several columns gives a result with a hierarchical index (A, B).
df.groupby(['A', 'B']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,C,D
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.552285,-0.392568
bar,three,-1.115552,0.566673
bar,two,-0.502222,1.426331
foo,one,-0.231951,0.973416
foo,three,0.67506,-0.423116
foo,two,-2.000844,-0.923348


## Reshaping

### Stack

In [70]:
# Pair up the two label lists element by element to get (first, second)
# tuples for a two-level index.
tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]))

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])

# Keep only the first four rows (the 'bar' and 'baz' groups) for the demo.
df2 = df[:4]

df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.944384,-1.027474
bar,two,-1.285602,-1.45961
baz,one,0.959814,-2.647089
baz,two,-2.352397,0.297181


In [71]:
# stack() moves the column labels (A, B) into a new innermost index level,
# turning the frame into a Series.
stacked = df2.stack()

stacked

first  second   
bar    one     A    0.944384
               B   -1.027474
       two     A   -1.285602
               B   -1.459610
baz    one     A    0.959814
               B   -2.647089
       two     A   -2.352397
               B    0.297181
dtype: float64

In [72]:
# unstack() reverses stack(): by default the innermost level becomes columns.
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.944384,-1.027474
bar,two,-1.285602,-1.45961
baz,one,0.959814,-2.647089
baz,two,-2.352397,0.297181


In [73]:
# Unstack index level 1 ('second') into the columns instead.
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,0.944384,-1.285602
bar,B,-1.027474,-1.45961
baz,A,0.959814,-2.352397
baz,B,-2.647089,0.297181


In [74]:
# Unstack index level 0 ('first') into the columns.
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.944384,0.959814
one,B,-1.027474,-2.647089
two,A,-1.285602,-2.352397
two,B,-1.45961,0.297181


### Pivot Tables

In [76]:
# Twelve-row sample in long format: three string key columns (A, B, C) built
# by repeating short lists, plus two random numeric columns (D, E).
df = pd.DataFrame({ 'A': ['one', 'one', 'two', 'three'] * 3,
                    'B': ['A', 'B', 'C'] * 4,
                    'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
                    'D' : np.random.randn(12),
                    'E' : np.random.randn(12) })

In [77]:
# Pivot D into a table with (A, B) as rows and the values of C as columns;
# combinations that do not occur in the data are left empty.
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.475295,-1.371719
one,B,0.248364,0.556322
one,C,-0.536821,0.310601
three,A,-0.503781,
three,B,,0.338301
three,C,-0.536639,
two,A,,0.18636
two,B,1.284389,
two,C,,-1.132025


## Time Series

In [None]:
# 100 timestamps at one-second spacing starting 2012-01-01. Lowercase 's' is
# the supported alias for seconds; uppercase 'S' is deprecated since pandas 2.2.
rng = pd.date_range('1/1/2012', periods=100, freq='s')

## Categoricals

In [35]:
# Six records with a raw letter grade each; used for the categorical demo.
df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})

In [36]:
# Convert the raw grades to a categorical column; the categories are the
# distinct values found in the data.
df["grade"] = df["raw_grade"].astype("category")

df["grade"]

0    a
1    b
2    b
3    a
4    a
5    e
Name: grade, dtype: category
Categories (3, object): [a, b, e]

In [37]:
# Rename the categories (a, b, e) to readable labels, in that order.
# Assigning to .cat.categories was removed in pandas 2.0; rename_categories
# returns a new Series, so the result is written back to the column.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])

In [38]:
# Reorder the categories from worst to best and add the labels that do not
# occur in the data ("bad", "medium"); the row values themselves are unchanged.
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium",
                                              "good", "very good"])

df["grade"]

0    very good
1         good
2         good
3    very good
4    very good
5     very bad
Name: grade, dtype: category
Categories (5, object): [very bad, bad, medium, good, very good]

In [39]:
# Sorting a categorical column follows the category order, not the alphabet.
df.sort_values(by="grade")

Unnamed: 0,id,raw_grade,grade
5,6,e,very bad
1,2,b,good
2,3,b,good
0,1,a,very good
3,4,a,very good
4,5,a,very good


In [40]:
# Count the rows per grade, including categories that never occur (size 0).
# observed=False is passed explicitly: recent pandas warns when it is omitted
# for categorical groupers, and the default is changing to observed=True,
# which would drop the empty categories from this result.
df.groupby("grade", observed=False).size()

grade
very bad     1
bad          0
medium       0
good         2
very good    3
dtype: int64

## Plotting

In [41]:
# 1000 standard-normal steps on consecutive dates starting 2000-01-01.
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))

# The cumulative sum turns the steps into a random walk.
ts = ts.cumsum()

ts.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x7fba3f7f6240>

In [42]:
# Four independent random walks sharing the date index of `ts`.
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=['A', 'B', 'C', 'D'])

df = df.cumsum()

# Draw on an explicit Axes: a bare plt.figure() followed by df.plot() leaves
# an empty extra figure, because DataFrame.plot() opens its own figure when
# no ax is passed. The trailing ';' suppresses the Legend repr in the output.
fig, ax = plt.subplots()
df.plot(ax=ax)
ax.legend(loc='best');

<matplotlib.legend.Legend at 0x7fba3d767898>

## Getting Data In/Out

### CSV

### HDF5

### Excel

## Gotchas

In [43]:
# Gotcha: a Series has no single truth value, so using one in `if` raises
# ValueError. The error is caught and printed so the demonstration is still
# visible without aborting a "Restart & Run All".
try:
    if pd.Series([False, True, False]):
        print("I was true")
except ValueError as err:
    print("ValueError:", err)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().