In [14]:
import pandas as pd
import numpy as np

## Series

In [15]:
# Five uniform draws from [0, 1), labelled 'a' through 'e'.
s = pd.Series(np.random.random(5), index=list('abcde'))

In [16]:
# Display the Series: index labels on the left, values on the right.
s

a    0.751153
b    0.005552
c    0.737204
d    0.298465
e    0.248996
dtype: float64

In [17]:
# The Index object holding the labels 'a'..'e' that were passed at construction.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [18]:
# No index argument is given, so the Series is labelled with integers 0..4.
pd.Series(np.random.randn(5))

0    0.616291
1    1.104805
2   -1.063225
3    0.617031
4   -0.877332
dtype: float64

In [19]:
# Dict keys become the index labels, kept in insertion order (b, a, c).
d = dict(b=1, a=0, c=2)
pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [20]:
# A scalar value is repeated once for every label in the index.
pd.Series(5, index = ['a', 'b', 'c', 'd', 'e'])

a    5
b    5
c    5
d    5
e    5
dtype: int64

In [21]:
# `s` is labelled with strings, so positional access goes through .iloc.
# Plain `s[0]` / `s[[4, 3, 1]]` on a non-integer index relies on the
# deprecated integer-as-position fallback of Series.__getitem__.
s.iloc[0]            # first element by position
s.iloc[:3]           # first three elements by position
s[s > s.median()]    # boolean mask: keep values above the median
s.iloc[[4, 3, 1]]    # pick positions 4, 3, 1 in that order
# NumPy ufuncs apply element-wise and keep the index; as the last
# expression in the cell, this is the only result that is rendered.
np.exp(s)

a    2.119443
b    1.005567
c    2.090083
d    1.347789
e    1.282737
dtype: float64

In [22]:
# Element type of the underlying data (float64 for this Series).
s.dtype

dtype('float64')

In [23]:
# The values as a plain NumPy array, without the index labels.
s.to_numpy()

array([0.75115312, 0.00555155, 0.73720369, 0.29846532, 0.24899633])

In [24]:
# Dict-style lookup of single values by label.
s['a']
s['e']
s
# `in` tests membership among the index labels, not the values.
# Only the last expression of the cell is rendered.
'e' in s
'f' in s

False

In [25]:
# Left commented out on purpose: 'f' is not among the labels of s
# (`'f' in s` evaluates to False), so this lookup would raise a KeyError.
# s['f']

In [26]:
# Arithmetic and NumPy ufuncs operate element-wise on a Series.
# Only the last expression of the cell is rendered.
s + s
s * 2
np.exp(s)

a    2.119443
b    1.005567
c    2.090083
d    1.347789
e    1.282737
dtype: float64

In [27]:
# Two overlapping positional slices: s1 drops the first element, s2 drops the last.
s1 = s.iloc[1:]
s2 = s.iloc[:-1]
# Addition aligns on labels; labels present in only one operand yield NaN.
s1 + s2

a         NaN
b    0.011103
c    1.474407
d    0.596931
e         NaN
dtype: float64

In [28]:
# Rebind `s` to a fresh Series of five standard-normal draws on the default
# integer index, with a name attached.
s = pd.Series(np.random.standard_normal(5), name='something')
s

0   -0.236109
1    0.051442
2    0.990441
3    0.035344
4    1.580066
Name: something, dtype: float64

In [29]:
# The name given at construction time.
s.name

'something'

## DataFrame

In [30]:
# Two float Series with overlapping but unequal indexes:
# 'one' covers a-c, 'two' covers a-d.
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
)
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64,
 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [31]:
# Build a frame from the dict of Series: the rows are the union of both
# indexes, and 'one' is missing at label 'd' because that Series has no 'd'.
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [32]:
# An explicit index selects and orders the rows by label; 'c' is dropped and
# 'one' stays missing at 'd'.
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [33]:
# `columns` picks which dict keys to use; 'three' is not a key of d, so that
# column comes out entirely missing.
pd.DataFrame(d, index=['d', 'b', 'a'], columns = ['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [34]:
# Rebind `d` to a dict of equal-length lists; each key becomes a column.
d = {'one': [1., 2., 3., 4.],
'two': [4., 3., 2., 1.]}
d
# Build a frame from the dict with no explicit index.
pd.DataFrame(d)
# Same data with explicit row labels; as the last expression in the cell,
# this is the only result that is rendered.
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [35]:
# A named Series becomes a one-column frame whose column label is the Series name.
pd.DataFrame(pd.Series(np.random.randn(5), name='something'))

Unnamed: 0,something
0,0.020063
1,1.288127
2,-2.940688
3,0.679304
4,-0.545725


In [36]:
# Column selection by label.
df['one']
# Add a derived column: element-wise product of 'one' and 'two'.
df['three'] = df['one'] * df['two']
df['three']
# Add a boolean column marking rows where 'one' exceeds 2.
# The cell ends with an assignment, so it renders no output.
df['flag'] = df['one'] > 2


In [37]:
# Rebind `df` to a fresh 8x3 frame of standard-normal draws with columns A, B, C.
df = pd.DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'])
# Scalar arithmetic is applied element-wise; only the final expression is rendered.
df * 5 + 2
1 / df

Unnamed: 0,A,B,C
0,3.444249,0.673103,0.77989
1,-11.086751,1.41508,0.870997
2,0.398811,1.133304,0.65191
3,2.276728,-0.790235,-2.377694
4,-1.4815,-5.383018,-4.551393
5,0.636698,-5.236459,0.880827
6,1.295444,1.372719,-0.795631
7,-2.456118,-0.461249,-1.274121


In [38]:
# Two boolean frames built from 0/1 data; dtype=bool coerces the integers.
df1 = pd.DataFrame(dict(a=[1, 0, 1], b=[0, 1, 1]), dtype=bool)
df2 = pd.DataFrame(dict(a=[0, 1, 1], b=[1, 1, 0]), dtype=bool)
# Element-wise logical AND of the two frames.
df1 & df2


Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


## dtypes

In [40]:
# One column per dtype of interest; scalar entries (B, C, D, F) are repeated
# to match the three rows supplied by the array/Series columns.
dft = pd.DataFrame(
    dict(
        A=np.random.rand(3),
        B=1,
        C='foo',
        D=pd.Timestamp('20010102'),
        E=pd.Series([1.0] * 3).astype('float32'),
        F=False,
        G=pd.Series([1] * 3, dtype='int8'),
    )
)

In [43]:
# Display the mixed-dtype frame; the scalar columns hold the same value in every row.
dft

Unnamed: 0,A,B,C,D,E,F,G
0,0.49133,1,foo,2001-01-02,1.0,False,1
1,0.486562,1,foo,2001-01-02,1.0,False,1
2,0.303197,1,foo,2001-01-02,1.0,False,1


In [42]:
# Per-column dtypes, returned as a Series indexed by column label.
dft.dtypes

A           float64
B             int64
C            object
D    datetime64[ns]
E           float32
F              bool
G              int8
dtype: object

In [44]:
# dtype of a single column.
dft['A'].dtype

dtype('float64')

## Converting dtypes with `astype`

In [45]:
# Rebind `df1` to a single-column frame of eight standard-normal draws,
# stored as float32.
df1 = pd.DataFrame(
    np.random.randn(8, 1),
    columns=['A'],
    dtype='float32',
)

In [47]:
# Confirm the column was created as float32.
df1.dtypes

A    float32
dtype: object

In [49]:
# Cast the whole frame to float64; the result of astype is bound back to df1.
df1 = df1.astype('float64')
df1.dtypes

A    float64
dtype: object

In [52]:
# Build the integer frame and convert selected columns in one chain:
# 'a' becomes bool, 'c' becomes float64, 'b' is left as it was.
dft1 = (
    pd.DataFrame({'a': [1, 0, 1], 'b': [4, 5, 6], 'c': [7, 8, 9]})
    .astype({'a': bool, 'c': np.float64})
)
dft1

Unnamed: 0,a,b,c
0,True,4,7.0
1,False,5,8.0
2,True,6,9.0


In [53]:
# Verify the per-column conversion: a is bool, c is float64, b is still an integer dtype.
dft1.dtypes

a       bool
b      int64
c    float64
dtype: object