In [4]:
import pandas as pd
import numpy as np

In [10]:
# Seed NumPy's legacy global RNG before the first random draw so that
# "Restart Kernel -> Run All" produces the same numbers on every run.
# Later np.random.* calls draw from this same seeded stream.
SEED = 42
np.random.seed(SEED)

# 8 rows x 3 columns of standard-normal samples, labelled A/B/C.
df = pd.DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'])
df

Unnamed: 0,A,B,C
0,-0.382208,-0.24142,-0.742353
1,-2.080116,0.357193,-1.002231
2,1.756393,0.79077,0.487555
3,0.553511,0.120467,1.995409
4,-0.136014,1.333215,-1.006624
5,0.19974,-0.912216,-0.029992
6,-0.093345,-0.087062,-0.530295
7,-1.036118,-0.787756,1.040949


In [12]:
# Select column 'A' with bracket access and show the array object backing it.
df['A'].array

<PandasArray>
[ -0.3822083932222151,   -2.080115828136748,   1.7563930834277417,
   0.5535109502891793,  -0.1360144471664851,  0.19974049116817869,
 -0.09334511426882847,  -1.0361180706152802]
Length: 8, dtype: float64

## Counting values in Series

In [14]:
# 50 random integers from the half-open range [0, 7), i.e. the values 0..6.
data = np.random.randint(low=0, high=7, size=50)
data

array([5, 2, 1, 6, 2, 2, 6, 2, 0, 5, 3, 1, 1, 1, 5, 4, 2, 0, 5, 2, 5, 1,
       4, 5, 1, 0, 3, 0, 3, 4, 5, 1, 5, 5, 2, 3, 4, 5, 6, 6, 2, 0, 6, 4,
       1, 5, 6, 4, 1, 4])

In [15]:
# Wrap the NumPy array in a Series so pandas methods can be applied to it.
s = pd.Series(data)

In [16]:
# Tally how often each distinct value occurs; the result is ordered from the
# most frequent value to the least frequent.
s.value_counts()

5    11
1     9
2     8
4     7
6     6
0     5
3     4
dtype: int64

In [17]:
# 3 and 7 each occur three times, so this Series has two modes.
observations = [1, 1, 3, 3, 3, 5, 5, 7, 7, 7]
s5 = pd.Series(observations)
s5.mode()

0    3
1    7
dtype: int64

In [18]:
# Two independent integer columns of 50 rows each:
# A is drawn from [0, 7) and B from [-10, 15). A is drawn first.
column_a = np.random.randint(0, 7, size=50)
column_b = np.random.randint(-10, 15, size=50)
df5 = pd.DataFrame({"A": column_a, "B": column_b})

In [19]:
# Column-wise modes: each column lists its own most frequent value(s),
# one per row of the result.
df5.mode()

Unnamed: 0,A,B
0,2,-10
1,4,12


## Altering labels

In [21]:
# Five standard-normal draws labelled 'a' through 'e'.
labels = ['a', 'b', 'c', 'd', 'e']
s = pd.Series(np.random.randn(len(labels)), index=labels)
s

a    0.658059
b    1.206485
c    0.481655
d    0.517802
e    2.377547
dtype: float64

In [22]:
# Select and reorder entries by label. 'f' is not in the original index,
# so it shows up as NaN in the result.
s.reindex(['e', 'b', 'f', 'd'])

e    2.377547
b    1.206485
f         NaN
d    0.517802
dtype: float64

In [24]:
# Three columns with different indexes; the DataFrame constructor aligns them
# on the combined set of labels and leaves NaN where a column has no value.
col_one = pd.Series(np.random.randn(3), index=['a', 'b', 'c'])
col_two = pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd'])
col_three = pd.Series(np.random.randn(3), index=['b', 'c', 'd'])
df = pd.DataFrame({'one': col_one, 'two': col_two, 'three': col_three})
df

Unnamed: 0,one,two,three
a,2.027005,0.276317,
b,0.994746,-0.036271,1.193347
c,-0.655235,0.738489,0.137495
d,,-0.558402,0.770791


In [25]:
# Reindex rows and columns in a single call using the index=/columns= keywords.
# Row 'f' does not exist in df, so it comes back entirely NaN.
df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one'])

Unnamed: 0,three,two,one
c,0.137495,0.738489,-0.655235
f,,,
b,1.193347,-0.036271,0.994746


In [28]:
# Same row reindex, written with the "labels + axis=" calling convention.
df.reindex(['c', 'f', 'b'], axis='index')

Unnamed: 0,one,two,three
c,-0.655235,0.738489,0.137495
f,,,
b,0.994746,-0.036271,1.193347


In [29]:
# Redisplay df: reindex() returned new objects and left the original unchanged.
df

Unnamed: 0,one,two,three
a,2.027005,0.276317,
b,0.994746,-0.036271,1.193347
c,-0.655235,0.738489,0.137495
d,,-0.558402,0.770791


In [30]:
# Drop rows 'a' and 'd' (axis=0 targets the row labels).
df.drop(['a', 'd'], axis=0)

Unnamed: 0,one,two,three
b,0.994746,-0.036271,1.193347
c,-0.655235,0.738489,0.137495


In [33]:
# Drop column 'one' (axis=1 targets the column labels).
df.drop(['one'], axis=1)

Unnamed: 0,two,three
a,0.276317,
b,-0.036271,1.193347
c,0.738489,0.137495
d,-0.558402,0.770791


In [34]:
# Redisplay the float Series labelled a..e before renaming its labels.
s

a    0.658059
b    1.206485
c    0.481655
d    0.517802
e    2.377547
dtype: float64

In [35]:
# rename() accepts a callable: str.upper is applied to every index label.
s.rename(str.upper)

A    0.658059
B    1.206485
C    0.481655
D    0.517802
E    2.377547
dtype: float64

In [37]:
# Mapping-based rename of both axes at once. Labels that are absent from a
# mapping (column 'three', row 'c') keep their original names.
df.rename(columns = {'one': 'foo', 'two': 'bar'}, index = {'a': 'apple', 'b': 'banana', 'd': 'durian'})

Unnamed: 0,foo,bar,three
apple,2.027005,0.276317,
banana,0.994746,-0.036271,1.193347
c,-0.655235,0.738489,0.137495
durian,,-0.558402,0.770791


In [39]:
# Column rename written with the "mapping + axis=" calling convention.
df.rename({'one': 'foo', 'two': 'bar'}, axis='columns')

Unnamed: 0,foo,bar,three
a,2.027005,0.276317,
b,0.994746,-0.036271,1.193347
c,-0.655235,0.738489,0.137495
d,,-0.558402,0.770791


In [43]:
# Row rename written with the "mapping + axis=" calling convention.
df.rename({'a': 'apple', 'b': 'banana', 'd': 'durian'}, axis='index')

Unnamed: 0,one,two,three
apple,2.027005,0.276317,
banana,0.994746,-0.036271,1.193347
c,-0.655235,0.738489,0.137495
durian,,-0.558402,0.770791


## .dt and .str accessors

In [46]:
# Four consecutive daily timestamps, all at 09:10:12, starting on 2013-01-01.
timestamps = pd.date_range(start='20130101 09:10:12', periods=4)
s = pd.Series(timestamps)
s

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]

In [47]:
# The .dt accessor exposes datetime components element-wise; every timestamp
# here is at 09:10:12, so the hour is 9 for each row.
s.dt.hour

0    9
1    9
2    9
3    9
dtype: int64

In [55]:
# Print three more .dt components, each under its own heading.
# The trailing '\n' in the first two calls adds a blank line between sections.
print(f'1. Second\n{s.dt.second}\n')
print(f'2. Day\n{s.dt.day}\n')
# dayofweek numbers Monday as 0, so 2013-01-01 (a Tuesday) prints as 1.
print(f'3. Day of Week\n{s.dt.dayofweek}')

1. Second
0    12
1    12
2    12
3    12
dtype: int64

2. Day
0    1
1    2
2    3
3    4
dtype: int64

3. Day of Week
0    1
1    2
2    3
3    4
dtype: int64


In [56]:
# Attach a time zone to the naive timestamps: the wall-clock times are kept
# as-is and gain the zone's UTC offset.
stz = s.dt.tz_localize('US/Eastern')
stz

0   2013-01-01 09:10:12-05:00
1   2013-01-02 09:10:12-05:00
2   2013-01-03 09:10:12-05:00
3   2013-01-04 09:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [57]:
# The tzinfo object attached to the localized Series.
stz.dt.tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

In [58]:
# Interpret the naive times as UTC, then convert them to US/Eastern: the
# instant is unchanged but the wall-clock time shifts (09:10 UTC -> 04:10 -05:00).
s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

0   2013-01-01 04:10:12-05:00
1   2013-01-02 04:10:12-05:00
2   2013-01-03 04:10:12-05:00
3   2013-01-04 04:10:12-05:00
dtype: datetime64[ns, US/Eastern]

In [60]:
# Mixed-case words plus one missing entry, stored with the "string" dtype;
# .str.lower() lower-cases every element and leaves the missing one missing.
words = ['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']
s = pd.Series(words, dtype="string")
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5    <NA>
6    caba
7     dog
8     cat
dtype: string

## Sorting

In [63]:
# Rebuild a frame whose columns cover different row labels, so that the
# aligned result contains NaN gaps to sort around.
index_by_column = {
    'one': ['a', 'b', 'c'],
    'two': ['a', 'b', 'c', 'd'],
    'three': ['b', 'c', 'd'],
}
df = pd.DataFrame({
    column: pd.Series(np.random.randn(len(row_labels)), index=row_labels)
    for column, row_labels in index_by_column.items()
})
df

Unnamed: 0,one,two,three
a,0.460199,1.835203,
b,1.243385,-1.103504,0.637164
c,-2.268051,-1.76952,0.496958
d,,-0.394821,-2.64055


In [64]:
# Shuffle both axes so the sorting examples have something to reorder.
row_order = ['a', 'd', 'c', 'b']
column_order = ['three', 'two', 'one']
unsorted_df = df.reindex(index=row_order, columns=column_order)
unsorted_df

Unnamed: 0,three,two,one
a,,1.835203,0.460199
d,-2.64055,-0.394821,
c,0.496958,-1.76952,-2.268051
b,0.637164,-1.103504,1.243385


In [65]:
# Sort the rows by index label (ascending by default).
unsorted_df.sort_index()

Unnamed: 0,three,two,one
a,,1.835203,0.460199
b,0.637164,-1.103504,1.243385
c,0.496958,-1.76952,-2.268051
d,-2.64055,-0.394821,


In [66]:
# Sort the rows by index label in descending order.
unsorted_df.sort_index(ascending = False)

Unnamed: 0,three,two,one
d,-2.64055,-0.394821,
c,0.496958,-1.76952,-2.268051
b,0.637164,-1.103504,1.243385
a,,1.835203,0.460199


In [68]:
# axis=1 sorts the column labels instead of the row labels.
unsorted_df.sort_index(axis = 1)

Unnamed: 0,one,three,two
a,0.460199,,1.835203
d,,-2.64055,-0.394821
c,-2.268051,0.496958,-1.76952
b,1.243385,0.637164,-1.103504


In [69]:
# A single column is a Series; sort_index() orders it by its row labels.
unsorted_df['three'].sort_index()

a         NaN
b    0.637164
c    0.496958
d   -2.640550
Name: three, dtype: float64

In [70]:
# Small integer frame with repeated values in 'one', so that multi-column
# sorting has ties to break.
df1_columns = {
    'one': [2, 1, 1, 1],
    'two': [1, 3, 2, 4],
    'three': [5, 4, 3, 2],
}
df1 = pd.DataFrame(df1_columns)
df1

Unnamed: 0,one,two,three
0,2,1,5
1,1,3,4
2,1,2,3
3,1,4,2


In [71]:
# Sort the rows by the values in column 'two'.
df1.sort_values(by = 'two')

Unnamed: 0,one,two,three
0,2,1,5
2,1,2,3
1,1,3,4
3,1,4,2


In [80]:
# Sort by 'one' first; rows that tie on 'one' are then ordered by 'two'.
df1.sort_values(by = ['one', 'two'])

Unnamed: 0,one,two,three
2,1,2,3
1,1,3,4
3,1,4,2
0,2,1,5


In [83]:
# Blank out the entry labelled 2 in place (explicit label-based assignment),
# then sort: missing values are placed last by default.
s.loc[2] = np.nan
s.sort_values()

0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
2    <NA>
5    <NA>
dtype: string

In [84]:
# na_position='first' moves the missing values to the top instead of the bottom.
s.sort_values(na_position = 'first')

2    <NA>
5    <NA>
0       A
3    Aaba
1       B
4    Baca
6    CABA
8     cat
7     dog
dtype: string

In [100]:
# (first, second) label pairs for a two-level index; the list contains
# duplicate pairs and is not in sorted order.
level_pairs = [('a', 1), ('a', 2), ('a', 2), ('b', 2), ('b', 1), ('b', 1)]
idx = pd.MultiIndex.from_tuples(level_pairs, names=['first', 'second'])

In [108]:
# Single column A counting down from 6 to 1, laid over the two-level index.
countdown = np.arange(6, 0, -1)
df_multi = pd.DataFrame({'A': countdown}, index=idx)
df_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
a,1,6
a,2,5
a,2,4
b,2,3
b,1,2
b,1,1


In [109]:
# `by` can mix index level names and column names: order by the 'second'
# index level first, then by column 'A' within each level value.
df_multi.sort_values(by = ['second', 'A'])

Unnamed: 0_level_0,Unnamed: 1_level_0,A
first,second,Unnamed: 2_level_1
b,1,1
b,1,2
a,1,6
b,2,3
a,2,4
a,2,5
