# Brushing up Pandas a bit

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Notebook-wide pandas display settings: plain-text reprs instead of HTML
# tables, and long output truncated to at most 8 columns and 8 rows.
pd.options.display.notebook_repr_html = False
pd.options.display.max_columns = 8
pd.options.display.max_rows = 8

# Default size (width, height in inches) for every figure drawn in this notebook.
plt.rc("figure", figsize=(15, 10))

In [7]:
# Seed NumPy's global random generator first, so the draws below (and every
# output that depends on them) come out the same on each run.
np.random.seed(1234)

# One hundred standard-normal samples wrapped in a Series; no index is given,
# so the labels default to the integers 0..99.
s = pd.Series(data=np.random.randn(100))
s

0     0.471435
1    -1.190976
2     1.432707
3    -0.312652
        ...   
96   -0.344766
97    0.528288
98   -1.068989
99   -0.511881
dtype: float64

In [8]:
# Slice out the elements at positions 2 through 19 (the stop value 20 is excluded).
s[2:20]

2     1.432707
3    -0.312652
4    -0.720589
5     0.887163
        ...   
16    0.405453
17    0.289092
18    1.321158
19   -1.546906
dtype: float64

In [9]:
# The data held by the Series as a plain NumPy array, without the index.
s.values

array([  4.71435164e-01,  -1.19097569e+00,   1.43270697e+00,
        -3.12651896e-01,  -7.20588733e-01,   8.87162940e-01,
         8.59588414e-01,  -6.36523504e-01,   1.56963721e-02,
        -2.24268495e+00,   1.15003572e+00,   9.91946022e-01,
         9.53324128e-01,  -2.02125482e+00,  -3.34077366e-01,
         2.11836468e-03,   4.05453412e-01,   2.89091941e-01,
         1.32115819e+00,  -1.54690555e+00,  -2.02646325e-01,
        -6.55969344e-01,   1.93421376e-01,   5.53438911e-01,
         1.31815155e+00,  -4.69305285e-01,   6.75554085e-01,
        -1.81702723e+00,  -1.83108540e-01,   1.05896919e+00,
        -3.97840228e-01,   3.37437654e-01,   1.04757857e+00,
         1.04593826e+00,   8.63717292e-01,  -1.22091575e-01,
         1.24712954e-01,  -3.22794806e-01,   8.41674713e-01,
         2.39096052e+00,   7.61995878e-02,  -5.66445930e-01,
         3.61419367e-02,  -2.07497760e+00,   2.47792200e-01,
        -8.97156784e-01,  -1.36794833e-01,   1.82891913e-02,
         7.55413982e-01,

In [10]:
# The labels of s; no index was supplied at creation, so they are the integers 0..99.
s.index

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
            68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
            85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
           dtype='int64')

In [11]:
# Number of elements in the Series.
len(s)

100

In [12]:
# Shape as a tuple; a Series is one-dimensional, so the tuple has a single entry.
s.shape

(100,)

In [13]:
# Number of non-null observations in s. The parentheses matter: a bare
# `s.count` evaluates to the bound method object and merely displays its repr
# instead of computing the count.
s.count()

100

In [14]:
# Distinct values of s, returned as a NumPy array.
s.unique()

array([  4.71435164e-01,  -1.19097569e+00,   1.43270697e+00,
        -3.12651896e-01,  -7.20588733e-01,   8.87162940e-01,
         8.59588414e-01,  -6.36523504e-01,   1.56963721e-02,
        -2.24268495e+00,   1.15003572e+00,   9.91946022e-01,
         9.53324128e-01,  -2.02125482e+00,  -3.34077366e-01,
         2.11836468e-03,   4.05453412e-01,   2.89091941e-01,
         1.32115819e+00,  -1.54690555e+00,  -2.02646325e-01,
        -6.55969344e-01,   1.93421376e-01,   5.53438911e-01,
         1.31815155e+00,  -4.69305285e-01,   6.75554085e-01,
        -1.81702723e+00,  -1.83108540e-01,   1.05896919e+00,
        -3.97840228e-01,   3.37437654e-01,   1.04757857e+00,
         1.04593826e+00,   8.63717292e-01,  -1.22091575e-01,
         1.24712954e-01,  -3.22794806e-01,   8.41674713e-01,
         2.39096052e+00,   7.61995878e-02,  -5.66445930e-01,
         3.61419367e-02,  -2.07497760e+00,   2.47792200e-01,
        -8.97156784e-01,  -1.36794833e-01,   1.82891913e-02,
         7.55413982e-01,

In [15]:
# No parentheses here: this evaluates to the bound method object itself, which
# is why the output is a `<bound method Series.unique of ...>` repr rather than
# an array. Call `s.unique()` to actually get the distinct values.
s.unique

<bound method Series.unique of 0     0.471435
1    -1.190976
2     1.432707
3    -0.312652
        ...   
96   -0.344766
97    0.528288
98   -1.068989
99   -0.511881
dtype: float64>

In [16]:
# Tally how often each distinct value occurs; the result is a Series indexed by
# value. Every value shown in the output below occurs exactly once.
s.value_counts()

 1.058969    1
-2.074978    1
-0.397840    1
 0.036142    1
            ..
 0.215269    1
-1.068989    1
 0.307969    1
 1.047579    1
dtype: int64

In [17]:
# A Series with explicit string labels 'a'..'f' in place of the default
# integer index.
s2 = pd.Series(data=[12, 45, 85, 4, 47, 58], index=list('abcdef'))
s2

a    12
b    45
c    85
d     4
e    47
f    58
dtype: int64

In [18]:
# Build a DataFrame from a 2x2 NumPy array; with no labels given, both the rows
# and the columns are numbered from 0.
pd.DataFrame(np.array([[10, 11], [20, 21]]))

    0   1
0  10  11
1  20  21

In [19]:
# Each Series in the list becomes one ROW of the frame: the first holds
# 10..14, the second 15..19.
df1 = pd.DataFrame([pd.Series(np.arange(start, start + 5)) for start in (10, 15)])
df1  # note the column names: default integer labels taken from the Series index

    0   1   2   3   4
0  10  11  12  13  14
1  15  16  17  18  19

In [20]:
# (number of rows, number of columns) of df1.
df1.shape

(2, 5)

In [21]:
# Same 2x2 data as before, but this time the columns are given explicit names.
df = pd.DataFrame(data=np.array([[10, 11], [20, 21]]), columns=list('ab'))
df  # the columns are now labelled 'a' and 'b' rather than 0 and 1

    a   b
0  10  11
1  20  21

In [22]:
# The column labels of df, held in a pandas Index object.
df.columns

Index(['a', 'b'], dtype='object')

In [23]:
# Assigning a list to .columns relabels the columns of df in place
# ('a', 'b' become 'c1', 'c2'); the data itself is untouched.
df.columns = ['c1', 'c2']
df

   c1  c2
0  10  11
1  20  21

In [24]:
# Check the column labels again: they now read 'c1' and 'c2'.
df.columns

Index(['c1', 'c2'], dtype='object')

In [25]:
# The frame's data as a 2-D NumPy array, without row or column labels.
df.values

array([[10, 11],
       [20, 21]])

In [26]:
# Two equally long Series sharing the default 0..4 index.
# Note: this rebinds s2, replacing the string-labelled Series created earlier.
s1 = pd.Series(np.arange(1, 6))
s2 = pd.Series(np.arange(6, 11))

# In a dict of Series, each key becomes a column name and each Series a column.
pd.DataFrame(dict(c1=s1, c2=s2))

   c1  c2
0   1   6
1   2   7
2   3   8
3   4   9
4   5  10

In [27]:
# s3 only carries the labels 1 and 2, while s1 and s2 cover 0..4.
s3 = pd.Series(data=np.arange(12, 14), index=[1, 2])

# The columns are aligned on their index labels, so c3 is NaN in every row
# for which s3 has no entry (rows 0, 3 and 4).
pd.DataFrame(dict(c1=s1, c2=s2, c3=s3))

   c1  c2  c3
0   1   6 NaN
1   2   7  12
2   3   8  13
3   4   9 NaN
4   5  10 NaN