In [1]:
# Magic command: render matplotlib figures inline in the notebook.
%matplotlib inline

import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so every np.random.randn example below yields the
# same values on each Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)

plt.style.use('default')  # make the plots look a bit nicer
plt.rcParams['figure.figsize'] = (15, 5)  # default figure size (width, height)

## Basics

### Difference between Series and DataFrames

In [3]:
# Eight consecutive calendar days starting on 2000-01-01 (daily frequency).
index = pd.date_range(start='2000-01-01', periods=8, freq='D')
index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [8]:
# A Series is a one-dimensional labelled array: five random draws keyed 'a'..'e'.
s = pd.Series(data=np.random.randn(5), index=list('abcde'))
s

a    0.361096
b   -0.128985
c   -1.812994
d    2.341465
e   -0.104195
dtype: float64

In [39]:
# First two elements of the Series, selected by position.
s.iloc[:2]

a    0.361096
b   -0.128985
dtype: float64

In [41]:
# A DataFrame is a two-dimensional labelled table: here one row per date in
# `index` and three columns of random values.
df = pd.DataFrame(
    data=np.random.randn(8, 3),
    index=index,
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
2000-01-01,0.393665,-0.447429,-0.958528
2000-01-02,-0.555935,0.763802,0.401253
2000-01-03,-1.473354,-0.747054,0.450336
2000-01-04,0.339217,0.463828,-1.275977
2000-01-05,-0.970094,0.574072,1.319665
2000-01-06,-0.071996,-1.050359,-0.455212
2000-01-07,0.836221,1.019894,-0.692374
2000-01-08,1.912515,-0.471873,1.146051


In [42]:
# Row labels of the DataFrame: the DatetimeIndex that was passed as `index=`.
df.index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Bare NumPy call for comparison: an unlabelled 8x3 array of random samples,
# the same kind of raw data that was wrapped in a DataFrame above.
np.random.randn(8, 3)

array([[ 1.44964888,  2.61305999,  0.65528711],
       [-1.27823321, -0.81834959, -0.08112856],
       [ 1.11545792,  0.10713846, -0.72319262],
       [ 2.21465685,  0.25909875, -2.12727954],
       [ 0.26846608, -0.5390112 ,  0.78508373],
       [ 0.98245299, -0.58096006, -1.83465214],
       [-0.35455814, -0.74294363,  1.05222191],
       [-0.91526366,  0.70543912,  0.88710118]])

In [11]:
# Build a 3-D labelled container of random values: 2 items x 5 dates x 4 columns.
# NOTE(review): pd.Panel was deprecated in pandas 0.20 and removed in 0.25, so
# this cell only runs on older pandas. On current versions the usual
# replacements are a MultiIndex DataFrame or xarray — confirm which pandas
# version this notebook targets before re-running.
wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'],\
              major_axis=pd.date_range('1/1/2000', periods=5),\
              minor_axis=['A', 'B', 'C', 'D'])
wp

<class 'pandas.core.panel.Panel'>
Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis)
Items axis: Item1 to Item2
Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00
Minor_axis axis: A to D

### Attributes

In [17]:
# First three rows of the DataFrame, selected by position.
df.iloc[:3]

Unnamed: 0,A,B,C
2000-01-01,0.387795,-0.275151,0.335348
2000-01-02,-0.002044,2.504292,-0.914655
2000-01-03,-0.632262,0.09399,-0.110173


In [18]:
# Column labels of the DataFrame, returned as a pandas Index object.
df.columns

Index(['A', 'B', 'C'], dtype='object')

In [19]:
# Column labels as a plain Python list.
list(df.columns)

['A', 'B', 'C']

In [21]:
# Underlying data of the DataFrame as a 2-D NumPy array (row/column labels dropped).
df.values

array([[  3.87795416e-01,  -2.75151314e-01,   3.35347758e-01],
       [ -2.04431752e-03,   2.50429188e+00,  -9.14655087e-01],
       [ -6.32261820e-01,   9.39904289e-02,  -1.10173155e-01],
       [  8.90491587e-01,   6.02740024e-01,  -2.32986364e-02],
       [  7.70993244e-01,  -4.85750726e-02,   1.19255004e+00],
       [ -2.53604598e+00,   2.12526718e-01,   1.14201441e+00],
       [  1.96809361e-01,  -9.45341973e-01,  -7.29806036e-02],
       [ -1.38461485e+00,  -3.15278690e+00,   1.01901306e+00]])

In [22]:
# Same attribute on a Series: its data as a 1-D NumPy array.
s.values

array([ 0.36109584, -0.12898477, -1.81299416,  2.34146489, -0.10419526])

### Binary operations

In [23]:
# Build a DataFrame from a dict of Series whose indexes only partly overlap.
# The rows are aligned on the union of the labels ('a'..'d'); positions a
# Series does not cover are filled with NaN.
df = pd.DataFrame({
    'one': pd.Series(np.random.randn(3), index=list('abc')),
    'two': pd.Series(np.random.randn(4), index=list('abcd')),
    'three': pd.Series(np.random.randn(3), index=list('bcd')),
})
df

Unnamed: 0,one,three,two
a,-0.956812,,-0.481334
b,0.252814,-0.007351,0.335352
c,-0.781272,0.716216,1.129366
d,,-0.045235,0.36781


In [26]:
# Select the second row by position. The result is a Series indexed by the
# frame's column labels, and its name is the row label.
row = df.iloc[1]
row

one      0.252814
three   -0.007351
two      0.335352
Name: b, dtype: float64

In [32]:
# Slicing (instead of scalar indexing) returns the same row as a one-row DataFrame.
df.iloc[1:2]

Unnamed: 0,one,three,two
b,0.252814,-0.007351,0.335352


In [27]:
# Select a single column by label; the result is a Series indexed by the row labels.
column = df['two']
column

a   -0.481334
b    0.335352
c    1.129366
d    0.367810
Name: two, dtype: float64

In [38]:
# First two entries of column 'two', selected by position.
df['two'].iloc[:2]

a   -0.481334
b    0.335352
Name: two, dtype: float64

In [35]:
# Element level: True wherever a value is missing (NaN), False elsewhere.
df.isnull()

Unnamed: 0,one,three,two
a,False,True,False
b,False,False,False
c,False,False,False
d,True,False,False


In [36]:
# Column level: does the column contain at least one missing value?
df.isnull().any()

one       True
three     True
two      False
dtype: bool

In [37]:
# Column level: number of missing values in each column.
df.isnull().sum()

one      1
three    1
two      0
dtype: int64