# Pandas Basics

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Create Series

In [5]:
# Build a Series from a plain Python list; pandas assigns a default
# integer index (0..5). The list contains one missing value (NaN).
series_values = [1, 3, 5, np.nan, 6, 8]
my_s = pd.Series(series_values)
my_s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

## Create DataFrame

In [9]:
# Build a DatetimeIndex of six consecutive days starting on 2013-01-01.
# This is an index, not a DataFrame; it is used as the row index of `df`.
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Create a 6x4 DataFrame of standard-normal random values, using `dates`
# as the row index and the letters A-D as column labels.
#
# The global NumPy RNG is seeded first so that re-running the notebook from a
# fresh kernel reproduces the same numbers; without a seed every run yields
# different values. (Outputs saved before the seed was added will change on
# the next run.)
np.random.seed(0)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.172995,-1.428918,-0.86462,0.515432
2013-01-02,1.335148,-0.334186,-0.338378,1.072638
2013-01-03,0.373446,0.955793,-1.899978,-0.920736
2013-01-04,1.567426,-0.106423,-0.464616,0.251373
2013-01-05,-1.448839,-1.316393,-0.00029,0.33397
2013-01-06,-1.322475,-0.29994,1.938535,-0.957269


In [13]:
# Build a DataFrame from a dict that maps column name -> column data.
# The scalar entries (A, B, F) are repeated for every row, so they line up
# with the four-element array-like columns (C, D, E).
df2_columns = {
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train"] * 2),
    'F': 'foo',
}
df2 = pd.DataFrame(df2_columns)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [14]:
# Show the dtype pandas assigned to each column of df2.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [15]:
# Get the DataFrame's data as a plain NumPy array; the index and column
# labels are not part of the result.
# NOTE(review): recent pandas documentation recommends DataFrame.to_numpy()
# over .values — consider switching if the pandas version in use supports it.
df.values

array([[ -1.72994665e-01,  -1.42891755e+00,  -8.64620409e-01,
          5.15431986e-01],
       [  1.33514841e+00,  -3.34186033e-01,  -3.38377785e-01,
          1.07263814e+00],
       [  3.73446164e-01,   9.55792627e-01,  -1.89997820e+00,
         -9.20735704e-01],
       [  1.56742641e+00,  -1.06423048e-01,  -4.64615590e-01,
          2.51372983e-01],
       [ -1.44883904e+00,  -1.31639336e+00,  -2.89573793e-04,
          3.33970346e-01],
       [ -1.32247549e+00,  -2.99939666e-01,   1.93853478e+00,
         -9.57269246e-01]])

In [16]:
# Reorder the rows by their index labels (axis 0), in descending order.
df.sort_index(axis='index', ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,-1.322475,-0.29994,1.938535,-0.957269
2013-01-05,-1.448839,-1.316393,-0.00029,0.33397
2013-01-04,1.567426,-0.106423,-0.464616,0.251373
2013-01-03,0.373446,0.955793,-1.899978,-0.920736
2013-01-02,1.335148,-0.334186,-0.338378,1.072638
2013-01-01,-0.172995,-1.428918,-0.86462,0.515432


In [18]:
# Reorder the columns by their labels (axis 1), in descending order.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.515432,-0.86462,-1.428918,-0.172995
2013-01-02,1.072638,-0.338378,-0.334186,1.335148
2013-01-03,-0.920736,-1.899978,0.955793,0.373446
2013-01-04,0.251373,-0.464616,-0.106423,1.567426
2013-01-05,0.33397,-0.00029,-1.316393,-1.448839
2013-01-06,-0.957269,1.938535,-0.29994,-1.322475
