# Pandas Basics

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Create Series

In [7]:
# Build a Series from a plain Python list; the NaN entry marks a missing
# value and the resulting dtype is float64 (see the output below).
my_s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
my_s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

## Create DataFrame

In [8]:
# Build a DatetimeIndex of 6 consecutive periods starting 2013-01-01.
# It is used further down as the row index of `df`.
dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Build a DataFrame from a dict that maps column name -> column data.
# Scalar entries (A, B, F) are repeated for every row; the array-like
# entries (C, D, E) all have 4 elements, which gives the frame 4 rows.
df2 = pd.DataFrame({
    'A': 1.0,                                                      # float scalar
    'B': pd.Timestamp('20130102'),                                 # timestamp scalar
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),      # Series of ones
    'D': np.full(4, 3, dtype='int32'),                             # int32 array of 3s
    'E': pd.Categorical(["test", "train", "test", "train"]),       # categorical labels
    'F': 'foo',                                                    # string scalar
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [10]:
# Show the dtype of each column of df2 (one entry per column).
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [11]:
# Build a 6x4 DataFrame of standard-normal samples, with `dates` as the row
# index and the letters A-D as column labels.
# NOTE: no random seed is set, so the values change on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.728575,-2.354537,-1.630834,0.603714
2013-01-02,0.641661,-1.397705,0.728142,0.787867
2013-01-03,-0.533546,-0.658589,0.4363,-0.21453
2013-01-04,-2.281113,0.13119,-0.131027,-0.413191
2013-01-05,-0.53612,0.562561,-0.409359,0.121337
2013-01-06,1.13126,-0.343071,1.482237,0.65289


In [12]:
# Show the frame's underlying data as a 2-D NumPy array (index and column
# labels are not part of the result).
df.values

array([[-0.72857548, -2.35453748, -1.63083411,  0.6037143 ],
       [ 0.6416606 , -1.39770533,  0.72814177,  0.78786731],
       [-0.53354644, -0.65858889,  0.43630003, -0.21453046],
       [-2.28111275,  0.13119034, -0.13102709, -0.41319121],
       [-0.53612037,  0.56256124, -0.40935923,  0.12133689],
       [ 1.13126045, -0.34307124,  1.48223665,  0.65289011]])

In [13]:
# Sort the rows by their index labels (axis 0) in descending order.
# The result is only displayed; `df` itself is not reassigned.
df.sort_index(ascending=False, axis=0)

Unnamed: 0,A,B,C,D
2013-01-06,1.13126,-0.343071,1.482237,0.65289
2013-01-05,-0.53612,0.562561,-0.409359,0.121337
2013-01-04,-2.281113,0.13119,-0.131027,-0.413191
2013-01-03,-0.533546,-0.658589,0.4363,-0.21453
2013-01-02,0.641661,-1.397705,0.728142,0.787867
2013-01-01,-0.728575,-2.354537,-1.630834,0.603714


In [14]:
# Sort the columns by their labels (axis 1) in descending order: D, C, B, A.
# The result is only displayed; `df` itself is not reassigned.
df.sort_index(ascending=False, axis=1)

Unnamed: 0,D,C,B,A
2013-01-01,0.603714,-1.630834,-2.354537,-0.728575
2013-01-02,0.787867,0.728142,-1.397705,0.641661
2013-01-03,-0.21453,0.4363,-0.658589,-0.533546
2013-01-04,-0.413191,-0.131027,0.13119,-2.281113
2013-01-05,0.121337,-0.409359,0.562561,-0.53612
2013-01-06,0.65289,1.482237,-0.343071,1.13126


In [19]:
# Flatten the frame's data into a one-dimensional ndarray with reshape(-1)
# and show its shape.
# `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same 2-D ndarray and works on old and new versions.
df.values.reshape(-1).shape

(24,)

In [22]:
# Flatten the frame's data into a one-dimensional ndarray with ravel()
# and show its shape.
# `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;
# `.values` returns the same 2-D ndarray and works on old and new versions.
df.values.ravel().shape

(24,)