# Pandas Basics

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Create Series

In [47]:
# Build a Series from a plain list; pandas assigns a default 0..5 integer index.
# The np.nan entry makes the whole Series float64, so the integers display as 1.0, 3.0, ...
my_s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
my_s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

## Create DataFrame

In [48]:
# Build a DatetimeIndex of six consecutive days starting on 2013-01-01.
# It is used further down as the row index of a DataFrame.
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [49]:
# Build a DataFrame from a dict that maps column name -> column data.
# Scalar entries ('A', 'B', 'F') are repeated for every row; the array-like
# entries ('C', 'D', 'E') each hold four values and fix the row count at 4.
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3, 3, 3, 3], dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [50]:
# Show the dtype pandas assigned to each column of df2 (one entry per column).
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [51]:
# Build a 6x4 DataFrame of standard-normal samples: rows are labelled by
# `dates`, columns by the letters A-D.
# NOTE: no random seed is set in the visible cells, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.189765,0.426502,-0.499946,-1.205901
2013-01-02,-0.746064,0.532439,-0.135218,-0.249535
2013-01-03,2.128288,-2.178898,0.777346,-1.683638
2013-01-04,-0.882854,-2.115654,0.309835,-0.532152
2013-01-05,-0.295851,0.755165,0.702095,-0.162949
2013-01-06,0.2821,-0.697618,-1.220089,-0.525441


In [52]:
# Underlying data as a 2-D NumPy array; the index and column labels are not included.
df.values

array([[ 0.18976508,  0.42650233, -0.49994566, -1.2059009 ],
       [-0.7460644 ,  0.53243922, -0.13521836, -0.24953492],
       [ 2.12828848, -2.17889847,  0.77734585, -1.68363762],
       [-0.88285381, -2.11565416,  0.30983519, -0.53215235],
       [-0.29585064,  0.75516477,  0.7020955 , -0.16294941],
       [ 0.28210028, -0.69761835, -1.22008932, -0.52544078]])

In [53]:
# Order the rows by their index label (the dates), latest date first.
# sort_index returns a new DataFrame; df itself is left unchanged.
df.sort_index(axis='index', ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,0.2821,-0.697618,-1.220089,-0.525441
2013-01-05,-0.295851,0.755165,0.702095,-0.162949
2013-01-04,-0.882854,-2.115654,0.309835,-0.532152
2013-01-03,2.128288,-2.178898,0.777346,-1.683638
2013-01-02,-0.746064,0.532439,-0.135218,-0.249535
2013-01-01,0.189765,0.426502,-0.499946,-1.205901


In [54]:
# Order the columns by their label in descending order (D, C, B, A).
# sort_index returns a new DataFrame; df itself is left unchanged.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.205901,-0.499946,0.426502,0.189765
2013-01-02,-0.249535,-0.135218,0.532439,-0.746064
2013-01-03,-1.683638,0.777346,-2.178898,2.128288
2013-01-04,-0.532152,0.309835,-2.115654,-0.882854
2013-01-05,-0.162949,0.702095,0.755165,-0.295851
2013-01-06,-0.525441,-1.220089,-0.697618,0.2821


In [55]:
# Flatten all values of df into a one-dimensional ndarray with reshape(-1)
# and show the resulting shape (6 rows x 4 columns -> 24 elements).
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same array and works on old and new pandas alike.
df.values.reshape(-1).shape

(24,)

In [56]:
# Flatten all values of df into a one-dimensional ndarray with ravel()
# (an alternative to reshape(-1)) and show the resulting shape.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same array and works on old and new pandas alike.
df.values.ravel().shape

(24,)

In [57]:
# When there are holes in the index: reset_index() installs a fresh 0..n-1
# integer index and keeps the old index as a regular column named 'index'.
# This returns a new DataFrame; df itself is not modified by this cell.
df.reset_index(drop=False)

Unnamed: 0,index,A,B,C,D
0,2013-01-01,0.189765,0.426502,-0.499946,-1.205901
1,2013-01-02,-0.746064,0.532439,-0.135218,-0.249535
2,2013-01-03,2.128288,-2.178898,0.777346,-1.683638
3,2013-01-04,-0.882854,-2.115654,0.309835,-0.532152
4,2013-01-05,-0.295851,0.755165,0.702095,-0.162949
5,2013-01-06,0.2821,-0.697618,-1.220089,-0.525441


In [58]:
# Replace the date index with a default 0..n-1 integer index and discard the
# dates entirely (drop=True). The result is rebound to `df` instead of using
# inplace=True, so the object displayed by earlier cells is not mutated.
df = df.reset_index(drop=True)