# Pandas Viewing Walkthrough

In [1]:
import numpy as np
import pandas as pd

### Object creation

In [2]:
# A Series built from a plain Python list. The NaN entry means the values
# are stored as float64 rather than as integers.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive calendar days starting on 2013-01-01 (daily frequency).
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Draw the 6x4 standard-normal sample from a seeded Generator so that
# Restart & Run All rebuilds exactly the same frame every time.
# NOTE: outputs saved before this seed was introduced will not match these
# values until the notebook is re-executed.
df = pd.DataFrame(
    np.random.default_rng(seed=0).standard_normal((6, 4)),
    index=dates,
    columns=list('ABCD'),
)

In [7]:
# Show the whole frame; it is only 6 rows by 4 columns.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.419453,-1.301803,-0.685451,-0.40249
2013-01-02,-1.260867,-0.083475,0.159367,0.497434
2013-01-03,0.157631,0.108265,-0.553452,0.891267
2013-01-04,1.677113,-2.481489,0.562758,-0.386944
2013-01-05,0.834419,-0.271327,0.374099,0.878938
2013-01-06,2.126481,0.975113,-0.849575,-0.947515


In [8]:
# Build a frame from a mapping of column name -> column spec. The scalar
# entries ('A', 'B', 'F') are repeated on every row; the array-like entries
# ('C', 'D', 'E') each supply 4 values, which fixes the frame at 4 rows.
df2 = pd.DataFrame(dict(
    A=1.0,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.full(4, 3, dtype='int32'),
    E=pd.Categorical(['test', 'train'] * 2),
    F='foo',
))

In [9]:
# Display the 4-row, mixed-dtype frame.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [11]:
# One dtype per column; unlike df, the columns of df2 do not share a dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Viewing data

In [12]:
# First rows of the frame; called without an argument, head() shows 5 rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.419453,-1.301803,-0.685451,-0.40249
2013-01-02,-1.260867,-0.083475,0.159367,0.497434
2013-01-03,0.157631,0.108265,-0.553452,0.891267
2013-01-04,1.677113,-2.481489,0.562758,-0.386944
2013-01-05,0.834419,-0.271327,0.374099,0.878938


In [13]:
# Last 3 rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,1.677113,-2.481489,0.562758,-0.386944
2013-01-05,0.834419,-0.271327,0.374099,0.878938
2013-01-06,2.126481,0.975113,-0.849575,-0.947515


In [14]:
# Row labels: the DatetimeIndex that was passed as index=dates when df was built.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Column labels ('A' through 'D').
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [16]:
# The frame's values as a 2-D NumPy array; the index and column labels are
# not part of the result.
df.to_numpy()

array([[-0.4194528 , -1.30180262, -0.68545107, -0.4024899 ],
       [-1.26086675, -0.08347481,  0.15936689,  0.4974344 ],
       [ 0.15763073,  0.1082651 , -0.55345204,  0.89126695],
       [ 1.67711253, -2.48148858,  0.56275822, -0.38694413],
       [ 0.83441937, -0.27132745,  0.37409862,  0.87893789],
       [ 2.12648093,  0.97511338, -0.84957511, -0.9475155 ]])

In [17]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.519221,-0.509119,-0.165376,0.088448
std,1.280931,1.211645,0.602641,0.771609
min,-1.260867,-2.481489,-0.849575,-0.947515
25%,-0.275182,-1.044184,-0.652451,-0.398603
50%,0.496025,-0.177401,-0.197043,0.055245
75%,1.466439,0.06033,0.320416,0.783562
max,2.126481,0.975113,0.562758,0.891267


In [18]:
# Transpose: the columns A-D become rows and the dates become column labels.
df.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.419453,-1.260867,0.157631,1.677113,0.834419,2.126481
B,-1.301803,-0.083475,0.108265,-2.481489,-0.271327,0.975113
C,-0.685451,0.159367,-0.553452,0.562758,0.374099,-0.849575
D,-0.40249,0.497434,0.891267,-0.386944,0.878938,-0.947515


In [19]:
# Reorder the columns by label in descending order (D, C, B, A); the row
# order is left as it is.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.40249,-0.685451,-1.301803,-0.419453
2013-01-02,0.497434,0.159367,-0.083475,-1.260867
2013-01-03,0.891267,-0.553452,0.108265,0.157631
2013-01-04,-0.386944,0.562758,-2.481489,1.677113
2013-01-05,0.878938,0.374099,-0.271327,0.834419
2013-01-06,-0.947515,-0.849575,0.975113,2.126481


In [20]:
# Sort the rows by the values in column 'B', smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-04,1.677113,-2.481489,0.562758,-0.386944
2013-01-01,-0.419453,-1.301803,-0.685451,-0.40249
2013-01-05,0.834419,-0.271327,0.374099,0.878938
2013-01-02,-1.260867,-0.083475,0.159367,0.497434
2013-01-03,0.157631,0.108265,-0.553452,0.891267
2013-01-06,2.126481,0.975113,-0.849575,-0.947515
