# [10 Minutes to pandas](http://pandas.pydata.org/pandas-docs/stable/10min.html) #

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Object Creation ##
Links:
- [Intro to Data Structures](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#dsintro)
- [Series](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html#pandas.Series)
- [DataFrame](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html#pandas.DataFrame)
- [dtypes](http://pandas.pydata.org/pandas-docs/stable/basics.html#basics-dtypes)

In [2]:
# build a Series from a list of values; pandas supplies a default integer index
s = pd.Series(data=[1.0, 3.0, 5.0, np.nan, 6.0, 8.0])

In [3]:
# show the Series; the NaN entry makes pandas store the values as float64
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# build a DatetimeIndex of six consecutive days starting 2019-01-01;
# it is used as the row index of the DataFrame created in a later cell
dates = pd.date_range(start='20190101', periods=6)

In [5]:
# show the generated index: six daily timestamps starting 2019-01-01
dates

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# create a DataFrame from a 6x4 NumPy array of standard-normal draws,
# with the datetime index built above and labeled columns A-D
# NOTE: the draws are unseeded, so the values differ on every run
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)

In [7]:
# show the whole frame (it has only six rows)
df

Unnamed: 0,A,B,C,D
2019-01-01,0.712858,-0.056186,-1.275078,0.453491
2019-01-02,-0.550603,1.490343,-0.781002,-0.555667
2019-01-03,0.1093,0.943349,0.617169,1.107052
2019-01-04,-1.718014,-0.052366,0.080293,0.716037
2019-01-05,-1.639626,0.295878,0.642817,-0.469266
2019-01-06,-0.738423,-0.603528,-0.824169,-1.656688


In [8]:
# build a DataFrame from a dict that maps column name -> column data;
# scalar entries (A, B, F) are repeated to match the four-row array-like columns
df2 = pd.DataFrame(
    {
        'A': 1.0,
        'B': pd.Timestamp('20190101'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'foo',
    }
)

In [9]:
# show the frame: the scalar entries (A, B, F) appear on all four rows
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2019-01-01,1.0,3,test,foo
1,1.0,2019-01-01,1.0,3,train,foo
2,1.0,2019-01-01,1.0,3,test,foo
3,1.0,2019-01-01,1.0,3,train,foo


In [10]:
# inspect the dtype of each column of df2; each column keeps its own dtype
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

## Viewing Data ##
Links:
- [Essential Basic Functionality](http://pandas.pydata.org/pandas-docs/stable/basics.html#basics)

In [11]:
# preview the top of the frame; head() with no argument returns the first 5 rows
df.head()

Unnamed: 0,A,B,C,D
2019-01-01,0.712858,-0.056186,-1.275078,0.453491
2019-01-02,-0.550603,1.490343,-0.781002,-0.555667
2019-01-03,0.1093,0.943349,0.617169,1.107052
2019-01-04,-1.718014,-0.052366,0.080293,0.716037
2019-01-05,-1.639626,0.295878,0.642817,-0.469266


In [12]:
# preview the last 3 rows of the frame
df.tail(3)

Unnamed: 0,A,B,C,D
2019-01-04,-1.718014,-0.052366,0.080293,0.716037
2019-01-05,-1.639626,0.295878,0.642817,-0.469266
2019-01-06,-0.738423,-0.603528,-0.824169,-1.656688


In [13]:
# row labels: the DatetimeIndex passed as `index` when df was built
df.index

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')

In [14]:
# column labels of the frame
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [15]:
# underlying data as a plain NumPy array (index and column labels are dropped);
# to_numpy() is the accessor pandas recommends over the older .values attribute
df.to_numpy()

array([[ 0.71285819, -0.05618576, -1.27507751,  0.45349098],
       [-0.55060257,  1.49034269, -0.78100198, -0.55566679],
       [ 0.10930042,  0.94334884,  0.61716936,  1.10705187],
       [-1.71801381, -0.05236597,  0.08029279,  0.71603743],
       [-1.63962627,  0.29587846,  0.64281732, -0.46926624],
       [-0.73842259, -0.60352799, -0.82416916, -1.65668804]])

In [16]:
# quick statistical summary of each column: count, mean, std, min, quartiles, max
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.637418,0.336248,-0.256662,-0.067507
std,0.956105,0.760308,0.814905,1.01884
min,-1.718014,-0.603528,-1.275078,-1.656688
25%,-1.414325,-0.055231,-0.813377,-0.534067
50%,-0.644513,0.121756,-0.350355,-0.007888
75%,-0.055675,0.781481,0.48295,0.650401
max,0.712858,1.490343,0.642817,1.107052


In [17]:
# transpose the frame: columns A-D become rows and the dates become column labels
df.T

Unnamed: 0,2019-01-01 00:00:00,2019-01-02 00:00:00,2019-01-03 00:00:00,2019-01-04 00:00:00,2019-01-05 00:00:00,2019-01-06 00:00:00
A,0.712858,-0.550603,0.1093,-1.718014,-1.639626,-0.738423
B,-0.056186,1.490343,0.943349,-0.052366,0.295878,-0.603528
C,-1.275078,-0.781002,0.617169,0.080293,0.642817,-0.824169
D,0.453491,-0.555667,1.107052,0.716037,-0.469266,-1.656688


In [18]:
# sort along axis 1, i.e. reorder the columns by label, in descending order (D..A)
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2019-01-01,0.453491,-1.275078,-0.056186,0.712858
2019-01-02,-0.555667,-0.781002,1.490343,-0.550603
2019-01-03,1.107052,0.617169,0.943349,0.1093
2019-01-04,0.716037,0.080293,-0.052366,-1.718014
2019-01-05,-0.469266,0.642817,0.295878,-1.639626
2019-01-06,-1.656688,-0.824169,-0.603528,-0.738423


In [19]:
# sort the rows by the values in column B (ascending by default)
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2019-01-06,-0.738423,-0.603528,-0.824169,-1.656688
2019-01-01,0.712858,-0.056186,-1.275078,0.453491
2019-01-04,-1.718014,-0.052366,0.080293,0.716037
2019-01-05,-1.639626,0.295878,0.642817,-0.469266
2019-01-03,0.1093,0.943349,0.617169,1.107052
2019-01-02,-0.550603,1.490343,-0.781002,-0.555667
