###### Data Structure in Pandas

Basically, there are two types of data structure in pandas.

They are:
    1. Series      1D labeled homogeneously-typed array
    2. DataFrame   General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed columns
    
In short,
    DataFrame is a container for Series, and Series is a container for scalars

##### Import Preliminary

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

###### Creating a Series by passing a list of values

In [3]:
# np.nan marks a missing entry; because NaN is a float, the whole Series is stored as float64.
s = pd.Series(data=[1, 2, 4, np.nan, 8, 9])

In [4]:
s

0    1.0
1    2.0
2    4.0
3    NaN
4    8.0
5    9.0
dtype: float64

###### Creating a DataFrame 

In [5]:
# Each inner list is one row; row labels run 1..3 and the two columns are named 'a' and 'b'.
df = pd.DataFrame(
    data=[[1, 2], [2, 3], [2, 4]],
    index=[1, 2, 3],
    columns=list('ab'),
)

In [6]:
df

Unnamed: 0,a,b
1,1,2
2,2,3
3,2,4


###### DatetimeIndex in pandas

In [7]:
# 18 consecutive calendar days starting at 2018-01-22 (daily frequency, spelled out explicitly).
date = pd.date_range(start='20180122', periods=18, freq='D')

In [8]:
date

DatetimeIndex(['2018-01-22', '2018-01-23', '2018-01-24', '2018-01-25',
               '2018-01-26', '2018-01-27', '2018-01-28', '2018-01-29',
               '2018-01-30', '2018-01-31', '2018-02-01', '2018-02-02',
               '2018-02-03', '2018-02-04', '2018-02-05', '2018-02-06',
               '2018-02-07', '2018-02-08'],
              dtype='datetime64[ns]', freq='D')

###### Creating DataFrame using dictionaries 

In [9]:
# A single record: the dict keys become the column names.
info = dict(name='Anuj', age=26, address='Kathmandu')
# Wrapping the dict in a list turns it into one row of the frame.
df = pd.DataFrame(data=[info])

In [10]:
df

Unnamed: 0,address,age,name
0,Kathmandu,26,Anuj


###### Creating DataFrame from dict of Series or dicts

In [14]:
# Two Series with different (and differently ordered) index labels:
# 'one' has no entry for 'd', and 'two' lists 'd' before 'c'.
d = {
    'one': pd.Series([1., 2., 3.], index=list('abc')),
    'two': pd.Series([1., 2., 3., 4.], index=list('abdc')),
}

In [15]:
# The Series are aligned on the union of their index labels; a label that a
# Series does not have (here 'd' in column 'one') is filled with NaN.
df = pd.DataFrame(d)

In [16]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,4.0
d,,3.0


###### Creating DataFrame from dict of ndarrays / lists

In [17]:
# Plain lists of equal length: each key is a column name, each list is that column's values.
d = dict(
    one=[1., 2., 3., 4.],
    two=[4., 3., 2., 1.],
)

In [18]:
# No index is passed, so the rows get the default integer labels 0..3.
df = pd.DataFrame(d)

In [19]:
df

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


###### Viewing data types of each column

In [33]:
df.dtypes

one    float64
two    float64
dtype: object

###### Create big dataframe for later use.

In [46]:
# Build 20 identical records, each mapping the column labels 10..19 to the values 110..119,
# then turn the list of records into a 20 x 10 frame.
data = [dict(zip(range(10, 20), range(110, 120))) for _ in range(20)]
df = pd.DataFrame(data=data)

###### Get top rows from dataframe (default will be 5)

In [48]:
df.head()

Unnamed: 0,10,11,12,13,14,15,16,17,18,19
0,110,111,112,113,114,115,116,117,118,119
1,110,111,112,113,114,115,116,117,118,119
2,110,111,112,113,114,115,116,117,118,119
3,110,111,112,113,114,115,116,117,118,119
4,110,111,112,113,114,115,116,117,118,119


###### Get top custom number of rows

In [62]:
df.head(6)

Unnamed: 0,10,11,12,13,14,15,16,17,18,19
0,110,111,112,113,114,115,116,117,118,119
1,110,111,112,113,114,115,116,117,118,119
2,110,111,112,113,114,115,116,117,118,119
3,110,111,112,113,114,115,116,117,118,119
4,110,111,112,113,114,115,116,117,118,119
5,110,111,112,113,114,115,116,117,118,119


###### Get bottom rows from the dataframe (default will be 5)

In [63]:
df.tail()

Unnamed: 0,10,11,12,13,14,15,16,17,18,19
15,110,111,112,113,114,115,116,117,118,119
16,110,111,112,113,114,115,116,117,118,119
17,110,111,112,113,114,115,116,117,118,119
18,110,111,112,113,114,115,116,117,118,119
19,110,111,112,113,114,115,116,117,118,119


###### Get custom number of bottom rows

In [64]:
df.tail(6)

Unnamed: 0,10,11,12,13,14,15,16,17,18,19
14,110,111,112,113,114,115,116,117,118,119
15,110,111,112,113,114,115,116,117,118,119
16,110,111,112,113,114,115,116,117,118,119
17,110,111,112,113,114,115,116,117,118,119
18,110,111,112,113,114,115,116,117,118,119
19,110,111,112,113,114,115,116,117,118,119


###### Get index of dataframe

In [65]:
df.index

RangeIndex(start=0, stop=20, step=1)

In [66]:
list(df.index)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

###### Get columns of dataframe

In [67]:
df.columns

Int64Index([10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype='int64')

###### Quick statistical summary of your data

In [68]:
df.describe()

Unnamed: 0,10,11,12,13,14,15,16,17,18,19
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,110.0,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,110.0,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0
25%,110.0,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0
50%,110.0,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0
75%,110.0,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0
max,110.0,111.0,112.0,113.0,114.0,115.0,116.0,117.0,118.0,119.0


###### You can even inspect individual statistics

In [61]:
df.min()

10    110
11    111
12    112
13    113
14    114
15    115
16    116
17    117
18    118
19    119
dtype: int64

In [69]:
df.mean()

10    110.0
11    111.0
12    112.0
13    113.0
14    114.0
15    115.0
16    116.0
17    117.0
18    118.0
19    119.0
dtype: float64

#### Transposing Data

In [70]:
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
10,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110,110
11,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111
12,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112
13,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113,113
14,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,114
15,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115
16,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116
17,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117
18,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118
19,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119,119


###### Sorting by an axis

###### Here axis=0 sorts by the row labels (the index) and axis=1 sorts by the column labels

In [79]:
df.sort_index(axis=0, ascending=False)

Unnamed: 0,10,11,12,13,14,15,16,17,18,19
19,110,111,112,113,114,115,116,117,118,119
18,110,111,112,113,114,115,116,117,118,119
17,110,111,112,113,114,115,116,117,118,119
16,110,111,112,113,114,115,116,117,118,119
15,110,111,112,113,114,115,116,117,118,119
14,110,111,112,113,114,115,116,117,118,119
13,110,111,112,113,114,115,116,117,118,119
12,110,111,112,113,114,115,116,117,118,119
11,110,111,112,113,114,115,116,117,118,119
10,110,111,112,113,114,115,116,117,118,119


In [80]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,19,18,17,16,15,14,13,12,11,10
0,119,118,117,116,115,114,113,112,111,110
1,119,118,117,116,115,114,113,112,111,110
2,119,118,117,116,115,114,113,112,111,110
3,119,118,117,116,115,114,113,112,111,110
4,119,118,117,116,115,114,113,112,111,110
5,119,118,117,116,115,114,113,112,111,110
6,119,118,117,116,115,114,113,112,111,110
7,119,118,117,116,115,114,113,112,111,110
8,119,118,117,116,115,114,113,112,111,110
9,119,118,117,116,115,114,113,112,111,110
