In [2]:
import pandas as pd
import numpy as np

## A dataframe is
1. a 2D numpy array with row and column indexes
2. a dictionary with column names mapping to a Series of column data.

## Ways to create a dataframe

### 1.  From a single series object.

In [6]:
# Two Series that share the same company labels: the first is built from a
# dict (keys become the index), the second from values plus an explicit index.
share_price = pd.Series(
    {
        'Microsoft': 107,
        'Amazon': 1600,
        'Apple': 155,
        'Google': 1035,
    }
)
valuation = pd.Series(
    [823, 827, 730, 790],
    index=['Microsoft', 'Amazon', 'Apple', 'Google'],
)

# A single Series becomes a one-column frame; `columns` supplies the column label.
# NOTE: pd.DataFrame([share_price, valuation], columns=['share_price', 'valuation'])
# does not work as a way to get one column per Series.
pd.DataFrame(data=share_price, columns=['share_price'])

Unnamed: 0,share_price
Microsoft,107
Amazon,1600
Apple,155
Google,1035


### 2. From a dictionary of series

In [15]:
# Each dict key becomes a column label; the rows keep the Series' index labels.
pd.DataFrame(dict(share_price=share_price, valuation=valuation))

Unnamed: 0,share_price,valuation
Microsoft,107,823
Amazon,1600,827
Apple,155,730
Google,1035,790


### 3. From a list of dictionaries/Series. Keys will be columns here

Remember: passing a list of Series turns each Series' index labels into columns, so each Series becomes a row.

In [9]:
# Each Series in the list becomes one row, labelled through `index`;
# the Series' own index labels become the columns.
pd.DataFrame(
    data=[share_price, valuation],
    index=['share_price', 'valuation'],
)

Unnamed: 0,Microsoft,Amazon,Apple,Google
share_price,107,1600,155,1035
valuation,823,827,730,790


In [11]:
# Each dict is one row; its keys ('a', 'b') become the columns.
pd.DataFrame(
    data=[
        dict(a=1, b=2),
        dict(a='one', b='two'),
    ],
    index=['number', 'word'],
)

Unnamed: 0,a,b
number,1,2
word,one,two


### 4. From a 2D numpy array

In [16]:
# Wrap the nested list in an actual 2D NumPy array so the example matches this
# section's title. Each array row becomes a frame row (labelled through `index`)
# and each array column is labelled through `columns`.
company_metrics = np.array([[107, 1600, 155, 1035],
                            [823, 827, 730, 790]])
pd.DataFrame(company_metrics,
             columns=['Microsoft', 'Amazon', 'Apple', 'Google'],
             index=['share_price', 'valuation'])

Unnamed: 0,Microsoft,Amazon,Apple,Google
share_price,107,1600,155,1035
valuation,823,827,730,790


In [11]:
# With a dict of equal-length lists, every key turns into a column.
# No index is given, so the rows get the default 0..n-1 labels.
pd.DataFrame(
    dict(
        share_price=[107, 1600, 155, 1035],
        valuation=[823, 827, 730, 790],
    )
)

Unnamed: 0,share_price,valuation
0,107,823
1,1600,827
2,155,730
3,1035,790


In [9]:
# Transpose the 2 x 4 array so that the companies run down the rows
# and the two measures become the columns.
pd.DataFrame(
    np.transpose(
        np.array([[107, 1600, 155, 1035],
                  [823, 827, 730, 790]])
    ),
    columns=['share_price', 'valuation'],
    index=['Microsoft', 'Amazon', 'Apple', 'Google'],
)

Unnamed: 0,share_price,valuation
Microsoft,107,823
Amazon,1600,827
Apple,155,730
Google,1035,790


### 5. From a NumPy structured (record) array

In [24]:
# Structured array of three zero-filled records with an 'f8' field named
# 'weight' and an 'i4' field named 'age'; the field names become the columns.
a = np.zeros(shape=3, dtype=np.dtype([('weight', 'f8'), ('age', 'i4')]))
pd.DataFrame(data=a, index=['first', 'second', 'third'])

Unnamed: 0,weight,age
first,0.0,0
second,0.0,0
third,0.0,0


## pandas index

In [27]:
# Build an integer Index and let the cell display its repr.
i = pd.Index(data=[1, 2, 3])
i

Int64Index([1, 2, 3], dtype='int64')

In [28]:
# Positional access works like on an array: fetch the last element.
i[len(i) - 1]

3

In [29]:
# Comparing against a scalar is evaluated element by element and gives booleans.
i > 3

array([False, False, False])

### pandas index is immutable

In [30]:
# Item assignment on an Index raises TypeError. Catch it and print the message
# so the demonstration stays visible while "Restart & Run All" keeps going
# instead of stopping at this cell.
try:
    i[0] = 5
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

In [35]:
i = pd.Index([1, 2, 3])
j = pd.Index([2, 3, 4])
# Set intersection ("and"). The named method is used instead of `i & j`:
# the operator form of Index set operations was deprecated and, from
# pandas 2.0, `&` is an element-wise logical operation, not a set operation.
i.intersection(j)

Int64Index([2, 3], dtype='int64')

In [36]:
# Set union ("or"). The named method is used instead of `i | j`, which from
# pandas 2.0 is an element-wise logical operation rather than a set union.
i.union(j)

Int64Index([1, 2, 3, 4], dtype='int64')

In [38]:
# Symmetric difference ("xor"): labels present in exactly one of the two.
# The named method is used instead of `i ^ j`, which from pandas 2.0 is an
# element-wise logical operation rather than a set operation.
i.symmetric_difference(j)

Int64Index([1, 4], dtype='int64')

In [39]:
# `&` on plain NumPy integer arrays is an element-wise bitwise AND.
a = np.arange(1, 4)  # [1, 2, 3]
b = np.arange(2, 5)  # [2, 3, 4]
a & b

array([0, 2, 0], dtype=int32)