In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 5x4 demo frame: the integers 0..19 laid out row by row,
# with explicit row labels (index) and column labels.
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=['Row1', 'Row2', 'Row3', 'Row4', 'Row5'],
    columns=['Col1', 'Col2', 'Col3', 'Col4'],
)
df

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


### Retrieve one column

In [5]:
# Select a single column by its label using [] indexing.
df['Col1']

Row1     0
Row2     4
Row3     8
Row4    12
Row5    16
Name: Col1, dtype: int32

In [9]:
# Show the type of the object returned when a single column is selected.
type(df['Col1'])

pandas.core.series.Series

### Retrieve one row

In [10]:
# Select a single row by its index label with .loc.
df.loc['Row1']

Col1    0
Col2    1
Col3    2
Col4    3
Name: Row1, dtype: int32

In [11]:
# Show the type of the object returned when a single row is selected.
type(df.loc['Row1'])

pandas.core.series.Series

A Series is a one-dimensional labelled array: selecting any single column or any single row of a DataFrame returns a Series.

In [7]:
# Passing a list of row labels keeps the result two-dimensional (a DataFrame).
df.loc[['Row1', 'Row2'], :]

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7


In [13]:
# First two rows by integer position, all columns.
df.iloc[:2]

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7


In [16]:
# Label-based selection on both axes: list of row labels, then list of column labels.
df.loc[['Row1'], ['Col1', 'Col2']]

Unnamed: 0,Col1,Col2
Row1,0,1


In [17]:
# Column selected with a single integer position (0) rather than a slice.
type(df.iloc[:2, 0])

pandas.core.series.Series

In [18]:
# Same single column, but selected with a slice (0:1) instead of an integer.
type(df.iloc[:2, :1])

pandas.core.frame.DataFrame

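In the two examples above, both selections contain only one column, yet the results differ: the integer column indexer `0` returns a Series, while the slice `0:1` returns a DataFrame. A scalar indexer drops that dimension, whereas a slice or a list keeps it, so a slice or list always gives a DataFrame regardless of how many columns it selects.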

**Convert Dataframe into array**

In [20]:
# Convert the selected columns to a NumPy array.
# to_numpy() is the accessor the pandas documentation recommends over .values.
df[['Col1', 'Col2']].to_numpy()

array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13],
       [16, 17]])

In [23]:
# Shape of the converted array as (rows, columns).
# to_numpy() is the accessor the pandas documentation recommends over .values.
df[['Col1', 'Col2']].to_numpy().shape

(5, 2)

**Note:** <br>
The difference between `loc` and `iloc` is that `loc` selects by label (the index and column names), whereas `iloc` selects by 0-based integer position.

In [24]:
# Recode Col1 by parity: even values become 2, odd values become 1.
# With Col1 holding 0, 4, 8, 12, 16 every value is even, so the whole column becomes 2.
df['Col1'] = np.where(df['Col1'].mod(2).eq(0), 2, 1)

**Calculate frequency of unique values of a column**

In [26]:
# Count how many times each distinct value occurs in Col1.
df['Col1'].value_counts()

2    5
Name: Col1, dtype: int64

In Col1, we have 5 rows with value 2.

In [27]:
# Distinct values present in Col1, returned as an array.
df['Col1'].unique()

array([2])

In [28]:
# Distinct values present in Col2, returned as an array.
df['Col2'].unique()

array([ 1,  5,  9, 13, 17])

**Strings and Data Frames**

In [32]:
from io import StringIO

In [33]:
# CSV text held in memory: a header line followed by three data rows.
data = ''.join((
    'col1,col2,col3\n',
    'a,b,1\n',
    'x,y,z\n',
    'p,q,r',
))
data

'col1,col2,col3\na,b,1\nx,y,z\np,q,r'

In [36]:
# Show the Python type of `data`.
type(data)

str

In [35]:
# Wrap the CSV text in StringIO so read_csv can read it like a file.
df1 = pd.read_csv(StringIO(data))
df1

Unnamed: 0,col1,col2,col3
0,a,b,1
1,x,y,z
2,p,q,r


In [37]:
# Parse the same text again, keeping only the columns named in usecols.
df2 = pd.read_csv(
    StringIO(data),
    usecols=['col1', 'col3'],
)
df2

Unnamed: 0,col1,col3
0,a,1
1,x,z
2,p,r


In [40]:
# dtype of the col1 column ('O' means Python object, which is how text is stored here).
df1['col1'].dtype

dtype('O')

In [43]:
# Fetch one cell with a single label-based lookup (row label 1, column 'col1')
# rather than chaining two separate [] selections.
df2.loc[1, 'col1']

'x'

In [44]:
# Type of a single cell, fetched with one label-based lookup
# (row label 1, column 'col1') rather than chained [] selections.
type(df2.loc[1, 'col1'])

str

In [48]:
# CSV text whose third column contains only integers.
data1 = ''.join((
    'col1,col2,col3\n',
    'a,b,1\n',
    'x,y,2\n',
    'p,q,3',
))

In [63]:
# Pin each column's dtype while parsing: 'O' (object) for the two text
# columns and pandas' nullable 'Int64' for col3.
df3 = pd.read_csv(
    StringIO(data1),
    dtype={'col1': 'O', 'col2': 'O', 'col3': 'Int64'},
)
df3

Unnamed: 0,col1,col2,col3
0,a,b,1
1,x,y,2
2,p,q,3


In [64]:
# Type of a single cell of the Int64 column, fetched with one label-based
# lookup (row label 1, column 'col3') rather than chained [] selections.
type(df3.loc[1, 'col3'])

numpy.int64

In [65]:
# List the dtype of every column in df3.
df3.dtypes

col1    object
col2    object
col3     Int64
dtype: object

**Copying column values into index**

In [66]:
# CSV text whose first column is literally named 'index'.
data2 = ''.join((
    'index,col1,col2,col3\n',
    '6,a,b,1\n',
    '7,x,y,2\n',
    '8,p,q,3',
))

In [67]:
# Without index_col, the 'index' column is read as an ordinary data column.
df4 = pd.read_csv(
    StringIO(data2),
    dtype={'index': int, 'col3': 'Int64'},
)
df4

Unnamed: 0,index,col1,col2,col3
0,6,a,b,1
1,7,x,y,2
2,8,p,q,3


In [68]:
# index_col=0 makes the first column of the file the row index of the frame.
df4 = pd.read_csv(
    StringIO(data2),
    dtype={'index': int, 'col3': 'Int64'},
    index_col=0,
)
df4

Unnamed: 0_level_0,col1,col2,col3
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6,a,b,1
7,x,y,2
8,p,q,3


In [69]:
# Inspect the row index of df4.
df4.index

Int64Index([6, 7, 8], dtype='int64', name='index')

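**`read_csv` does not turn a column into the index just because it holds integers. It only uses the first column as the index when the data rows contain one more field than the header row; passing `index_col=False` forces it to keep the default integer index instead.**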

In [72]:
# CSV text whose first column holds integers.
data3 = ''.join((
    'a,b,c\n',
    '4,apple,bat\n',
    '8,cat,rat',
))

In [77]:
# index_col=False tells read_csv not to use the first column as the index.
df5 = pd.read_csv(StringIO(data3), index_col = False)
df5

Unnamed: 0,a,b,c
0,4,apple,bat
1,8,cat,rat


In [78]:
# Keep only columns 'b' and 'c', again without promoting any column to the index.
df5 = pd.read_csv(
    StringIO(data3),
    usecols=['b', 'c'],
    index_col=False,
)
df5

Unnamed: 0,b,c
0,apple,bat
1,cat,rat


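**Dropping a character while parsing with `escapechar`** (the escape character itself is removed and the character that follows it is read literally)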

In [89]:
# The header names four columns (the last name is 'c4 ' with a trailing space),
# while the data row starts with a comma and has five comma-separated fields.
data4 = 'c1,c2,c3,c4 \n,Hello Aaron, Lets play football today, and, then go to america.'

In [95]:
# escapechar='a' makes the parser treat every 'a' as an escape character:
# the 'a' itself is dropped and the character after it is read literally.
# NOTE(review): this only looks like character removal; an 'a' directly before a
# delimiter or quote would also cancel that character's special meaning.
pd.read_csv(StringIO(data4) , escapechar = 'a')

Unnamed: 0,c1,c2,c3,c4
,Hello Aron,Lets ply footbll tody,nd,then go to meric.
