## Pandas Tutorial

In [5]:
import pandas as pd
import numpy as np

In [9]:
# Demo frame: the integers 0..19 laid out as 5 rows x 4 columns,
# with string labels Row1..Row5 on the index and col1..col4 on the columns.
df = pd.DataFrame(
    np.arange(20).reshape(5, 4),
    index=[f'Row{i}' for i in range(1, 6)],
    columns=[f'col{j}' for j in range(1, 5)],
)

In [10]:
# Preview the top of the frame (head() shows the first 5 rows by default).
df.head()

Unnamed: 0,col1,col2,col3,col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11
Row4,12,13,14,15
Row5,16,17,18,19


In [11]:
# Write the frame to test1.csv in the current working directory.
# The row labels are written as the first column (to_csv's default index=True).
df.to_csv('test1.csv')

In [12]:
## Accessing the elements

In [42]:
# A single column label inside [] returns that column as a Series.
df['col2']

Row1     1
Row2     5
Row3     9
Row4    13
Row5    17
Name: col2, dtype: int32

In [49]:
# A list of column labels inside [] returns a DataFrame with just those columns.
df[['col2','col3']]

Unnamed: 0,col2,col3
Row1,1,2
Row2,5,6
Row3,9,10
Row4,13,14
Row5,17,18


In [None]:
## loc and iloc

In [19]:
# loc selects by label: fetch the row whose index label is 'Row1' (returned as a Series).
df.loc['Row1']

col1    0
col2    1
col3    2
col4    3
Name: Row1, dtype: int32

In [17]:
# The row comes back one-dimensional with one entry per column, hence (4,).
df.loc['Row1'].shape

(4,)

In [14]:
# Confirm that a single row selected with loc is a Series.
type(df.loc['Row1'])

pandas.core.series.Series

In [18]:
# df.iloc[row_start:row_stop, col_start:col_stop] -- integer positions, stop is excluded
df.iloc[0:3,0:2] # positions start at 0, so this is rows 0-2 and columns 0-1

Unnamed: 0,col1,col2
Row1,0,1
Row2,4,5
Row3,8,9


In [20]:
# Slicing both axes with iloc keeps the result two-dimensional: a DataFrame.
type(df.iloc[0:3,0:2])

pandas.core.frame.DataFrame

In [25]:
# Keep the top-left 2x2 corner of df (first two rows, first two columns)
# under the name `data`, then display it.
data = df.iloc[:2, :2]
data

Unnamed: 0,col1,col2
Row1,0,1
Row2,4,5


In [26]:
# The 2x2 slice stored in `data` is itself a DataFrame.
type(data)

pandas.core.frame.DataFrame

In [21]:
# Selecting a single row or a single column by label/position
# (e.g. df.loc['Row1'] or df['col2']) returns a Series.

# Selecting with slices or lists on both axes (e.g. df.iloc[0:3,0:2])
# returns a DataFrame, even if the result has only one row or one column.

In [29]:
# Convert the DataFrame into a NumPy array: select every row and column, then take .values
# NOTE: current pandas documentation recommends DataFrame.to_numpy() over .values.
df.iloc[:,:].values

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [31]:
df.loc[:,:].values 
#loc selects by index/column labels, whereas iloc selects by integer position.
#With a bare ':' on both axes each of them selects the whole frame, so the arrays match.

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19]])

In [32]:
# Shape of the resulting array, as (rows, columns).
df.iloc[:,:].values.shape

(5, 4)

In [33]:
## Operations with pandas

In [36]:
df.isnull().sum() # number of null (missing) values in each column

col1    0
col2    0
col3    0
col4    0
dtype: int64

In [40]:
df['col1'].value_counts() # counts how many times each distinct value appears; a count above 1 means the value is duplicated

12    1
4     1
16    1
8     1
0     1
Name: col1, dtype: int64

In [41]:
# Distinct values found in col1, returned as a NumPy array.
df['col1'].unique()

array([ 0,  4,  8, 12, 16], dtype=int64)

In [1]:
from io import StringIO, BytesIO

In [7]:
# CSV text held in memory: one header line followed by three records.
# The second record stores the letter 'c' in col3, so that column is not purely numeric.
data = '\n'.join([
    'col1,col2,col3',
    'x,y,1',
    'a,b,c',
    'c,d,3',
])

In [8]:
# `data` now refers to plain text (str), not a DataFrame.
type(data)

str

In [9]:
#Parsing CSV-formatted text into a DataFrame: StringIO wraps the string
#in a file-like object that read_csv can read from.
pd.read_csv(StringIO(data))

Unnamed: 0,col1,col2,col3
0,x,y,1
1,a,b,c
2,c,d,3


In [10]:
# usecols restricts parsing to the listed columns (col1 is skipped).
# NOTE: this rebinds `df`, replacing the 5x4 demo frame built earlier in the notebook.
df=pd.read_csv(StringIO(data),usecols=['col2','col3'])
df

Unnamed: 0,col2,col3
0,y,1
1,b,c
2,d,3


In [13]:
df.to_csv('test2.csv') # save the frame as a CSV file (the index is written as the first column by default)

In [15]:
# Converting the data types: an all-numeric CSV sample
# (one header line followed by four records) used by the dtype examples.
data1 = '\n'.join([
    'col1,col2,col3,col4',
    '1,2,3,4',
    '5,6,7,8',
    '9,10,11,12',
    '13,14,15,16',
])



In [20]:
df=pd.read_csv(StringIO(data1),dtype=float) 
#a single dtype passed to read_csv applies to every column, so all values are parsed as float
df

Unnamed: 0,col1,col2,col3,col4
0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0
2,9.0,10.0,11.0,12.0
3,13.0,14.0,15.0,16.0


In [21]:
df=pd.read_csv(StringIO(data1),dtype=int)
#a single dtype passed to read_csv applies to every column, so all values are parsed as int
df

Unnamed: 0,col1,col2,col3,col4
0,1,2,3,4
1,5,6,7,8
2,9,10,11,12
3,13,14,15,16
