In [12]:
import numpy as np
import pandas as pd

## 1-D Data

### We use 'pd.Series' objects to deal with one-dimensional data

In [9]:
# Empty Series: the dtype is passed explicitly. Relying on the default for
# empty data emits a deprecation warning in pandas 1.x and yields dtype
# `object` (instead of `float64`) from pandas 2.0 onwards.
series = pd.Series(dtype='float64')
print('{}\n'.format(series))

# A scalar produces a one-element Series
series = pd.Series(5)
print('{}\n'.format(series))

# A list produces one element per item, indexed 0..n-1
series = pd.Series([1,2,3])
print('{}\n'.format(series))

# Mixed int/float input is upcast (int -> float)
series = pd.Series([1, 2.2])
print('{}\n'.format(series))


Series([], dtype: float64)

0    5
dtype: int64

0    1
1    2
2    3
dtype: int64

0    1.0
1    2.2
dtype: float64



  series = pd.Series()


### use the 'dtype' argument to typecast

In [13]:
# Integer input array; the `dtype` argument casts the values to 32-bit floats
arr = np.array([1, 2])
series = pd.Series(data=arr, dtype='float32')
print('{}\n'.format(series))

0    1.0
1    2.0
dtype: float32



In [18]:
# Each inner list is stored as a single element, so the resulting dtype is `object`
nested_rows = [[1, 2], [3, 4]]
series = pd.Series(nested_rows)
print('{}\n'.format(series))

0    [1, 2]
1    [3, 4]
dtype: object



## Index

### use the 'index' argument to set custom index labels (i.e. to replace the default serial numbers)

Note: The index must have the same length as the series, and its labels can be of any (hashable) data type

In [21]:
# Label the three values 'a', 'b', 'c' instead of the default 0, 1, 2
labels = ['a','b','c']
series = pd.Series(data=[1,2,3], index=labels)
print(series)

a    1
b    2
c    3
dtype: int64


## Dictionary input


In [24]:
# Dictionary keys become the index labels; the order of the keys is kept as given (no sorting)
for mapping in ({'a':1, 'b':2, 'c':3}, {'b':2, 'a':1, 'c':3}):
    series = pd.Series(mapping)
    print('{}\n'.format(series))

a    1
b    2
c    3
dtype: int64

b    2
a    1
c    3
dtype: int64



# DataFrame

## 2-D Data

### use 'pandas.DataFrame' object to deal with tabular data

In [30]:
# Three inputs in turn: no data (empty frame), a flat list (single column)
# and a list of rows (2x2 frame)
frame_inputs = (None, [5,6], [[5,6],[7,8]])
for position, data in enumerate(frame_inputs):
    df = pd.DataFrame(data)
    if position:
        # blank separator between consecutive frames
        print("\n")
    print(df)

Empty DataFrame
Columns: []
Index: []


   0
0  5
1  6


   0  1
0  5  6
1  7  8


### use the 'index' and 'columns' arguments to name the row and column headers respectively

In [31]:
# Rows are labelled through `index`, columns through `columns`
df = pd.DataFrame(
    data=[[5,6], [1,3]],
    index=['r1', 'r2'],
    columns=['c1', 'c2'],
)
print(df)

    c1  c2
r1   5   6
r2   1   3


### if we use a dictionary to describe the data, the column headers are automatically taken from the dictionary keys

In [33]:
# The dictionary keys supply the column headers; `index` still names the rows
column_data = {'c1':[1,2], 'c2':[3,4]}
df = pd.DataFrame(column_data, index=['r1', 'r2'])
print(df)

    c1  c2
r1   1   3
r2   2   4


## Upcasting

### upcasting occurs on a per-column basis

In [37]:
# Column 'a' mixes int and float values and is upcast to float;
# column 'b' holds only ints and keeps its integer dtype
df = pd.DataFrame(data=[[1,2],[3.1,4]], columns=['a','b'])
print(df)
print("\n")
# dtype of each column
column_dtypes = df.dtypes
print(column_dtypes)

     a  b
0  1.0  2
1  3.1  4


a    float64
b      int64
dtype: object


## Appending Rows

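### use the 'append' function to add rows to an existing dataframe (note: `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0 — use `pd.concat` on newer versions)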

Note: The appended series needs a row label: either give the series a name using the 'name' attribute, or pass 'ignore_index=True' to renumber the rows

In [39]:
df = pd.DataFrame([[5,6], [1.2,3]])
ser = pd.Series([0,0])
ser2 = pd.Series([1,1], name='r3')

print('{}\n'.format(df))

# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the rows are added with `pd.concat` instead. `Series.to_frame().T` turns
# each Series into a one-row frame whose columns (0, 1) line up with `df`.

# Unnamed series: `ignore_index=True` discards the row labels and renumbers them
df_new = pd.concat([df, ser.to_frame().T], ignore_index=True)
print('{}\n'.format(df_new))

# Named series: its name ('r3') is kept as the label of the new row
df_new2 = pd.concat([df, ser2.to_frame().T])
print('{}\n'.format(df_new2))

     0  1
0  5.0  6
1  1.2  3

     0  1
0  5.0  6
1  1.2  3
2  0.0  0

      0  1
0   5.0  6
1   1.2  3
r3  1.0  1



## Dropping Data

In [48]:
# Show the starting frame (built in the preceding cell) before dropping anything
print(df)

# Keyword arguments for each `drop` call, in order:
#   labels=0            -> drop the row labelled 0
#   labels=0, axis=1    -> drop the column labelled 0
#   labels=[0, 1]       -> drop the rows labelled 0 and 1
#   columns=1           -> drop the column labelled 1
drop_variants = [
    {'labels': 0},
    {'labels': 0, 'axis': 1},
    {'labels': [0,1]},
    {'columns': 1},
]
for drop_kwargs in drop_variants:
    # `drop` returns a new frame; `df` itself is left unchanged
    df_drop = df.drop(**drop_kwargs)
    print('\n{}'.format(df_drop))

     0  1
0  5.0  6
1  1.2  3

     0  1
1  1.2  3

   1
0  6
1  3

Empty DataFrame
Columns: [0, 1]
Index: []

     0
0  5.0
1  1.2


## Concat

### use the 'concat' function to combine multiple dataframes, and the 'axis' argument to choose whether to concatenate along rows (0, the default) or columns (1)

In [56]:
# Three frames sharing the same row labels and the same column labels
row_labels = ['r1','r2']
df1 = pd.DataFrame({'c1':[1,2], 'c2':[3,4]}, index=row_labels)
df2 = pd.DataFrame({'c1':[5,6], 'c2':[7,8]}, index=row_labels)
df3 = pd.DataFrame({'c1':[5,6], 'c2':[7,8]}, index=row_labels)

# axis=1 places the frames side by side; axis=0 stacks them on top of each other
concat_col = pd.concat(objs=[df1,df2], axis=1)
concat_row = pd.concat(objs=[df1,df2], axis=0)

print('{}\n'.format(concat_col))
print('{}\n'.format(concat_row))

    c1  c2  c1  c2
r1   1   3   5   7
r2   2   4   6   8

    c1  c2
r1   1   3
r2   2   4
r1   5   7
r2   6   8



In [58]:
# Combine the three frames from the preceding cell, first row-wise
# (axis 0, the default) and then column-wise (axis 1)
frames = [df1,df2,df3]
for axis in (0, 1):
    concat = pd.concat(frames, axis=axis)
    print('{}\n'.format(concat))

    c1  c2
r1   1   3
r2   2   4
r1   5   7
r2   6   8
r1   5   7
r2   6   8

    c1  c2  c1  c2  c1  c2
r1   1   3   5   7   5   7
r2   2   4   6   8   6   8



## Merge

### use the 'merge' function to join two dataframes into one. By default, it performs an inner join on all common column labels, so only rows that match in every shared column are kept


In [69]:
# Left table: one row per person with position and year
mdf1 = pd.DataFrame({
    'name':['ram','vignesh','john'],
    'pos':['student','teacher','principal'],
    'year':[2000,2004,2006],
})
print('{}\n'.format(mdf1))

# Right table: shares the 'name' and 'year' columns with the left table
mdf2 = pd.DataFrame({
    'name':['ram','vignesh','doe'],
    'year':[2000,2004,2006],
    'rbi':[80,100,12],
})
print('{}\n'.format(mdf2))

# No `on` argument is given, so the join uses the columns both frames share
merged = mdf1.merge(mdf2)
print(merged)

      name        pos  year
0      ram    student  2000
1  vignesh    teacher  2004
2     john  principal  2006

      name  year  rbi
0      ram  2000   80
1  vignesh  2004  100
2      doe  2006   12

      name      pos  year  rbi
0      ram  student  2000   80
1  vignesh  teacher  2004  100
