In [25]:
import numpy as np
from pandas import DataFrame, Series

In [4]:
# Sample mark sheet: one list per column, one list position per student.
marks = dict(
    Name=['mehul', 'keyur', 'jane'],
    Roll=[10, 11, 23],
    Physics=[95, 97, 90],
    Chemistry=[99, 93, 89],
)
# No index is supplied, so the rows get the default 0-based labels.
df1 = DataFrame(data=marks)
df1

Unnamed: 0,Chemistry,Name,Physics,Roll
0,99,mehul,95,10
1,93,keyur,97,11
2,89,jane,90,23


In [5]:
# Same data as before, but the rows are labelled by roll number.
# The 'Roll' column itself is still kept as a regular column.
df2 = DataFrame(index=marks['Roll'], data=marks)
df2

Unnamed: 0,Chemistry,Name,Physics,Roll
10,99,mehul,95,10
11,93,keyur,97,11
23,89,jane,90,23


In [9]:
# Row indexing
# .loc selects rows by their explicit index label (here, the roll number).
print(df2.loc[11]) # a single label gives that row as a Series
print(df2.loc[[10, 23]]) # a list of labels gives another DataFrame

# .iloc selects rows by implicit 0-based position; -1 is the last row.
print(df2.iloc[-1])
print(df2.iloc[[0, -1]])

Chemistry       93
Name         keyur
Physics         97
Roll            11
Name: 11, dtype: object
    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
23         89   jane       90    23
Chemistry      89
Name         jane
Physics        90
Roll           23
Name: 23, dtype: object
    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
23         89   jane       90    23


In [12]:
# Column indexing
print(df2)
print(df2[['Physics', 'Chemistry']]) # a list of column names gives a DataFrame
print(df2['Name']) # a single column name gives a Series

    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
11         93  keyur       97    11
23         89   jane       90    23
    Physics  Chemistry
10       95         99
11       97         93
23       90         89
10    mehul
11    keyur
23     jane
Name: Name, dtype: object


In [15]:
# Combine row selection and column selection in two explicit steps.
df3 = df2.iloc[[0, -1], :]  # first and last rows by position, every column
df4 = df3.loc[:, ['Physics', 'Chemistry']]  # keep those rows, narrow to two subjects
df4

Unnamed: 0,Physics,Chemistry
10,95,99
23,90,89


In [16]:
# Row labels and column names can be given to .loc together in one call.
df2.loc[[10, 23], ['Physics', 'Chemistry']]

Unnamed: 0,Physics,Chemistry
10,95,99
23,90,89


In [20]:
# Slicing rows by position
print(df2)
print(df2.iloc[:2]) # first two rows
print(df2.iloc[-2:]) # last two rows
print(df2.head(2)) # same rows as df2.iloc[:2]

    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
11         93  keyur       97    11
23         89   jane       90    23
    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
11         93  keyur       97    11
    Chemistry   Name  Physics  Roll
11         93  keyur       97    11
23         89   jane       90    23
    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
11         93  keyur       97    11


In [22]:
# Get the labels of both axes of the DataFrame
print(df2.index) # row labels (the roll numbers)
print(df2.columns) # column labels

Int64Index([10, 11, 23], dtype='int64')
Index(['Chemistry', 'Name', 'Physics', 'Roll'], dtype='object')


In [27]:
print(df2)
# Maths marks keyed by roll number, deliberately not in df2's row order.
maths = Series({11: 98, 23: 78, 10: 99})
print(maths)
# Add a new column to the DataFrame (this modifies df2 in place).
df2['Maths'] = maths # values are matched on index label (roll number), not on position
print(df2)

    Chemistry   Name  Physics  Roll
10         99  mehul       95    10
11         93  keyur       97    11
23         89   jane       90    23
10    99
11    98
23    78
dtype: int64
    Chemistry   Name  Physics  Roll  Maths
10         99  mehul       95    10     99
11         93  keyur       97    11     98
23         89   jane       90    23     78


In [29]:
# Add, then remove, a derived column (both steps modify df2 in place).
df2['Total'] = df2['Physics'] + df2['Chemistry'] + df2['Maths'] # element-wise sum per row
print(df2)
del df2['Total'] # drop the column again, leaving df2 with its previous columns
print(df2)

    Chemistry   Name  Physics  Roll  Maths  Total
10         99  mehul       95    10     99    293
11         93  keyur       97    11     98    288
23         89   jane       90    23     78    257
    Chemistry   Name  Physics  Roll  Maths
10         99  mehul       95    10     99
11         93  keyur       97    11     98
23         89   jane       90    23     78


In [37]:
# Aggregations over the three subject columns
print(df2)
pcm = df2.loc[:, ['Physics', 'Chemistry', 'Maths']]
print(pcm)
print(pcm.mean(axis='index'))   # collapse the rows: one average per subject
print(pcm.sum(axis='columns'))  # collapse the columns: one total per student

    Chemistry   Name  Physics  Roll  Maths
10         99  mehul       95    10     99
11         93  keyur       97    11     98
23         89   jane       90    23     78
    Physics  Chemistry  Maths
10       95         99     99
11       97         93     98
23       90         89     78
Physics      94.000000
Chemistry    93.666667
Maths        91.666667
dtype: float64
10    293
11    288
23    257
dtype: int64


In [39]:
print(df2)
# A second frame keyed by the same roll numbers, listed in a different row order.
df5 = DataFrame(
    data={'Verbal': [90, 99, 89], 'Quant': [89, 85, 90]},
    index=[11, 23, 10],
)
print(df5)

    Chemistry   Name  Physics  Roll  Maths
10         99  mehul       95    10     99
11         93  keyur       97    11     98
23         89   jane       90    23     78
    Quant  Verbal
11     89      90
23     85      99
10     90      89


In [40]:
# Join on the row index: df5's rows are matched to df2's by roll number,
# so df5 being in a different row order (11, 23, 10) does not matter.
df2.join(df5)

Unnamed: 0,Chemistry,Name,Physics,Roll,Maths,Quant,Verbal
10,99,mehul,95,10,99,90,89
11,93,keyur,97,11,98,89,90
23,89,jane,90,23,78,85,99
