In [3]:
import pandas as pd

In [5]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
 'year': [2000, 2001, 2002, 2001, 2002, 2003],
 'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


# changing column arrangement

In [7]:
f1 = pd.DataFrame(data, columns=['year', 'state', 'pop'])
f1

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [9]:
f1 = pd.DataFrame(data, columns=['year', 'state', 'pop','extra'], index = [11,22,33,44,55,66])
f1

Unnamed: 0,year,state,pop,extra
11,2000,Ohio,1.5,
22,2001,Ohio,1.7,
33,2002,Ohio,3.6,
44,2001,Nevada,2.4,
55,2002,Nevada,2.9,
66,2003,Nevada,3.2,


In [60]:
f1 = pd.DataFrame(data, columns=['year', 'state', 'pop','debt'], index = ["one","two","three","four","five","six"])
f1

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [61]:
f1.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [62]:
f1.index

Index(['one', 'two', 'three', 'four', 'five', 'six'], dtype='object')

In [63]:
f1['pop']

one      1.5
two      1.7
three    3.6
four     2.4
five     2.9
six      3.2
Name: pop, dtype: float64

In [64]:
f1.debt 

one      NaN
two      NaN
three    NaN
four     NaN
five     NaN
six      NaN
Name: debt, dtype: object

In [20]:
f1.loc['six']

year       2003
state    Nevada
pop         3.2
debt        NaN
Name: six, dtype: object

In [21]:
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five']) # matching series with dataframe

In [22]:
val

two    -1.2
four   -1.5
five   -1.7
dtype: float64

In [23]:
f1.debt = val

In [24]:
f1

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [25]:
pop = {'Nevada': {2001: 2.4, 2002: 2.9},   # Inner dictionary becomesz index by default
       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [36]:
extra = pd.DataFrame(pop, index=[2000, 2001, 2002, 2003])
extra

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6
2003,,


In [30]:
pdata = {'Ohio': frame3['Ohio'][:-1],     # rows ki form slicing horahi hai [starting row index: ending row index]
        'Nevada': frame3['Nevada'][:2]}   # -1 mai last row include nahi hogi, for Ohio
pdata = pd.DataFrame(pdata)

In [31]:
pdata

Unnamed: 0,Ohio,Nevada
2000,1.5,
2001,1.7,2.4


In [32]:
frame3.index.name = 'year'; frame3.columns.name = 'state' # giving titles to index and columns
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [33]:
frame3.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])

In [37]:
extra.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6],
       [nan, nan]])

# Essential Functionality

In [None]:
# Reindexing on Series

In [38]:
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [39]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e']) # reindexing mean we are rearranging the indexes and might add some new index
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [40]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3


0      blue
2    purple
4    yellow
dtype: object

In [43]:
obj3.reindex(range(6), method = 'ffill') # ffill --> forward fill

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [45]:
obj3.reindex(range(6), method = 'bfill') # bfill --> backward fill, 
                                         # problem for last object as it cannot get data from its succeeding object

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

In [None]:
# Reindexing on Frame

In [47]:
import numpy as np

In [48]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
        index=['a', 'c', 'd'],
        columns=['Ohio', 'Texas', 'California'])
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [49]:
frame2 = frame.reindex(['a','b','c','d','e']) # reindexing the indexes, there is a method to reindex columns as well
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0
e,,,


In [51]:
states = ['Texas','Utah','California'] # doing it for the columns now
frame2.reindex(columns = states)


Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0
e,,,


In [52]:
# reindexing both, index and column
frame2.reindex(index=['f','b','a','d','c','g'], columns = ['Texas','Nevada','Utah','California','New York','Chicago'])

Unnamed: 0,Texas,Nevada,Utah,California,New York,Chicago
f,,,,,,
b,,,,,,
a,1.0,,,2.0,,
d,7.0,,,8.0,,
c,4.0,,,5.0,,
g,,,,,,


In [88]:
data = pd.DataFrame(np.arange(1,101).reshape((20,5)), columns = ['a','b','c','d','e'], index=range(1,101,5))

In [89]:
data

Unnamed: 0,a,b,c,d,e
1,1,2,3,4,5
6,6,7,8,9,10
11,11,12,13,14,15
16,16,17,18,19,20
21,21,22,23,24,25
26,26,27,28,29,30
31,31,32,33,34,35
36,36,37,38,39,40
41,41,42,43,44,45
46,46,47,48,49,50


In [None]:
data = pd.DataFrame(np.arange(1,101).reshape((20,5)), columns = ['a','b','c','d','e'], index=range(1,101,5))

In [91]:
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0
e,,,


In [94]:
frame2.loc['c']['California']

5.0

In [95]:
frame2.loc['c'][2]

5.0

In [98]:
frame2.iloc[::-1] # displaying rows in reverse

Unnamed: 0,Ohio,Texas,California
e,,,
d,6.0,7.0,8.0
c,3.0,4.0,5.0
b,,,
a,0.0,1.0,2.0


In [99]:
frame2.iloc[::-1, ::-1] # displaying rows and columns in reverse

Unnamed: 0,California,Texas,Ohio
e,,,
d,8.0,7.0,6.0
c,5.0,4.0,3.0
b,,,
a,2.0,1.0,0.0


In [102]:
frame2.loc['a':'c']

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0


In [104]:
frame2.iloc[0:3] # ilocation last value ko count nahi karta

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0


In [112]:
frame2.loc['b']['Texas'] = 6
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,6.0,
c,3.0,4.0,5.0
d,6.0,7.0,8.0
e,,,


In [118]:
frame2.loc['b'][[0,2]] = [4,6]
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,4.0,6.0,6.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0
e,,,


In [119]:
frame2.loc['b'][[2,0]]= frame2.loc['b'][[0,2]]

In [120]:
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,6.0,6.0,4.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0
e,,,


In [122]:
frame2['Texas'][4] + 5

nan

In [None]:
# Anything or any number added with NaN gives NaN

In [126]:
frame4 = frame2.copy()
frame4['std'] = [4,5,7,8,9]
frame4

Unnamed: 0,Ohio,Texas,California,std
a,0.0,1.0,2.0,4
b,6.0,6.0,4.0,5
c,3.0,4.0,5.0,7
d,6.0,7.0,8.0,8
e,,,,9


In [128]:
frame2    # frame2 doesn't have std column

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,6.0,6.0,4.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0
e,,,


In [129]:
frame4 + frame2 # shows anything added in NaN gives NaN

Unnamed: 0,California,Ohio,Texas,std
a,4.0,0.0,2.0,
b,8.0,12.0,12.0,
c,10.0,6.0,8.0,
d,16.0,12.0,14.0,
e,,,,


In [130]:
frame4 - frame2

Unnamed: 0,California,Ohio,Texas,std
a,0.0,0.0,0.0,
b,0.0,0.0,0.0,
c,0.0,0.0,0.0,
d,0.0,0.0,0.0,
e,,,,


In [131]:
frame2 - frame4

Unnamed: 0,California,Ohio,Texas,std
a,0.0,0.0,0.0,
b,0.0,0.0,0.0,
c,0.0,0.0,0.0,
d,0.0,0.0,0.0,
e,,,,


In [134]:
abs(-4.98)

4.98

In [133]:
abs(8)

8

In [137]:
frame4

Unnamed: 0,Ohio,Texas,California,std
a,0.0,1.0,2.0,4
b,6.0,6.0,4.0,5
c,3.0,4.0,5.0,7
d,6.0,7.0,8.0,8
e,,,,9


In [136]:
frame4.isna()

Unnamed: 0,Ohio,Texas,California,std
a,False,False,False,False
b,False,False,False,False
c,False,False,False,False
d,False,False,False,False
e,True,True,True,False


In [138]:
frame4.fillna(0)

Unnamed: 0,Ohio,Texas,California,std
a,0.0,1.0,2.0,4
b,6.0,6.0,4.0,5
c,3.0,4.0,5.0,7
d,6.0,7.0,8.0,8
e,0.0,0.0,0.0,9


In [148]:
frame4.fillna(0 , inplace = True)

In [149]:
frame4

Unnamed: 0,Ohio,Texas,California,std
a,0.0,1.0,2.0,4
b,6.0,6.0,4.0,5
c,3.0,4.0,5.0,7
d,6.0,7.0,8.0,8
e,0.0,0.0,0.0,9


In [151]:
frame4.dropna()

Unnamed: 0,Ohio,Texas,California,std
a,0.0,1.0,2.0,4
b,6.0,6.0,4.0,5
c,3.0,4.0,5.0,7
d,6.0,7.0,8.0,8
e,0.0,0.0,0.0,9


In [152]:
frame4

Unnamed: 0,Ohio,Texas,California,std
a,0.0,1.0,2.0,4
b,6.0,6.0,4.0,5
c,3.0,4.0,5.0,7
d,6.0,7.0,8.0,8
e,0.0,0.0,0.0,9


In [174]:
csvData = pd.read_csv('data.csv')
csvData.head(8)

Unnamed: 0,Student Code,Degree,Student Name,Mid,Quiz 1,Quiz 2,Best of Quizzes,Assignment 1,Assignment 2,Best of Assignments,Total Sessional (50),Final (50),Total (100),Grade
0,022-14-19987,BS(CS),Abdul Basit,28,8.0,3.0,8,7.0,9.0,9,45,25.0,70,B
1,022-14-110233,BS(CS),Adeel Ahmed,17,,5.0,5,8.0,10.0,10,32,18.0,50,F
2,022-14-110585,BS(CS),Afrah Zareen,18,5.0,2.0,5,8.0,10.0,10,33,30.0,63,C
3,022-14-19718,BS(CS),Ahmed Ali Raza,14,7.0,2.0,7,,2.0,2,23,23.0,46,F
4,022-14-110648,BS(CS),Ahsan Ali Vohra,27,7.0,6.0,7,7.0,9.0,9,43,34.0,77,B
5,022-14-110232,BS(CS),Ameer Hamza,25,9.0,6.0,9,8.0,10.0,10,44,27.0,71,B
6,022-14-110588,BS(CS),Anas Ali Khan,28,5.0,6.0,6,8.0,10.0,10,44,30.0,74,B
7,022-14-110388,BS(CS),Aneebullah Niazi,26,9.0,6.0,9,8.0,10.0,10,45,40.0,85,A


In [175]:
#greatr = [csvData['Best of Quizzes' > 7]]

In [178]:
csvData["Best of Quizzes"]

0     8
1     5
2     5
3     7
4     7
5     9
6     6
7     9
8     9
9     8
10    8
11    8
12    7
13    8
14    7
15    0
16    5
17    8
18    5
19    9
20    7
21    9
22    7
23    5
24    5
25    9
26    6
27    8
28    6
29    7
30    8
31    7
32    2
33    4
34    9
35    8
36    9
37    9
38    6
39    4
40    9
41    7
42    6
43    9
44    7
45    9
46    8
47    9
Name: Best of Quizzes, dtype: int64

In [183]:
a = csvData[csvData['Best of Quizzes' and 'Assignment 1'] > 7]#.count()

In [184]:
a

Unnamed: 0,Student Code,Degree,Student Name,Mid,Quiz 1,Quiz 2,Best of Quizzes,Assignment 1,Assignment 2,Best of Assignments,Total Sessional (50),Final (50),Total (100),Grade
1,022-14-110233,BS(CS),Adeel Ahmed,17,,5.0,5,8.0,10.0,10,32,18.0,50,F
2,022-14-110585,BS(CS),Afrah Zareen,18,5.0,2.0,5,8.0,10.0,10,33,30.0,63,C
5,022-14-110232,BS(CS),Ameer Hamza,25,9.0,6.0,9,8.0,10.0,10,44,27.0,71,B
6,022-14-110588,BS(CS),Anas Ali Khan,28,5.0,6.0,6,8.0,10.0,10,44,30.0,74,B
7,022-14-110388,BS(CS),Aneebullah Niazi,26,9.0,6.0,9,8.0,10.0,10,45,40.0,85,A
9,022-14-110599,BS(CS),Arsalan,28,8.0,6.0,8,8.0,,8,44,40.0,84,A
10,022-14-110214,BS(CS),Fatima Haider Warsi,30,8.0,7.0,8,8.0,,8,46,45.0,91,A
12,022-15-110994,BS(CS),Hafiza Tooba Akbani,23,7.0,5.0,7,8.0,10.0,10,40,33.0,73,B
13,022-14-110600,BS(CS),Hamza Abdul Jabbar,24,8.0,4.0,8,8.0,10.0,10,42,25.0,67,C
17,022-14-110396,BS(CS),Khalid Anwer,20,8.0,5.0,8,9.0,11.0,11,39,31.0,70,B


In [185]:
a.count()

Student Code            32
Degree                  32
Student Name            32
Mid                     32
Quiz 1                  30
Quiz 2                  31
Best of Quizzes         32
Assignment 1            32
Assignment 2            24
Best of Assignments     32
Total Sessional (50)    32
Final (50)              32
Total (100)             32
Grade                   32
dtype: int64