# 1. Import Pandas and Numpy

In [1]:
import pandas as pd
import numpy as np

## check version

In [2]:
print(pd.__version__)
print(np.__version__)

2.2.3
2.2.6


# 2. Series

In [3]:
# Series is a one-dimensional labeled array capable of holding any data type 
#         (integers, strings, floating point numbers, Python objects, etc.)

# syntax:- s = pd.Series(data, index=index)

# data can be many different things:
#    a Python dict
#    an ndarray
#    a scalar value (like 5)

## create series

In [4]:
s = pd.Series(np.random.randn(5))
s

0    0.431618
1   -0.696164
2   -1.531892
3   -1.615309
4   -0.254457
dtype: float64

## Random flot value with custom index

In [5]:
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])
s

a    1.954894
b   -1.409660
c   -0.293630
d   -0.491727
e   -0.857245
dtype: float64

## Random int value with custom index

In [6]:
s = pd.Series(np.random.randint(0, 15, size = 5), index=["a", "b", "c", "d", "e"])
s

a    13
b    11
c     1
d     1
e    13
dtype: int32

## Series can be instantiated from list

In [7]:
lists = [2, 4, 5, 8, 3]
s = pd.Series(lists, index=["a", "b", "c", "d", "e"])
s

a    2
b    4
c    5
d    8
e    3
dtype: int64

## Series can be instantiated from dicts:

In [8]:
d = {"b": 1, 
     "a": 0, 
     "c": 2}

pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [9]:
# If an index is passed, the values in data corresponding to the labels in the index will be pulled out

In [10]:
d = {"a": 0.0, 
     "b": 1.0, 
     "c": 2.0}

pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [11]:
# NaN (not a number) is the standard missing data marker used in pandas.

In [12]:
# If data is a scalar value, an index must be provided. The value will be repeated to match the length of index.

In [13]:
pd.Series(5.0, index=["a", "b", "c", "d", "e"])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

## Series is like ndarray

In [14]:
# Series acts very similarly to a ndarray and is a valid argument to most NumPy functions

In [15]:
s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])
s

a    1.872374
b    0.167267
c    1.273957
d    0.100332
e    0.405892
dtype: float64

In [16]:
s.iloc[0]
s.iloc[3]

np.float64(0.10033208226775898)

In [17]:
s.iloc[:3]

a    1.872374
b    0.167267
c    1.273957
dtype: float64

In [18]:
s[s > s.median()]

a    1.872374
c    1.273957
dtype: float64

In [19]:
s.iloc[[4, 3, 1]]

e    0.405892
d    0.100332
b    0.167267
dtype: float64

In [20]:
# Like a NumPy array, a pandas Series has a single dtype.

In [21]:
s.dtype

dtype('float64')

In [22]:
# If you need the actual array backing a Series, use Series.array.

In [23]:
s.array

<NumpyExtensionArray>
[ np.float64(1.8723743292960566), np.float64(0.16726682777591506),
  np.float64(1.2739572635286078), np.float64(0.10033208226775898),
 np.float64(0.40589177421401745)]
Length: 5, dtype: float64

In [24]:
# if you need an actual ndarray, then use Series.to_numpy().

In [25]:
s.to_numpy()

array([1.87237433, 0.16726683, 1.27395726, 0.10033208, 0.40589177])

In [26]:
# A Series is also like a fixed-size dict in that you can get and set values by index label
s

a    1.872374
b    0.167267
c    1.273957
d    0.100332
e    0.405892
dtype: float64

In [27]:
s["a"]

np.float64(1.8723743292960566)

In [28]:
s["e"] = 12.0
s

a     1.872374
b     0.167267
c     1.273957
d     0.100332
e    12.000000
dtype: float64

In [29]:
"e" in s

True

In [30]:
"j" in s

False

In [31]:
# Using the Series.get() method, a missing label will return None or specified default

In [32]:
s.get("e")

np.float64(12.0)

In [33]:
s.get("f")

In [34]:
s.get("f", np.nan)

nan

## Vectorized operations and label alignment with Series

In [35]:
s = pd.Series(np.random.randint(5, size = 5), index = ['a', 'b', 'c', 'd', 'e'])
s

a    3
b    3
c    2
d    0
e    1
dtype: int32

In [36]:
s + s 

a    6
b    6
c    4
d    0
e    2
dtype: int32

In [37]:
s * s

a    9
b    9
c    4
d    0
e    1
dtype: int32

In [38]:
# "s.iloc[1:]" This gives you everything except the first value
# "s.iloc[:-1]" This gives you everything except the last value:

In [39]:
s.iloc[1:] + s.iloc[:-1]

a    NaN
b    6.0
c    4.0
d    0.0
e    NaN
dtype: float64

## Name attribute

In [40]:
s = pd.Series(np.random.randint(5, size=5), name="something")
print(s)
print(s.name)

0    2
1    3
2    3
3    1
4    2
Name: something, dtype: int32
something


In [41]:
# We can rename a Series with the pandas.Series.rename() method.

In [42]:
s2 = s
s2.name

'something'

In [43]:
s2 = s.rename("different")
s2

0    2
1    3
2    3
3    1
4    2
Name: different, dtype: int32

In [44]:
s.name

'something'

In [45]:
s.rename('laksh')

0    2
1    3
2    3
3    1
4    2
Name: laksh, dtype: int32

In [46]:
# Note that s and s2 refer to different objects.

## get infomation of Series

In [47]:
print(s.dtype)
print('.')
print(s.ndim)
print('.')
print(s.size)
print('.')
print(s.name)
print('.')
print(s.hasnans)
print('.')
print(s.index)
print('.')
print(s.head(2))
print('.')
print(s.tail(2))
print('.')
print(s.info)

int32
.
1
.
5
.
something
.
False
.
RangeIndex(start=0, stop=5, step=1)
.
0    2
1    3
Name: something, dtype: int32
.
3    1
4    2
Name: something, dtype: int32
.
<bound method Series.info of 0    2
1    3
2    3
3    1
4    2
Name: something, dtype: int32>


## Merge two Series

In [137]:
combined = pd.concat([s1, s2])
print(combined)

a    1
b    2
c    3
a    4
b    5
c    6
dtype: int64


In [135]:
import pandas as pd

s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
s2 = pd.Series([4, 5, 6], index=['a', 'b', 'c'])

df = pd.concat([s1, s2], axis=1)
df.columns = ['Series1', 'Series2']
print(df)


   Series1  Series2
a        1        4
b        2        5
c        3        6


## unique categories

In [149]:
s = pd.Series(['p', 'q', 'r', 's', 'q'], dtype="category")
s = s.cat.add_categories(['t','l'])
s

0    p
1    q
2    r
3    s
4    q
dtype: category
Categories (6, object): ['p', 'q', 'r', 's', 't', 'l']

## Series Releted all function

In [151]:
import pandas as pd
import numpy as np

# Sample Series with one missing value (NaN)
# We use this same series for all functions so you can compare the results easily.
s = pd.Series([10, 20, 30, None, 40, 50])
print("Original Series:\n", s, "\n")

# 1. count() → Counts only non-missing (non-NaN) values
print("count       →", s.count())

# 2. sum() → Adds up all non-missing values in the series
print("sum         →", s.sum())

# 3. mean() → Calculates the average (sum of values ÷ count)
print("mean        →", s.mean())

# 4. median() → Finds the middle value when data is sorted
print("median      →", s.median())

# 5. min() → Finds the smallest value in the series
print("min         →", s.min())

# 6. max() → Finds the largest value in the series
print("max         →", s.max())

# 7. mode() → Returns the most frequently occurring value(s)
print("mode        →", s.mode().tolist())

# 8. abs() → Returns the absolute value of each element
# (negative numbers become positive, positive numbers stay same)
print("abs         →\n", s.abs())

# 9. prod() → Multiplies all non-missing values together
print("prod        →", s.prod(skipna=True))

# 10. std() → Standard deviation (how spread out the values are from the mean)
print("std         →", s.std())

# 11. var() → Variance (square of standard deviation, measure of spread)
print("var         →", s.var())

# 12. sem() → Standard error of the mean (std ÷ sqrt(count)), used in statistics
print("sem         →", s.sem())

# 13. skew() → Measures asymmetry of the data distribution
# (0 means perfectly symmetrical, positive means right-skewed, negative means left-skewed)
print("skew        →", s.skew())

# 14. kurt() → Measures the 'tailedness' of the distribution
# (positive = heavy tails, negative = light tails compared to normal distribution)
print("kurt        →", s.kurt())

# 15. quantile(p) → Returns the value at the given percentile 'p'
# e.g., 0.5 means 50% (median), 0.25 means 25% (1st quartile)
print("quantile(0.5) →", s.quantile(0.5))

# 16. cumsum() → Cumulative sum (adds each value to all previous ones)
print("cumsum      →\n", s.cumsum())

# 17. cumprod() → Cumulative product (multiplies each value with all previous ones)
print("cumprod     →\n", s.cumprod())

# 18. cummax() → Cumulative maximum (largest value so far at each position)
print("cummax      →\n", s.cummax())

# 19. cummin() → Cumulative minimum (smallest value so far at each position)
print("cummin      →\n", s.cummin())

# 20. value_counts() → Counts how many times each unique value appears
print("value_counts→\n", s.value_counts())

# 21. unique() → Returns all unique values (NaN is included if present)
print("unique      →", s.unique())

# 22. nlargest(n) → Returns the top 'n' largest values
print("nlargest(3) →\n", s.nlargest(3))

# 23. nsmallest(n) → Returns the top 'n' smallest values
print("nsmallest(3)→\n", s.nsmallest(3))


Original Series:
 0    10.0
1    20.0
2    30.0
3     NaN
4    40.0
5    50.0
dtype: float64 

count       → 5
sum         → 150.0
mean        → 30.0
median      → 30.0
min         → 10.0
max         → 50.0
mode        → [10.0, 20.0, 30.0, 40.0, 50.0]
abs         →
 0    10.0
1    20.0
2    30.0
3     NaN
4    40.0
5    50.0
dtype: float64
prod        → 12000000.0
std         → 15.811388300841896
var         → 250.0
sem         → 7.071067811865475
skew        → 0.0
kurt        → -1.2000000000000002
quantile(0.5) → 30.0
cumsum      →
 0     10.0
1     30.0
2     60.0
3      NaN
4    100.0
5    150.0
dtype: float64
cumprod     →
 0          10.0
1         200.0
2        6000.0
3           NaN
4      240000.0
5    12000000.0
dtype: float64
cummax      →
 0    10.0
1    20.0
2    30.0
3     NaN
4    40.0
5    50.0
dtype: float64
cummin      →
 0    10.0
1    10.0
2    10.0
3     NaN
4    10.0
5    10.0
dtype: float64
value_counts→
 10.0    1
20.0    1
30.0    1
40.0    1
50.0    1
Name: co

# 3. DataFrame

In [48]:
# DataFrame is a 2-dimensional labeled data structure with columns of potentially different types. 
# You can think of it like a spreadsheet or SQL table, or a dict of Series objects.

## DataFrame accepts many different kinds of input:
###   i. Dict of Series, 1D ndarrays, lists, dicts, or tuples
###   ii. A Series
###   iiI. 2-D numpy.ndarray
###   iv. Another DataFrame

## From dict from Series

In [49]:
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
    "two": pd.Series([1.0, 2.0, 3.0, 4.0], index=["a", "b", "c", "d"]),
}
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [50]:
pd.DataFrame(d, index=["d", "b", "a"])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [51]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "one"])

Unnamed: 0,two,one
d,4.0,
b,2.0,2.0
a,1.0,1.0


In [52]:
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [53]:
# When a particular set of columns is passed along with a dict of data, 
#                     the passed columns override the keys in the dict.

In [54]:
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [55]:
df.columns

Index(['one', 'two'], dtype='object')

## From dict of ndarrays / lists

In [56]:
# All ndarrays must share the same length. If an index is passed, it must also be the same length as the arrays.
# If no index is passed, the result will be range(n), where n is the array length.

In [57]:
d = {"one": [1.0, 2.0, 3.0, 4.0], 
     "two": [4.0, 3.0, 2.0, 1.0]}
d

{'one': [1.0, 2.0, 3.0, 4.0], 'two': [4.0, 3.0, 2.0, 1.0]}

In [58]:
pd.DataFrame(d, index=["a", "b", "c", "d"])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [59]:
# DataFrame is not intended to work exactly like a 2-dimensional NumPy ndarray.

## From a list of dicts

In [60]:
data2 = [{"a": 1, "b": 2}, {"a": 5, "b": 10, "c": 20}]
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [61]:
pd.DataFrame(data2, index=["first", "second"])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [62]:
pd.DataFrame(data2, columns=["b", "a"])

Unnamed: 0,b,a
0,2,1
1,10,5


## From a dict of tuples

In [63]:
pd.DataFrame(
    {
        ("a", "b"): {("A", "B"): 1, ("A", "C"): 2},
        ("a", "a"): {("A", "C"): 3, ("A", "B"): 4},
        ("a", "c"): {("A", "B"): 5, ("A", "C"): 6},
        ("b", "a"): {("A", "C"): 7, ("A", "B"): 8},
        ("b", "b"): {("A", "D"): 9, ("A", "B"): 10},
    }
)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


## From a Series

In [64]:
ser = pd.Series(range(3), index=list("abc"), name="ser")
ser

a    0
b    1
c    2
Name: ser, dtype: int64

In [65]:
pd.DataFrame(ser)

Unnamed: 0,ser
a,0
b,1
c,2


# 4. Column selection, addition, deletion in Dataframe

## Create

In [102]:
d = {
    "one": pd.Series([1.0, 2.0, 3.0], index=["a", "b", "c"]),
    "two": pd.Series([4.0, 5.0, 6.0, 7.0], index=["a", "b", "c", "d"]),
}
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,5.0
c,3.0,6.0
d,,7.0


# Select

In [103]:
df['two']

a    4.0
b    5.0
c    6.0
d    7.0
Name: two, dtype: float64

In [104]:
df.loc['b']

one    2.0
two    5.0
Name: b, dtype: float64

In [105]:
df.iloc[2]

one    3.0
two    6.0
Name: c, dtype: float64

In [110]:
value = df.loc['c', 'two']
value

np.float64(6.0)

In [113]:
value = df.iloc[1, 1]
value

np.float64(5.0)

In [114]:
# Interate Dataframe to display the columns
import pandas as pd

data = {
    'Student': ["Laksh", "Amit", "John", "Jakob", "Devid", "Steve"],
    'Rank': [1, 2, 3, 4, 5, 6],
    'Marks': [99, 98, 97, 96, 95, 94],
}

# Use the index argument to set your Index. 
df = pd.DataFrame(data, index=["Student_1", "Student_2", "Student_3", "Student_4", "Student_5", "Student_6"])
print(df)

for col in df:
    print(col)
    print(df[col].values)

          Student  Rank  Marks
Student_1   Laksh     1     99
Student_2    Amit     2     98
Student_3    John     3     97
Student_4   Jakob     4     96
Student_5   Devid     5     95
Student_6   Steve     6     94
Student
['Laksh' 'Amit' 'John' 'Jakob' 'Devid' 'Steve']
Rank
[1 2 3 4 5 6]
Marks
[99 98 97 96 95 94]


## Adding

In [70]:
df["three"] = df["one"] * df["two"]
df

Unnamed: 0,one,two,three
a,1.0,4.0,4.0
b,2.0,5.0,10.0
c,3.0,6.0,18.0
d,,7.0,


In [71]:
df["flag"] = df["one"] > 2
df

Unnamed: 0,one,two,three,flag
a,1.0,4.0,4.0,False
b,2.0,5.0,10.0,False
c,3.0,6.0,18.0,True
d,,7.0,,False


## Delete

In [72]:
del df["two"]
df

Unnamed: 0,one,three,flag
a,1.0,4.0,False
b,2.0,10.0,False
c,3.0,18.0,True
d,,,False


In [73]:
df["foo"] = "bar"
df

Unnamed: 0,one,three,flag,foo
a,1.0,4.0,False,bar
b,2.0,10.0,False,bar
c,3.0,18.0,True,bar
d,,,False,bar


In [74]:
df["one_trunc"] = df["one"][:2]
df

Unnamed: 0,one,three,flag,foo,one_trunc
a,1.0,4.0,False,bar,1.0
b,2.0,10.0,False,bar,2.0
c,3.0,18.0,True,bar,
d,,,False,bar,


## Insert Row

In [75]:
# You can insert raw ndarrays but their length must match the length of the DataFrame’s index.

In [76]:
# By default, columns get inserted at the end.

In [77]:
df.loc['e'] = {
    'one': 4.0,
    'three': 20.0,
    'flag': True,
    'foo': 'bar',
    'one_trunc': 4.0
}
df

Unnamed: 0,one,three,flag,foo,one_trunc
a,1.0,4.0,False,bar,1.0
b,2.0,10.0,False,bar,2.0
c,3.0,18.0,True,bar,
d,,,False,bar,
e,4.0,20.0,True,bar,4.0


## DataFrame Attribute

In [115]:
data = {
    'Student': ["Laksh", "Amit", "John", "Jakob", "Devid", "Steve"],
    'Rank': [1, 2, 3, 4, 5, 6],
    'Marks': [99, 98, 97, 96, 95, 94],
}

# Use the index argument to set your Index. 
df = pd.DataFrame(data, index=["Student_1", "Student_2", "Student_3", "Student_4", "Student_5", "Student_6"])
print(df)

          Student  Rank  Marks
Student_1   Laksh     1     99
Student_2    Amit     2     98
Student_3    John     3     97
Student_4   Jakob     4     96
Student_5   Devid     5     95
Student_6   Steve     6     94


In [120]:
print(df.dtypes)
print('.')
print(df.ndim)
print('.')
print(df.size)
print('.')
print(df.shape)
print('.')
print(df.index)
print('.')
print(df.T)
print('.')
print(df.head(2))
print('.')
print(df.tail(2))

Student    object
Rank        int64
Marks       int64
dtype: object
.
2
.
18
.
(6, 3)
.
Index(['Student_1', 'Student_2', 'Student_3', 'Student_4', 'Student_5',
       'Student_6'],
      dtype='object')
.
        Student_1 Student_2 Student_3 Student_4 Student_5 Student_6
Student     Laksh      Amit      John     Jakob     Devid     Steve
Rank            1         2         3         4         5         6
Marks          99        98        97        96        95        94
.
          Student  Rank  Marks
Student_1   Laksh     1     99
Student_2    Amit     2     98
.
          Student  Rank  Marks
Student_5   Devid     5     95
Student_6   Steve     6     94


## Data alignment and arithmetic

In [122]:
df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])

df2 = pd.DataFrame(np.random.randn(7, 3), columns=["A", "B", "C"])
print(df)
print('.')
print(df2)

          A         B         C         D
0  0.903223  0.950118  0.044327 -1.055534
1 -0.886110  0.076140  0.960511  0.283844
2 -0.078336  0.644232 -0.576075 -1.880245
3  0.066664 -0.217477  1.156130  1.372837
4 -1.008774 -0.011131 -0.847222  0.535669
5  0.024618  0.074701 -0.874138  0.299191
6  0.102853  0.885031  0.795925  1.223513
7  0.158203  0.376486  0.805186  0.107284
8 -1.331448  1.338056  2.411440 -1.236057
9  0.446311 -2.417250 -1.704313  0.211457
.
          A         B         C
0 -0.600594  0.362277 -0.594227
1 -0.227778  1.784921 -0.399010
2  0.201326  0.284135  0.863718
3 -0.241675 -0.848118 -0.566627
4 -1.126482 -0.646305  0.061900
5 -0.492923 -0.052796  0.000546
6 -0.261949  0.657813 -0.304124


In [123]:
df + df2

Unnamed: 0,A,B,C,D
0,0.30263,1.312395,-0.5499,
1,-1.113888,1.861061,0.561501,
2,0.12299,0.928368,0.287643,
3,-0.175011,-1.065595,0.589503,
4,-2.135256,-0.657435,-0.785323,
5,-0.468305,0.021904,-0.873592,
6,-0.159095,1.542845,0.491801,
7,,,,
8,,,,
9,,,,


In [124]:
df * 5 + 2

Unnamed: 0,A,B,C,D
0,6.516117,6.750589,2.221633,-3.277671
1,-2.43055,2.3807,6.802557,3.419221
2,1.60832,5.221162,-0.880377,-7.401224
3,2.333321,0.912614,7.78065,8.864187
4,-3.043869,1.944347,-2.236111,4.678346
5,2.12309,2.373503,-2.370689,3.495953
6,2.514267,6.425157,5.979624,8.117567
7,2.791015,3.882428,6.02593,2.536421
8,-4.657238,8.69028,14.057199,-4.180285
9,4.231553,-10.086248,-6.521565,3.057284


In [125]:
1 / df

Unnamed: 0,A,B,C,D
0,1.107146,1.052501,22.559858,-0.947388
1,-1.128528,13.13369,1.041112,3.52306
2,-12.765527,1.552235,-1.735884,-0.531846
3,15.000575,-4.598185,0.864955,0.728418
4,-0.991303,-89.843062,-1.180328,1.866823
5,40.620539,13.386778,-1.143984,3.342352
6,9.722571,1.129903,1.2564,0.817318
7,6.320996,2.656144,1.241949,9.321033
8,-0.751062,0.747353,0.41469,-0.809024
9,2.240592,-0.413693,-0.586747,4.7291


### boolean Operation

In [127]:
df1 = pd.DataFrame({"a": [1, 0, 1], "b": [0, 1, 1]}, dtype=bool)

df2 = pd.DataFrame({"a": [0, 1, 1], "b": [1, 1, 0]}, dtype=bool)

print(df1)
print('.')
print(df2)

       a      b
0   True  False
1  False   True
2   True   True
.
       a      b
0  False   True
1   True   True
2   True  False


In [128]:
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [129]:
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


In [130]:
df[:5].T

Unnamed: 0,0,1,2,3,4
A,0.903223,-0.88611,-0.078336,0.066664,-1.008774
B,0.950118,0.07614,0.644232,-0.217477,-0.011131
C,0.044327,0.960511,-0.576075,1.15613,-0.847222
D,-1.055534,0.283844,-1.880245,1.372837,0.535669


## Merge DataFrame

In [131]:
import pandas as pd

data1 = {
    'id': ["S01", "S02", "S03", "S04", "S05"],
    'Student': ["Laksh", "Taksh", "Daksh", "Taksh", "Darsh"],
    'Roll': [101, 102, 103, 104, 105],
}

data2 = {
    'Rank': [3, 2, 4, 5, 1],
    'Marks': [12, 35, 53, 45, 23]
}

dataFrame1 = pd.DataFrame(data1)
dataFrame2 = pd.DataFrame(data2)


resDf = dataFrame1.join(dataFrame2)
print(resDf)

print(resDf.T)

    id Student  Roll  Rank  Marks
0  S01   Laksh   101     3     12
1  S02   Taksh   102     2     35
2  S03   Daksh   103     4     53
3  S04   Taksh   104     5     45
4  S05   Darsh   105     1     23
             0      1      2      3      4
id         S01    S02    S03    S04    S05
Student  Laksh  Taksh  Daksh  Taksh  Darsh
Roll       101    102    103    104    105
Rank         3      2      4      5      1
Marks       12     35     53     45     23


In [141]:
import pandas as pd

s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
s2 = pd.Series([4, 5, 6], index=['a', 'b', 'c'])

df = pd.concat([s1, s2], axis=1)
df.columns = ['Series1', 'Series2']

print(df)


   Series1  Series2
a        1        4
b        2        5
c        3        6


In [176]:
import pandas as pd

data1 = {
    'id': ["S01", "S02", "S03", "S04", "S05"],
    'Student': ["Laksh", "Taksh", "Daksh", "Taksh", "Darsh"],
    'Roll': [101, 102, 103, 104, 105],
}

data2 = {
    'id': ["S06", "S07", "S08", "S09", "S10"],
    'Student': ["Paresh", "Jayesh", "Suresh", "Rajesh", "Naresh"],
    'Roll': [106, 107, 108, 109, 110],
}

dataFrame1 = pd.DataFrame(data1, index=["Student_1", "Student_2", "Student_3", "Student_4", "Student_5",])
dataFrame2 = pd.DataFrame(data2, index=[ "Student_6", "Student_7", "Student_8", "Student_9", "Student_10",])

resDf = pd.concat([dataFrame1, dataFrame2])
print(resDf)

             id Student  Roll
Student_1   S01   Laksh   101
Student_2   S02   Taksh   102
Student_3   S03   Daksh   103
Student_4   S04   Taksh   104
Student_5   S05   Darsh   105
Student_6   S06  Paresh   106
Student_7   S07  Jayesh   107
Student_8   S08  Suresh   108
Student_9   S09  Rajesh   109
Student_10  S10  Naresh   110


# 5. Essential basic functionality

In [177]:
index = pd.date_range("1/1/2000", periods=8)

s = pd.Series(np.random.randn(5), index=["a", "b", "c", "d", "e"])

df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=["A", "B", "C"])
print(df)
val = df.to_numpy()
print(val)

                   A         B         C
2000-01-01 -0.827602  0.131651  0.549217
2000-01-02  1.883536  0.284124  0.321259
2000-01-03 -0.434134 -1.343674 -0.116258
2000-01-04  1.043556  1.352410  1.492259
2000-01-05  0.327328 -0.475475  0.580882
2000-01-06  0.292708 -0.956625  1.403021
2000-01-07 -0.021337 -0.566926  2.152724
2000-01-08 -0.973232  0.776580 -0.326176
[[-0.82760155  0.13165115  0.54921689]
 [ 1.88353574  0.28412365  0.32125885]
 [-0.43413393 -1.34367401 -0.11625765]
 [ 1.04355638  1.35241033  1.49225906]
 [ 0.32732788 -0.47547503  0.58088218]
 [ 0.29270779 -0.95662517  1.40302096]
 [-0.02133748 -0.56692598  2.15272354]
 [-0.97323189  0.77658    -0.3261759 ]]


## DataFrame Functions

In [178]:
import pandas as pd
import numpy as np

# Sample dataset for demonstration
data = {
    "Name": ["Aarav", "Diya", "Kunal", "Meera", "Rohan", "Isha", "Aryan", "Tanvi"],
    "Department": ["HR", "IT", "Finance", "IT", "Finance", "HR", "Finance", "IT"],
    "Salary": [50000, 60000, 75000, 65000, 70000, 52000, 72000, 58000],
    "Joining_Year": [2018, 2019, 2017, 2020, 2018, 2019, 2017, 2020]
}

df = pd.DataFrame(data)

# ==============================
# 1. head()
# ==============================
# PURPOSE: Returns the first 'n' rows of the DataFrame.
# WHY USE IT? To quickly preview the start of your dataset without printing all rows.
print("1. head() →\n", df.head(), "\n")

# ==============================
# 2. tail()
# ==============================
# PURPOSE: Returns the last 'n' rows of the DataFrame.
# WHY USE IT? Useful to see the ending records and check dataset completeness.
print("2. tail() →\n", df.tail(), "\n")

# ==============================
# 3. info()
# ==============================
# PURPOSE: Displays DataFrame structure, column names, data types, and null count.
# WHY USE IT? Helps in understanding dataset metadata before analysis.
print("3. info() →")
print(df.info(), "\n")

# ==============================
# 4. describe()
# ==============================
# PURPOSE: Gives statistical summary for numeric columns.
# WHY USE IT? Quickly checks min, max, mean, quartiles for data insights.
print("4. describe() →\n", df.describe(), "\n")

# ==============================
# 5. shape
# ==============================
# PURPOSE: Returns a tuple (rows, columns) showing dataset dimensions.
# WHY USE IT? Essential for understanding dataset size.
print("5. shape →", df.shape, "\n")

# ==============================
# 6. columns
# ==============================
# PURPOSE: Lists all column names.
# WHY USE IT? Useful when you forget column spelling or want to rename them.
print("6. columns →", df.columns, "\n")

# ==============================
# 7. index
# ==============================
# PURPOSE: Returns the index (row labels) of the DataFrame.
# WHY USE IT? Helps in row selection and indexing operations.
print("7. index →", df.index, "\n")

# ==============================
# 8. dtypes
# ==============================
# PURPOSE: Shows the data type for each column.
# WHY USE IT? Ensures correct types for calculations and operations.
print("8. dtypes →\n", df.dtypes, "\n")

# ==============================
# 9. sort_values()
# ==============================
# PURPOSE: Sorts rows by one or more columns.
# WHY USE IT? Helps in ranking, ordering, or prioritizing data.
print("9. sort_values('Salary') →\n", df.sort_values(by="Salary"), "\n")

# ==============================
# 10. groupby()
# ==============================
# PURPOSE: Groups data based on a column and performs aggregate calculations.
# WHY USE IT? Summarizes large datasets by category.
print("10. groupby('Department').mean() →\n", df.groupby("Department").mean(numeric_only=True), "\n")

# ==============================
# 11. value_counts()
# ==============================
# PURPOSE: Counts the occurrence of each unique value in a column.
# WHY USE IT? Great for frequency distribution analysis.
print("11. value_counts('Department') →\n", df["Department"].value_counts(), "\n")

# ==============================
# 12. isnull()
# ==============================
# PURPOSE: Checks for missing values (NaN) in dataset.
# WHY USE IT? Important for data cleaning before analysis.
print("12. isnull() →\n", df.isnull(), "\n")

# ==============================
# 13. dropna()
# ==============================
# PURPOSE: Removes rows or columns with missing values.
# WHY USE IT? Keeps dataset clean by removing incomplete records.
print("13. dropna() →\n", df.dropna(), "\n")

# ==============================
# 14. fillna()
# ==============================
# PURPOSE: Replaces NaN values with a specified value.
# WHY USE IT? Retains data completeness without dropping rows.
print("14. fillna(0) →\n", df.fillna(0), "\n")

# ==============================
# 15. apply()
# ==============================
# PURPOSE: Applies a function to each column or row.
# WHY USE IT? Useful for custom calculations.
print("15. apply(lambda x: x) →\n", df.apply(lambda x: x), "\n")

# ==============================
# 16. pivot_table()
# ==============================
# PURPOSE: Creates a pivot table for summarizing data.
# WHY USE IT? Best for multidimensional data analysis.
print("16. pivot_table() →\n", df.pivot_table(values="Salary", index="Department", aggfunc="mean"), "\n")

# ==============================
# 17. duplicated()
# ==============================
# PURPOSE: Detects duplicate rows.
# WHY USE IT? Helps in data cleaning by finding repeated entries.
print("17. duplicated() →\n", df.duplicated(), "\n")

# ==============================
# 18. drop_duplicates()
# ==============================
# PURPOSE: Removes duplicate rows.
# WHY USE IT? Ensures data uniqueness.
print("18. drop_duplicates() →\n", df.drop_duplicates(), "\n")

# ==============================
# 19. rename()
# ==============================
# PURPOSE: Renames one or more columns.
# WHY USE IT? Improves column readability and correctness.
print("19. rename(columns={'Name': 'Full_Name'}) →\n", df.rename(columns={"Name": "Full_Name"}), "\n")

# ==============================
# 20. set_index()
# ==============================
# PURPOSE: Sets a column as the index of the DataFrame.
# WHY USE IT? Useful for indexed data access and better row labeling.
print("20. set_index('Name') →\n", df.set_index("Name"), "\n")


1. head() →
     Name Department  Salary  Joining_Year
0  Aarav         HR   50000          2018
1   Diya         IT   60000          2019
2  Kunal    Finance   75000          2017
3  Meera         IT   65000          2020
4  Rohan    Finance   70000          2018 

2. tail() →
     Name Department  Salary  Joining_Year
3  Meera         IT   65000          2020
4  Rohan    Finance   70000          2018
5   Isha         HR   52000          2019
6  Aryan    Finance   72000          2017
7  Tanvi         IT   58000          2020 

3. info() →
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Name          8 non-null      object
 1   Department    8 non-null      object
 2   Salary        8 non-null      int64 
 3   Joining_Year  8 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 388.0+ bytes
None 

4. describe() →
              Salary  Jo

In [179]:
df

Unnamed: 0,Name,Department,Salary,Joining_Year
0,Aarav,HR,50000,2018
1,Diya,IT,60000,2019
2,Kunal,Finance,75000,2017
3,Meera,IT,65000,2020
4,Rohan,Finance,70000,2018
5,Isha,HR,52000,2019
6,Aryan,Finance,72000,2017
7,Tanvi,IT,58000,2020


In [185]:
check = pd.DataFrame({
    "Name": ["Aarav", "Diya", "Kunal", "Meera", "Rohan", "Isha"]
})
check

Unnamed: 0,Name
0,Aarav
1,Diya
2,Kunal
3,Meera
4,Rohan
5,Isha


In [187]:
for i, j in check.iterrows():
    print(i)
    print(j)

0
Name    Aarav
Name: 0, dtype: object
1
Name    Diya
Name: 1, dtype: object
2
Name    Kunal
Name: 2, dtype: object
3
Name    Meera
Name: 3, dtype: object
4
Name    Rohan
Name: 4, dtype: object
5
Name    Isha
Name: 5, dtype: object


In [184]:
for i, j in df.iterrows():
    print(i)
    print(j)

0
Name            Aarav
Department         HR
Salary          50000
Joining_Year     2018
Name: 0, dtype: object
1
Name             Diya
Department         IT
Salary          60000
Joining_Year     2019
Name: 1, dtype: object
2
Name              Kunal
Department      Finance
Salary            75000
Joining_Year       2017
Name: 2, dtype: object
3
Name            Meera
Department         IT
Salary          65000
Joining_Year     2020
Name: 3, dtype: object
4
Name              Rohan
Department      Finance
Salary            70000
Joining_Year       2018
Name: 4, dtype: object
5
Name             Isha
Department         HR
Salary          52000
Joining_Year     2019
Name: 5, dtype: object
6
Name              Aryan
Department      Finance
Salary            72000
Joining_Year       2017
Name: 6, dtype: object
7
Name            Tanvi
Department         IT
Salary          58000
Joining_Year     2020
Name: 7, dtype: object


In [181]:
for i, row in df.iterrows():
    print(f"Index: {i}")
    print(f"Name: {row['Name']}")
    print(f"Department: {row['Department']}")
    print(f"Salary: {row['Salary']}")
    print(f"Joining Year: {row['Joining_Year']}")
    print("------")


Index: 0
Name: Aarav
Department: HR
Salary: 50000
Joining Year: 2018
------
Index: 1
Name: Diya
Department: IT
Salary: 60000
Joining Year: 2019
------
Index: 2
Name: Kunal
Department: Finance
Salary: 75000
Joining Year: 2017
------
Index: 3
Name: Meera
Department: IT
Salary: 65000
Joining Year: 2020
------
Index: 4
Name: Rohan
Department: Finance
Salary: 70000
Joining Year: 2018
------
Index: 5
Name: Isha
Department: HR
Salary: 52000
Joining Year: 2019
------
Index: 6
Name: Aryan
Department: Finance
Salary: 72000
Joining Year: 2017
------
Index: 7
Name: Tanvi
Department: IT
Salary: 58000
Joining Year: 2020
------


## Date and time

In [191]:
s = pd.Series(pd.date_range("20130101 09:10:12", periods=4))
print(s)
print('.')
print(s.dt.day)
print('.')
print(s.dt.hour)
print('.')
print(s.dt.second)

0   2013-01-01 09:10:12
1   2013-01-02 09:10:12
2   2013-01-03 09:10:12
3   2013-01-04 09:10:12
dtype: datetime64[ns]
.
0    1
1    2
2    3
3    4
dtype: int32
.
0    9
1    9
2    9
3    9
dtype: int32
.
0    12
1    12
2    12
3    12
dtype: int32


## smallest / largest values

In [192]:
s = pd.Series(np.random.permutation(10))
s

0    8
1    7
2    4
3    2
4    6
5    9
6    5
7    1
8    3
9    0
dtype: int32

In [196]:
print(s.sort_values())
print('.')
print(s.nsmallest(3))
print('.')
print(s.nlargest(3))

9    0
7    1
3    2
8    3
2    4
6    5
4    6
1    7
0    8
5    9
dtype: int32
.
9    0
7    1
3    2
dtype: int32
.
5    9
0    8
1    7
dtype: int32


In [197]:
df = pd.DataFrame(
    {
        "a": [-2, -1, 1, 10, 8, 11, -1],
        "b": list("abdceff"),
        "c": [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0],
    }
)

df

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
2,1,d,4.0
3,10,c,3.2
4,8,e,
5,11,f,3.0
6,-1,f,4.0


In [198]:
df.nlargest(3, "a")

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,


In [199]:
df.nsmallest(3, "a")

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
6,-1,f,4.0


In [200]:
df.nlargest(5, ["a", "c"])

Unnamed: 0,a,b,c
5,11,f,3.0
3,10,c,3.2
4,8,e,
2,1,d,4.0
6,-1,f,4.0


In [202]:
df.nsmallest(5, ["a", "c"])

Unnamed: 0,a,b,c
0,-2,a,1.0
1,-1,b,2.0
6,-1,f,4.0
2,1,d,4.0
4,8,e,


# 6. Best way to select data

In [88]:
dates = pd.date_range("2000-01-01", periods=8)
s = pd.Series(np.random.randn(8), index=dates, name="A")
print(s)


2000-01-01   -0.339264
2000-01-02   -0.103047
2000-01-03    0.849349
2000-01-04   -0.128944
2000-01-05   -0.660975
2000-01-06    0.841279
2000-01-07   -0.054106
2000-01-08   -0.073643
Freq: D, Name: A, dtype: float64


In [89]:
s[:5]

2000-01-01   -0.339264
2000-01-02   -0.103047
2000-01-03    0.849349
2000-01-04   -0.128944
2000-01-05   -0.660975
Freq: D, Name: A, dtype: float64

In [90]:
s[::2]

2000-01-01   -0.339264
2000-01-03    0.849349
2000-01-05   -0.660975
2000-01-07   -0.054106
Freq: 2D, Name: A, dtype: float64

In [91]:
s[::-1]

2000-01-08   -0.073643
2000-01-07   -0.054106
2000-01-06    0.841279
2000-01-05   -0.660975
2000-01-04   -0.128944
2000-01-03    0.849349
2000-01-02   -0.103047
2000-01-01   -0.339264
Freq: -1D, Name: A, dtype: float64

In [95]:
s2 = s.copy()
s2[:5] = 0
s2

2000-01-01    0.000000
2000-01-02    0.000000
2000-01-03    0.000000
2000-01-04    0.000000
2000-01-05    0.000000
2000-01-06    0.841279
2000-01-07   -0.054106
2000-01-08   -0.073643
Freq: D, Name: A, dtype: float64

In [96]:
df[:3]

NameError: name 'df' is not defined

In [97]:
dates = pd.date_range("2000-01-01", periods=8)

df = pd.DataFrame(
    np.random.randn(8, 4),  # 8 rows, 4 columns
    index=dates,
    columns=list("ABCD")
)

print(df)


                   A         B         C         D
2000-01-01 -1.129927  0.266282 -0.780818 -0.126935
2000-01-02 -0.021214 -0.371718 -0.708361  0.672822
2000-01-03 -0.176185  1.543052  1.360799  0.789936
2000-01-04  0.057946  0.518006 -0.272201 -1.201946
2000-01-05 -0.315901  0.291918 -0.413203 -0.318047
2000-01-06  1.257256  0.285180 -0.953190 -0.413126
2000-01-07  2.890086  0.501026 -1.079980 -0.799123
2000-01-08 -0.258022  1.090906 -0.756115  1.981814


In [98]:
df[:3]

Unnamed: 0,A,B,C,D
2000-01-01,-1.129927,0.266282,-0.780818,-0.126935
2000-01-02,-0.021214,-0.371718,-0.708361,0.672822
2000-01-03,-0.176185,1.543052,1.360799,0.789936


In [99]:
df[::-1]

Unnamed: 0,A,B,C,D
2000-01-08,-0.258022,1.090906,-0.756115,1.981814
2000-01-07,2.890086,0.501026,-1.07998,-0.799123
2000-01-06,1.257256,0.28518,-0.95319,-0.413126
2000-01-05,-0.315901,0.291918,-0.413203,-0.318047
2000-01-04,0.057946,0.518006,-0.272201,-1.201946
2000-01-03,-0.176185,1.543052,1.360799,0.789936
2000-01-02,-0.021214,-0.371718,-0.708361,0.672822
2000-01-01,-1.129927,0.266282,-0.780818,-0.126935


In [100]:
df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list('abcdef'),
                   columns=list('ABCD'))
df1

Unnamed: 0,A,B,C,D
a,-0.366946,0.356091,-1.340824,0.035219
b,-0.694481,-0.05524,-0.50141,0.141266
c,-0.488321,0.930629,-0.064162,-0.959352
d,1.375131,0.787081,0.554696,0.125181
e,-0.904974,0.537836,-0.314166,-0.401621
f,-1.740104,-0.70228,0.618564,0.882027


In [101]:
df1.loc[['a', 'b', 'd'], :]

Unnamed: 0,A,B,C,D
a,-0.366946,0.356091,-1.340824,0.035219
b,-0.694481,-0.05524,-0.50141,0.141266
d,1.375131,0.787081,0.554696,0.125181


In [102]:
df1.loc['a']

A   -0.366946
B    0.356091
C   -1.340824
D    0.035219
Name: a, dtype: float64

In [103]:
df1.loc['a'] > 0

A    False
B     True
C    False
D     True
Name: a, dtype: bool

In [104]:
df1.loc[:, df1.loc['a'] > 0]

Unnamed: 0,B,D
a,0.356091,0.035219
b,-0.05524,0.141266
c,0.930629,-0.959352
d,0.787081,0.125181
e,0.537836,-0.401621
f,-0.70228,0.882027


In [109]:
mask = pd.array([True, False, True, False, pd.NA, True], dtype="boolean")
mask

<BooleanArray>
[True, False, True, False, <NA>, True]
Length: 6, dtype: boolean

In [110]:
df1[mask]

Unnamed: 0,A,B,C,D
a,-0.366946,0.356091,-1.340824,0.035219
c,-0.488321,0.930629,-0.064162,-0.959352
f,-1.740104,-0.70228,0.618564,0.882027


In [112]:
s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4])
s

0    a
3    b
2    c
5    d
4    e
dtype: object

In [113]:
s.loc[3:5]

3    b
2    c
5    d
dtype: object

In [114]:
s.sort_index()

0    a
2    c
3    b
4    e
5    d
dtype: object

In [115]:
s.sort_index().loc[1:6]

2    c
3    b
4    e
5    d
dtype: object

In [116]:
s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2)))
s1

0   -0.760983
2   -0.485226
4   -0.197252
6    1.310222
8    2.018925
dtype: float64

In [117]:
s1.iloc[:3]

0   -0.760983
2   -0.485226
4   -0.197252
dtype: float64

In [118]:
s1.iloc[3]

np.float64(1.3102218243073558)

In [120]:
s1.iloc[:3] = 0
s1

0    0.000000
2    0.000000
4    0.000000
6    1.310222
8    2.018925
dtype: float64

## With Dataframe

In [121]:
df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=list(range(0, 12, 2)),
                   columns=list(range(0, 8, 2)))

df1

Unnamed: 0,0,2,4,6
0,0.901423,1.312387,-0.811822,0.274723
2,-0.129047,-0.036746,-0.261045,-0.084137
4,1.8402,-0.699777,0.22652,-1.415794
6,-0.49299,-0.454244,1.151386,-0.609273
8,-0.476935,-1.793629,0.115629,-0.000392
10,0.842736,0.042909,-1.395431,0.127153


In [122]:
df1.iloc[:3]

Unnamed: 0,0,2,4,6
0,0.901423,1.312387,-0.811822,0.274723
2,-0.129047,-0.036746,-0.261045,-0.084137
4,1.8402,-0.699777,0.22652,-1.415794


In [123]:
# 1:5 → Select rows from index position 1 up to but not including 5 (so rows with positions 1, 2, 3, 4).

# 2:4 → Select columns from index position 2 up to but not including 4 (so columns at positions 2 and 3).

df1.iloc[1:5, 2:4]

Unnamed: 0,4,6
2,-0.261045,-0.084137
4,0.22652,-1.415794
6,1.151386,-0.609273
8,0.115629,-0.000392


In [124]:
df1.iloc[[1, 3, 5], [1, 3]]

Unnamed: 0,2,6
2,-0.036746,-0.084137
6,-0.454244,-0.609273
10,0.042909,0.127153


In [125]:
df1.iloc[:, 1:3]

Unnamed: 0,2,4
0,1.312387,-0.811822
2,-0.036746,-0.261045
4,-0.699777,0.22652
6,-0.454244,1.151386
8,-1.793629,0.115629
10,0.042909,-1.395431


In [126]:
df1.iloc[1]

0   -0.129047
2   -0.036746
4   -0.261045
6   -0.084137
Name: 2, dtype: float64

In [127]:
# Same thing is also work in normal python and also numpy
x = list('abcdef')
x

['a', 'b', 'c', 'd', 'e', 'f']

In [131]:
print(x[4:10])
print(x[8:10])

['e', 'f']
[]


In [133]:
s = pd.Series(x)
print(s.iloc[4:10])
print(s.iloc[8:10])

4    e
5    f
dtype: object
Series([], dtype: object)


In [138]:
dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB'))
dfl

Unnamed: 0,A,B
0,0.259986,0.853157
1,0.031974,0.101375
2,-1.216915,-2.038186
3,0.016638,1.88101
4,-0.785195,0.310626


In [142]:
dfl.iloc[:, 2:3]

0
1
2
3
4


In [143]:
dfl.iloc[:, 1:3]

Unnamed: 0,B
0,-0.670069
1,-1.393465
2,-0.767105
3,-1.067994
4,-0.204238


In [144]:
dfl.iloc[4:6]

Unnamed: 0,A,B
4,-0.818799,-0.204238


## Boolean indexing

In [151]:
s = pd.Series(range(-3, 4))
s

0   -3
1   -2
2   -1
3    0
4    1
5    2
6    3
dtype: int64

In [152]:
s[s > 0]

4    1
5    2
6    3
dtype: int64

In [153]:
s[(s < -1) | (s > 0.5)]

0   -3
1   -2
4    1
5    2
6    3
dtype: int64

In [154]:
s = pd.Series(np.arange(5), index=np.arange(5)[::-1], dtype='int64')
s

4    0
3    1
2    2
1    3
0    4
dtype: int64

## "where()" method