In [578]:
import pandas as pd
import numpy as np

# ================== Basics ==================

## ----------- Series -----------
> class pandas.Series(data=None, index=None, dtype=None, name=None, copy=None, fastpath=<no_default>)

### ----------- SERIES CREATION -----------

In [539]:
row = ["A", "B", "C"]
print(type(row))
print(row)

# ➤➤➤➤➤➤ defaults with header_name = 0  & index = range(0-...) ➤➤➤➤➤➤
print("\n-------------------------")
s1 = pd.Series(data = row, dtype = None, name = None, copy = False) 
print(type(s1))
s1

<class 'list'>
['A', 'B', 'C']

-------------------------
<class 'pandas.core.series.Series'>


0    A
1    B
2    C
dtype: object

In [287]:
print("\n------------ s2 -------------")
lst1 = [3, 2, np.nan] # [3.0, 2.0, NaN] ➤➤➤ float64
s2 = pd.Series(lst1)
print(s2)

print("\n------------ s3 -------------")
lst2 = ['a', 2, np.nan, 0] # [a, 2, NaN] ➤➤➤ object
# Mixed types → dtype becomes object
s3 = pd.Series(lst2)
print(s3)

# From dictionary (keys = index, values = data)
print("\n------------ s4 -------------")
s4 = pd.Series({"a": 10, "b": 20, "c": 30})
print(s4)


------------ s2 -------------
0    3.0
1    2.0
2    NaN
dtype: float64

------------ s3 -------------
0      a
1      2
2    NaN
3      0
dtype: object

------------ s4 -------------
a    10
b    20
c    30
dtype: int64


In [289]:
# Explicit index
print("\n------------ s5 -------------")
lst3 = ['a', 2, np.nan, 0]
s5 = pd.Series(lst3, index=["val1", lst3, None, np.nan])
print(s5)

print("\n------------ s6 -------------")
s6 = pd.Series(lst3, dtype = str) # object
print(s6)

print("\n------------ s7 -------------")
s7 = pd.Series(lst3, dtype = bool) # bool ❌ >> Values converted to [True, True, True, False]
print(s7)

print("\n------------ s8 -------------")
s8 = pd.Series(lst3, name = "Dummy-Series")
print(s8)


------------ s5 -------------
val1                a
[a, 2, nan, 0]      2
None              NaN
NaN                 0
dtype: object

------------ s6 -------------
0      a
1      2
2    NaN
3      0
dtype: object

------------ s7 -------------
0     True
1     True
2     True
3    False
dtype: bool

------------ s8 -------------
0      a
1      2
2    NaN
3      0
Name: Dummy-Series, dtype: object


### ----------- SERIES ATTRIBUTES -----------

In [None]:

lst = ['a', 2, np.nan, 0, 'a']
s = pd.Series(lst, name = "Dummy-Series", index=[0, "val1", lst, None, np.nan])

print(s.dtype)     # data type of the series itself
print(s.dtypes)     # data type

print("\n----------- [[ Transpose ]] --------------")
print(s.T)     # Transpose {Basically same in case of Series}

print("\n----------- [[ Representation ]] --------------")
print(s.name)      # name of series
print(s.values)    # underlying numpy array
print(s.index)     # Index object ::::: row labels

print("\n---------- [[ Dims ]] ---------------")
print(s.shape)     # n(rows), n(cols)
print(s.size)      # total n(elements)
print(s.ndim)      # dimension
print(s.nbytes)    # memory usage

print("\n---------- [[ Flags ]] ---------------")
print(s.empty)     # True/False if empty
print(s.is_unique) # True/False for uniqueness of elements
print(s.hasnans)   # True if NaNs present
print(s.flags)     # internal flags



object
object
(5,)

----------- [[ Transpose ]] --------------
0                      a
val1                   2
[a, 2, nan, 0, a]    NaN
None                   0
NaN                    a
Name: Dummy-Series, dtype: object

----------- [[ Representation ]] --------------
Dummy-Series
['a' 2 nan 0 'a']
Index([0, 'val1', ['a', 2, nan, 0, 'a'], None, nan], dtype='object')

---------- [[ Dims ]] ---------------
5
1
40

---------- [[ Flags ]] ---------------
False
False
True
<Flags(allows_duplicate_labels=True)>


In [None]:

print("\n------------ item:- Not a series method, --> it's for Numpy scalar method -------------")
print(s.item)

print("\n----------- items:- Reference to the method --------------")
print("----------- It basically means: This is a method attached to this object, but you haven’t called it yet. --------------")
srs.items


------------ item:- Not a series method, --> it's for Numpy scalar method -------------
<bound method IndexOpsMixin.item of 0                      a
val1                   2
[a, 2, nan, 0, a]    NaN
None                   0
NaN                    a
Name: Dummy-Series, dtype: object>

----------- items:- Reference to the method --------------
----------- It basically means: This is a method attached to this object, but you haven’t called it yet. --------------
<bound method Series.items of 0      a
1      2
2    NaN
3      0
dtype: object>


<bound method Series.items of 0      a
1      2
2    NaN
3      0
dtype: object>

### ----------- SERIES METHODS -----------

#### Representation/ Coversion

In [469]:
lst = ['a', 2, np.nan, 0, 'a']
s = pd.Series(lst, name = "Dummy-Series", index=[0, "val1", lst, None, np.nan])

# ➤➤➤➤➤➤ Returns index-object ➤➤➤➤➤➤
print("\n------------ keys() -------------")
print(s.keys())     # same as s.index

# ➤➤➤➤➤➤ Index object (all row labels) ➤➤➤➤➤➤
print("\n------------ index -------------")
print(s.index) 



------------ keys() -------------
Index([0, 'val1', ['a', 2, nan, 0, 'a'], None, nan], dtype='object')

------------ index -------------
Index([0, 'val1', ['a', 2, nan, 0, 'a'], None, nan], dtype='object')


In [567]:
# g  Returns an Iterator:- zip-object ➡️➡️➡️➡️
print("\n------------ items() -------------")
print(srs.items()) 

print("\n----------- Actually executes the method --------------")
for k, v in srs.items():
    print(f"k: {k}, v: {v}")

print("\n-------------------------")
[*indexes] = srs.items()
print(*indexes)  
print(indexes)       # (index, value) pairs 


------------ items() -------------
<zip object at 0x733fbf2df9c0>

----------- Actually executes the method --------------
k: 0, v: a
k: 1, v: 2
k: 2, v: nan
k: 3, v: 0

-------------------------
(0, 'a') (1, 2) (2, nan) (3, 0)
[(0, 'a'), (1, 2), (2, nan), (3, 0)]


#### Element Access/ Pop/ Assign Values

In [572]:
lst = ['a', 2, np.nan, 0, 'vallll']
s1 = pd.Series(lst, name = "Dummy-Series", index=[0, "val1", lst, None, np.nan])
s = pd.Series(lst, name = "Dummy-Series", index=[0, 'val1', 2.0, tuple(lst), np.nan])

print("\n------------ at: single ele, by label -------------")
# print(s1.at["val1"])        # ❌ TypeError: unhashable type: 'list':::: In Pandas, index labels must be hashable
# print(s.at[None])           # ❌ KeyError: None
# print(s.at['np.nan'])       # ❌ KeyError: 'np.nan'
# print(s.at[10])             # ❌ KeyError: 10
print(s.at['val1']) 
print(s.at[np.nan])
print(s.at[0])                # 0 as index is present in the series
print("\n------------")
s.at[np.nan] = 'val'
print(s)


print("\n------------ iat: single ele, by position -------------")
# print(s.iat["val1"])       # ❌ ValueError: iAt based indexing can only have integer indexers
# print(s.iat[10])           # ❌ IndexError: index 10 is out of bounds for axis 0 with size 5
print(s.iat[0]) 
print(s.iat[-1])   
print("\n------------")
s.iat[-1] = 'vallll'
print(s)


------------ at: single ele, by label -------------
2
vallll
a

------------
0                           a
val1                        2
2.0                       NaN
(a, 2, nan, 0, vallll)      0
NaN                       val
Name: Dummy-Series, dtype: object

------------ iat: single ele, by position -------------
a
val

------------
0                              a
val1                           2
2.0                          NaN
(a, 2, nan, 0, vallll)         0
NaN                       vallll
Name: Dummy-Series, dtype: object


In [372]:
lst = ['a', 2, np.nan, 0, 'vallll']
s = pd.Series(lst, name = "Dummy-Series", index=[0, 'val1', 2.0, tuple(lst), np.nan])

print("\n------------ loc: multiple ele, by label -------------")
# print(s.loc[tuple(lst)])      # ❌ IndexingError: Too many indexers
# s.loc[0:3]                    # ❌ TypeError: cannot do slice indexing on Index with these indexers [0] of type int
print(s.loc[0]) 
print(s.loc["val1"]) 
print("------------")
print(s.loc[np.nan])          
print(s.loc[2.0 : np.nan : 2])  # ➡️➡️ Last label's ele also included 
print("------------")
s.loc[: : 2] = 555
print(s)

print("\n------------ iloc: multiple ele, by position -------------")
print(s.iloc[0:3])             # ➡️➡️ Last label's ele NOT included
print("------------")
s.iloc[1:: 2] = 999
print(s)


------------ loc: multiple ele, by label -------------
a
2
------------
vallll
2.0       NaN
NaN    vallll
Name: Dummy-Series, dtype: object
------------
0                         555
val1                        2
2.0                       555
(a, 2, nan, 0, vallll)      0
NaN                       555
Name: Dummy-Series, dtype: object

------------ iloc: multiple ele, by position -------------
0       555
val1      2
2.0     555
Name: Dummy-Series, dtype: object
------------
0                         555
val1                      999
2.0                       555
(a, 2, nan, 0, vallll)    999
NaN                       555
Name: Dummy-Series, dtype: object


In [375]:
s = pd.Series(range(5))
print(s)

# Pop element
print(s.pop(0))
print("------------")
print(s)

0    0
1    1
2    2
3    3
4    4
dtype: int64
0
------------
1    1
2    2
3    3
4    4
dtype: int64


#### Mathematical

In [576]:
s = pd.Series([0.2, 0.4, 0.5, 0.8, 1])
print(s)

# Round values
print(s.round())

0    0.2
1    0.4
2    0.5
3    0.8
4    1.0
dtype: float64
0    0.0
1    0.0
2    0.0
3    1.0
4    1.0
dtype: float64


#### Comparisons & Transformations

In [385]:
a = pd.Series([1, 1, 1, np.nan, 1], index=['a', 'b', 'c', 'd', 'e'])
b = pd.Series([0, 1, 2, np.nan, 1], index=['a', 'b', 'c', 'd', 'f'])

# element-wise comparison
print(a.le(b))                # default fill-value None
print("------------")
print(a.le(b, fill_value=0))  # with fill for NaN alignment
print("------------")

# Transform with functions
s = pd.Series(range(5))
print(s)
print("------------")
print(s.transform([np.sqrt, np.exp]))


a    False
b     True
c     True
d    False
e    False
f    False
dtype: bool
------------
a    False
b     True
c     True
d    False
e    False
f     True
dtype: bool
------------
0    0
1    1
2    2
3    3
4    4
dtype: int64
------------
       sqrt        exp
0  0.000000   1.000000
1  1.000000   2.718282
2  1.414214   7.389056
3  1.732051  20.085537
4  2.000000  54.598150


#### Where, Mask

In [390]:
s = pd.Series(range(5))
s = pd.Series([0,0,0])
t = pd.Series([True, False, True, False, True])
t = pd.Series([False, True, False, True, False])

print(s.where(t, "not-true")) # change where condition is false
print("------------")
print(s.mask(t, "is-true")) # change where condition is true
print("------------")


srs = srs.set_flags(allows_duplicate_labels=False)
print(srs.flags)
print("------------")

srs.set_flags(allows_duplicate_labels=True)
print(srs.flags)


0    not-true
1           0
2    not-true
dtype: object
------------
0          0
1    is-true
2          0
dtype: object
------------
<Flags(allows_duplicate_labels=False)>
------------
<Flags(allows_duplicate_labels=False)>


## ----------- Dataframe -----------
> class pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)

### ----------- DATAFRAME CREATION -----------

In [419]:
print("\n----------- From dictionary of lists --------------")
dt = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [24, 30, 22],
    "City": ["Delhi", "Mumbai", np.nan]
}
print(dt)

print("\n-------------------------")
df1 = pd.DataFrame(dt)
print(df1)
print("-------------------------")
df1


----------- From dictionary of lists --------------
{'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [24, 30, 22], 'City': ['Delhi', 'Mumbai', nan]}

-------------------------
      Name  Age    City
0    Alice   24   Delhi
1      Bob   30  Mumbai
2  Charlie   22     NaN
-------------------------


Unnamed: 0,Name,Age,City
0,Alice,24,Delhi
1,Bob,30,Mumbai
2,Charlie,22,


In [421]:
print("\n----------- From list of dicts --------------")
dt = [
    {"Name": "Alice", "Age": 24, "Num": 5},
    {"Name": "Bob", "Age": 30, "Num": 5},
    {"Name": "Charlie", "Agessss": 35, "Num": 5}
]
print(dt)

print("\n-------------------------")
df2 = pd.DataFrame(dt)
df2


----------- From list of dicts --------------
[{'Name': 'Alice', 'Age': 24, 'Num': 5}, {'Name': 'Bob', 'Age': 30, 'Num': 5}, {'Name': 'Charlie', 'Agessss': 35, 'Num': 5}]

-------------------------


Unnamed: 0,Name,Age,Num,Agessss
0,Alice,24.0,5,
1,Bob,30.0,5,
2,Charlie,,5,35.0


In [430]:
print(type(df2.loc[0]["Age"]))
print(type(df2.loc[2]["Age"]))
print(type(df2.loc[2]["Num"]))
print(type(df2.loc[2]["Name"]))

<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.int64'>
<class 'str'>


In [431]:
print("\n----------- From numpy array --------------")
dt = np.arange(12).reshape(4,3)
print(dt)

print("\n-------------------------")
df3 = pd.DataFrame(dt)
print(df3)

print("\n-------------------------")
df3 = pd.DataFrame(dt, columns=["A","B","C"])
df3


----------- From numpy array --------------
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]

-------------------------
   0   1   2
0  0   1   2
1  3   4   5
2  6   7   8
3  9  10  11

-------------------------


Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [452]:

print("\n----------- From Series dict --------------")
dt1 = pd.Series([1,2], index=["a","b"])
dt2 = pd.Series([3,4,5], index=["a","b","c"])
dt3 = pd.Series([6,7,8,9], index=["a","b","c","d"])
dt4 = pd.Series([2, 3], index=['f', 'e'])
print(dt1)
print(dt2)
print(dt3)
print(dt4)

print("\n-------------------------")
df4 = pd.DataFrame({"col1": dt1, "col3": dt3, "col2": dt2, 'col4': dt4})
df4


----------- From Series dict --------------
a    1
b    2
dtype: int64
a    3
b    4
c    5
dtype: int64
a    6
b    7
c    8
d    9
dtype: int64
f    2
e    3
dtype: int64

-------------------------


Unnamed: 0,col1,col3,col2,col4
a,1.0,6.0,3.0,
b,2.0,7.0,4.0,
c,,8.0,5.0,
d,,9.0,,
e,,,,3.0
f,,,,2.0


In [524]:
print("\n----------- From Dataclass --------------")
from dataclasses import make_dataclass

Point = make_dataclass("Point", [("x", int), ("y", int)])
a = Point(0,0)
b = Point(0,3)
c = Point(2,3)

print(Point)
print(a, b, c, sep = "\n")

print("-----------")
df = pd.DataFrame([a, b, c])


----------- From Dataclass --------------
<class 'types.Point'>
Point(x=0, y=0)
Point(x=0, y=3)
Point(x=2, y=3)
-----------


In [466]:
dt = {'col1': [1, 2], 'col2': [3, 4]}
df1 = pd.DataFrame(data=dt)
print(df1)

print("\n-------------------------")
df2 = pd.DataFrame(data = None)
print(df2)
# df2

print("\n-------------------------")
print(df2.dtypes)
df3 = pd.DataFrame(dt, dtype=None)
print(df3.dtypes)

print("\n-------------------------")
df4 = pd.DataFrame(dt, dtype=np.float64)
print(df4.dtypes)
print("-------------------------")
print(df4)

print("\n-------------------------")
df5 = pd.DataFrame(dt, index=['x', 'y'])
print(df5)

print("\n-------------------------")
df6 = pd.DataFrame(dt, columns = ['col1', 'c2'])
print(df6)

print("\n-------------------------")
df7 = pd.DataFrame(dt, columns = ['c2'])
print(df7)



   col1  col2
0     1     3
1     2     4

-------------------------
Empty DataFrame
Columns: []
Index: []

-------------------------
Series([], dtype: object)
col1    int64
col2    int64
dtype: object

-------------------------
col1    float64
col2    float64
dtype: object
-------------------------
   col1  col2
0   1.0   3.0
1   2.0   4.0

-------------------------
   col1  col2
x     1     3
y     2     4

-------------------------
   col1   c2
0     1  NaN
1     2  NaN

-------------------------
Empty DataFrame
Columns: [c2]
Index: []


In [525]:
len(df)

3

### ----------- ADDING/ REMOVING CLOUMNS -----------

In [521]:
dt = {'col1': [1, 2], 'col2': [3, 4]}
df1 = pd.DataFrame(data=dt)
print(df1)

print("\n-------------------------")
# Add new column
df1["Country"] = ["India", "Japan"]
print(df1)
print("-------------------------")
# df1["Continent"] = ["Asia"]       # ❌ ValueError: Length of values (1) does not match length of index (2)
df1["Continent"] = "Asia"
print(df1)

# Drop column
# df1.drop("Country")       # ❌ KeyError: "['Country'] not found in axis"
# df1.drop("Country", axis=0, inplace=True)       # ❌ KeyError: "['Country'] not found in axis"
df1.drop("Country", axis=1, inplace=True)
df1

   col1  col2
0     1     3
1     2     4

-------------------------
   col1  col2 Country
0     1     3   India
1     2     4   Japan
-------------------------
   col1  col2 Country Continent
0     1     3   India      Asia
1     2     4   Japan      Asia


Unnamed: 0,col1,col2,Continent
0,1,3,Asia
1,2,4,Asia


### ----------- ADDING/ REMOVING ROWS -----------

In [529]:
dt = {'col1': [1, 2], 'col2': [3, 4]}
df1 = pd.DataFrame(data=dt)
print(df1)

print("\n-------------------------")
# Add row
df1.loc[len(df1)] = ["David", 29]
print(df1)

# Drop row
df1.drop(0, axis=0, inplace=True)
df1


   col1  col2
0     1     3
1     2     4

-------------------------
    col1  col2
0      1     3
1      2     4
2  David    29


Unnamed: 0,col1,col2
1,2,4
2,David,29


### ----------- DATAFRAME ATTRIBUTES -----------

In [577]:
dt = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [24, 30, 22],
    "City": ["Delhi", "Mumbai", np.nan],
    "Weight": [60, np.nan, 50]
}
df = pd.DataFrame(dt)
print(df)

print("\n-------------------------")
# print(df.dtype)       # ❌ AttributeError: 'DataFrame' object has no attribute 'dtype'
print(df.dtypes)    # dtypes
print("-------------------------")
print(df["Age"].dtypes)    # dtypes
print("-------------------------")
print(df.shape)     # dimensions
print("-------------------------")
print(len(df))     # n(rows)

print("\n----------- [[ Transpose ]] --------------")
print(df.T)

print("\n----------- [[ Representation ]] --------------")
print(df.values)     # Numpy representation
print("-------------------------")
print(df.index)      # row labels
print("-------------------------")
print(df.columns)    # column labels
print("-------------------------")
print(df.axes)       # axes of the df

print("\n---------- [[ Dims ]] ---------------")
print(df.size)       # int for n(eles)
print(df.ndim)       # int for n(axes / array dimensions)
# print(df.nbytes)       # ❌ AttributeError: 'DataFrame' object has no attribute 'nbytes'

print("\n---------- [[ Flags ]] ---------------")
print(df.empty)
print(df.flags)      # Get the properties associated with this pandas object
# print(df.is_unique)       # ❌ AttributeError: 'DataFrame' object has no attribute 'is_unique'
# print(df.hasnans)       # ❌ AttributeError: 'DataFrame' object has no attribute 'hasnans'

print("\n----------- [[ Style ]] --------------")
df.style          # Returns a Styler object


      Name  Age    City  Weight
0    Alice   24   Delhi    60.0
1      Bob   30  Mumbai     NaN
2  Charlie   22     NaN    50.0

-------------------------
Name       object
Age         int64
City       object
Weight    float64
dtype: object
-------------------------
int64
-------------------------
(3, 4)
-------------------------
3

----------- [[ Transpose ]] --------------
            0       1        2
Name    Alice     Bob  Charlie
Age        24      30       22
City    Delhi  Mumbai      NaN
Weight   60.0     NaN     50.0

----------- [[ Representation ]] --------------
[['Alice' 24 'Delhi' 60.0]
 ['Bob' 30 'Mumbai' nan]
 ['Charlie' 22 nan 50.0]]
-------------------------
RangeIndex(start=0, stop=3, step=1)
-------------------------
Index(['Name', 'Age', 'City', 'Weight'], dtype='object')
-------------------------
[RangeIndex(start=0, stop=3, step=1), Index(['Name', 'Age', 'City', 'Weight'], dtype='object')]

---------- [[ Dims ]] ---------------
12
2

---------- [[ Flags ]] -----

Unnamed: 0,Name,Age,City,Weight
0,Alice,24,Delhi,60.0
1,Bob,30,Mumbai,
2,Charlie,22,,50.0


### ----------- DATAFRAME METHODS -----------

In [537]:
dt = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [24, 30, 22],
    "City": ["Delhi", "Mumbai", np.nan],
    "Weight": [60, np.nan, 50]
}
df = pd.DataFrame(dt)
print(df)

# Get the 'info axis'
print("\n------------ keys() -------------")
print(df.keys())

# The index (row labels)
print("\n------------ index -------------")
print(df.index) 


      Name  Age    City  Weight
0    Alice   24   Delhi    60.0
1      Bob   30  Mumbai     NaN
2  Charlie   22     NaN    50.0

------------ keys() -------------
Index(['Name', 'Age', 'City', 'Weight'], dtype='object')

------------ index -------------
RangeIndex(start=0, stop=3, step=1)


## Series

❌❌❌❌❌❌❌

In [274]:
srs1 = dtdf["Name"]
print(srs1)
print(type(srs1))

print("\n-------------------------")
srs1s = pd.Series(dtdf["Name"], name="abc")
print(srs1)
print(type(srs1))

print("\n-------------------------")
srs1df = pd.DataFrame(srs1)
print(srs1df)
print(type(srs1df))

print("\n-------------------------")
srs1sdf = pd.DataFrame(srs1s)
print(srs1sdf)
print(type(srs1sdf))


0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
<class 'pandas.core.series.Series'>

-------------------------
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
<class 'pandas.core.series.Series'>

-------------------------
      Name
0    Alice
1      Bob
2  Charlie
<class 'pandas.core.frame.DataFrame'>

-------------------------
       abc
0    Alice
1      Bob
2  Charlie
<class 'pandas.core.frame.DataFrame'>


In [249]:
print(type([dtdf["Name"]]))

print("\n-------------------------")
print([dtdf["Name"]])

print("\n-------------------------")
srs2 = pd.Series([dtdf["Name"]], name="abc")
print(srs2)

print("\n-------------------------")
srs2df = pd.DataFrame(srs2)
srs2df

<class 'list'>

-------------------------
[0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object]

-------------------------
0    0      Alice
1        Bob
2    Charlie
Name: N...
Name: abc, dtype: object

-------------------------


Unnamed: 0,abc
0,0 Alice 1 Bob 2 Charlie Name: N...
