### Pandas 
Documentation: https://pandas.pydata.org/getting_started.html

In [162]:
import pandas as pd
pd.__version__

'2.2.1'

### Data Structures

#### 1D Array

In [163]:
array = pd.Series([1, 2, 3, 4, 5])

In [164]:
print(array[0])
print(array[1])
print(array[2:3 + 1])

1
2
2    3
3    4
dtype: int64


### 2D Array

In [165]:
df_man = pd.DataFrame({
    "Marks": [100, 98, 89, 98],
    "Blood Group": ["B+", "O+", "O-", "AB+"],
    "Name": ["Kunal", "Ajay", "David", "Paul"]
})

In [166]:
df_man

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
2,89,O-,David
3,98,AB+,Paul


In [167]:
import numpy as np
np.__version__

'1.26.4'

In [168]:
dates = pd.date_range("20240406", periods=4)

In [169]:
dates

DatetimeIndex(['2024-04-06', '2024-04-07', '2024-04-08', '2024-04-09'], dtype='datetime64[ns]', freq='D')

In [170]:
df = pd.DataFrame(np.random.randn(4, 4), index=dates, columns=list("ABCD"))

In [171]:
df

Unnamed: 0,A,B,C,D
2024-04-06,0.087672,0.000328,-0.676114,1.442403
2024-04-07,-0.505563,-0.446192,0.155131,1.462237
2024-04-08,1.226179,1.94799,-1.151932,-0.140219
2024-04-09,-1.540331,0.871603,-1.680646,1.705672


In [172]:
df.head()

Unnamed: 0,A,B,C,D
2024-04-06,0.087672,0.000328,-0.676114,1.442403
2024-04-07,-0.505563,-0.446192,0.155131,1.462237
2024-04-08,1.226179,1.94799,-1.151932,-0.140219
2024-04-09,-1.540331,0.871603,-1.680646,1.705672


In [173]:
df.head(1)

Unnamed: 0,A,B,C,D
2024-04-06,0.087672,0.000328,-0.676114,1.442403


In [174]:
df.tail(1)

Unnamed: 0,A,B,C,D
2024-04-09,-1.540331,0.871603,-1.680646,1.705672


In [175]:
df.index

DatetimeIndex(['2024-04-06', '2024-04-07', '2024-04-08', '2024-04-09'], dtype='datetime64[ns]', freq='D')

In [176]:
# df.to_csv(path_or_buf="./ab.csv")

In [177]:
# df.to_html(buf="data.html")

In [178]:
df.to_numpy()

array([[ 8.76716892e-02,  3.28323250e-04, -6.76113663e-01,
         1.44240253e+00],
       [-5.05562959e-01, -4.46192427e-01,  1.55130614e-01,
         1.46223650e+00],
       [ 1.22617926e+00,  1.94799004e+00, -1.15193215e+00,
        -1.40218912e-01],
       [-1.54033111e+00,  8.71602604e-01, -1.68064641e+00,
         1.70567166e+00]])

In [179]:
df.describe()

Unnamed: 0,A,B,C,D
count,4.0,4.0,4.0,4.0
mean,-0.183011,0.593432,-0.83839,1.117523
std,1.155486,1.055904,0.779128,0.846996
min,-1.540331,-0.446192,-1.680646,-0.140219
25%,-0.764255,-0.111302,-1.284111,1.046747
50%,-0.208946,0.435965,-0.914023,1.45232
75%,0.372299,1.140699,-0.468303,1.523095
max,1.226179,1.94799,0.155131,1.705672


In [180]:
# Transpose data
df.T

Unnamed: 0,2024-04-06,2024-04-07,2024-04-08,2024-04-09
A,0.087672,-0.505563,1.226179,-1.540331
B,0.000328,-0.446192,1.94799,0.871603
C,-0.676114,0.155131,-1.151932,-1.680646
D,1.442403,1.462237,-0.140219,1.705672


In [181]:
df_man

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
2,89,O-,David
3,98,AB+,Paul


In [182]:
df_man.describe()

Unnamed: 0,Marks
count,4.0
mean,96.25
std,4.924429
min,89.0
25%,95.75
50%,98.0
75%,98.5
max,100.0


In [183]:
df_man.T

Unnamed: 0,0,1,2,3
Marks,100,98,89,98
Blood Group,B+,O+,O-,AB+
Name,Kunal,Ajay,David,Paul


In [184]:
df_man.sort_values(by="Marks")

Unnamed: 0,Marks,Blood Group,Name
2,89,O-,David
1,98,O+,Ajay
3,98,AB+,Paul
0,100,B+,Kunal


In [185]:
df_man.sort_values(by="Marks", ascending=False)

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
3,98,AB+,Paul
2,89,O-,David


In [186]:
df_man["Marks"]

0    100
1     98
2     89
3     98
Name: Marks, dtype: int64

In [187]:
df_man["Name"]

0    Kunal
1     Ajay
2    David
3     Paul
Name: Name, dtype: object

In [188]:
df_man[0:3]

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
2,89,O-,David


In [189]:
# Location (.loc[row, col])

df_man.loc[2, "Marks"]

89

In [190]:
df_man.loc[2]

Marks             89
Blood Group       O-
Name           David
Name: 2, dtype: object

In [191]:
df_man.loc[:, ["Marks"]]

Unnamed: 0,Marks
0,100
1,98
2,89
3,98


In [192]:
df_man.loc[:, :]

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
2,89,O-,David
3,98,AB+,Paul


In [193]:
## Boolean Indexing, i.e select if only true
df_man[df_man["Marks"] > 90]

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
3,98,AB+,Paul


In [194]:
df_man[df_man["Blood Group"].str.endswith("+")]

Unnamed: 0,Marks,Blood Group,Name
0,100,B+,Kunal
1,98,O+,Ajay
3,98,AB+,Paul


In [195]:
df_man = df_man.reindex(index=df_man.index, columns=list(df_man.columns) + ["Last Name"])

In [196]:
# Fill Missing Values NaN
df_man = df_man.fillna("Empty")

In [197]:
df_man

Unnamed: 0,Marks,Blood Group,Name,Last Name
0,100,B+,Kunal,Empty
1,98,O+,Ajay,Empty
2,89,O-,David,Empty
3,98,AB+,Paul,Empty


In [202]:
df_man.at[0, "Last Name"] = "Singh"

In [200]:
df_man

Unnamed: 0,Marks,Blood Group,Name,Last Name
0,100,B+,Kunal,Singh
1,98,O+,Ajay,Empty
2,89,O-,David,Empty
3,98,AB+,Paul,Empty


In [203]:
pd.isna(df_man)

Unnamed: 0,Marks,Blood Group,Name,Last Name
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False


### User Defined Operation (using Lambda)

In [212]:
df_man.agg(lambda x: print(x))

0    100
1     98
2     89
3     98
Name: Marks, dtype: int64
0     B+
1     O+
2     O-
3    AB+
Name: Blood Group, dtype: object
0    Kunal
1     Ajay
2    David
3     Paul
Name: Name, dtype: object
0    Singh
1    Empty
2    Empty
3    Empty
Name: Last Name, dtype: object


Marks          None
Blood Group    None
Name           None
Last Name      None
dtype: object

In [214]:
s = pd.Series(np.random.randint(0, 9, size=10))

In [215]:
s

0    3
1    5
2    0
3    5
4    5
5    5
6    4
7    1
8    3
9    7
dtype: int32