### Pandas Labels

In [13]:
import pandas as pd
import numpy as np

# -------------------- Series Creation --------------------

# 1. Creating a Series with custom string labels
series1 = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(f"1. Series with custom string labels:\n{series1}")
# Output:
# a    10
# b    20
# c    30
# dtype: int64

# 2. Creating a Series with custom integer labels
series2 = pd.Series([100, 200, 300], index=[1, 2, 3])
print(f"\n2. Series with custom integer labels:\n{series2}")
# Output:
# 1    100
# 2    200
# 3    300
# dtype: int64

# 3. Creating a Series with custom date labels
date_labels = pd.date_range('2023-01-01', periods=3)
series3 = pd.Series([7, 14, 21], index=date_labels)
print(f"\n3. Series with custom date labels:\n{series3}")
# Output:
# 2023-01-01     7
# 2023-01-02    14
# 2023-01-03    21
# dtype: int64

# 4. Creating a Series with custom tuple labels
series4 = pd.Series([50, 60, 70], index=[('X', 1), ('Y', 2), ('Z', 3)])
print(f"\n4. Series with custom tuple labels:\n{series4}")
# Output:
# (X, 1)    50
# (Y, 2)    60
# (Z, 3)    70
# dtype: int64

# 5. Creating a Series using default integer labels (no custom index)
series5 = pd.Series([5, 10, 15])
print(f"\n5. Series with default integer labels:\n{series5}")
# Output:
# 0     5
# 1    10
# 2    15
# dtype: int64

# 6. Creating a Series with mixed index types (strings and integers)
series6 = pd.Series([1000, 2000, 3000], index=['a', 1, 'c'])
print(f"\n6. Series with mixed index types:\n{series6}")
# Output:
# a     1000
# 1     2000
# c     3000
# dtype: int64


# -------------------- DataFrame Creation --------------------

# 1. Creating a DataFrame with custom row and column labels
data1 = {'A': [10, 20, 30], 'B': [40, 50, 60]}
df1 = pd.DataFrame(data1, index=['x', 'y', 'z'])
print(f"\n1. DataFrame with custom row and column labels:\n{df1}")
# Output:
#     A   B
# x  10  40
# y  20  50
# z  30  60

# 2. Creating a DataFrame with default row labels and custom column labels
data2 = {'A': [100, 200, 300], 'B': [400, 500, 600]}
df2 = pd.DataFrame(data2, columns=['B', 'A'], index=[0, 1, 2])
print(f"\n2. DataFrame with default row labels and custom column labels:\n{df2}")
# Output:
#     B   A
# 0  400  100
# 1  500  200
# 2  600  300

# 3. Creating a DataFrame with custom column labels and default row labels
data3 = {'name': ['Alice', 'Bob', 'Charlie'], 'age': [25, 30, 35]}
df3 = pd.DataFrame(data3, columns=['name', 'age'], index=[0, 1, 2])
print(f"\n3. DataFrame with custom column labels and default row labels:\n{df3}")
# Output:
#        name  age
# 0     Alice   25
# 1       Bob   30
# 2  Charlie   35

# 4. Creating a DataFrame with mixed index labels (strings and integers)
data4 = {'X': [1, 2], 'Y': [3, 4]}
df4 = pd.DataFrame(data4, index=['row1', 2])
print(f"\n4. DataFrame with mixed index labels:\n{df4}")
# Output:
#        X  Y
# row1   1  3
# 2      2  4

# 5. Creating a DataFrame with MultiIndex (Hierarchical Indexing)
index = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 1)], names=['letter', 'number'])
data5 = {'Value': [10, 20, 30]}
df5 = pd.DataFrame(data5, index=index)
print(f"\n5. DataFrame with MultiIndex:\n{df5}")
# Output:
#               Value
# letter number       
# A      1          10
#        2          20
# B      1          30

# 6. Creating an empty DataFrame with column labels
df6 = pd.DataFrame(columns=['Column1', 'Column2', 'Column3'])
print(f"\n6. Empty DataFrame with column labels:\n{df6}")
# Output:
# Empty DataFrame
# Columns: [Column1, Column2, Column3]
# Index: []

# 7. Creating a DataFrame with date labels as index
dates = pd.date_range('20230101', periods=3)
data7 = {'Data': [100, 200, 300]}
df7 = pd.DataFrame(data7, index=dates)
print(f"\n7. DataFrame with date labels as index:\n{df7}")
# Output:
#             Data
# 2023-01-01   100
# 2023-01-02   200
# 2023-01-03   300


1. Series with custom string labels:
a    10
b    20
c    30
dtype: int64

2. Series with custom integer labels:
1    100
2    200
3    300
dtype: int64

3. Series with custom date labels:
2023-01-01     7
2023-01-02    14
2023-01-03    21
Freq: D, dtype: int64

4. Series with custom tuple labels:
(X, 1)    50
(Y, 2)    60
(Z, 3)    70
dtype: int64

5. Series with default integer labels:
0     5
1    10
2    15
dtype: int64

6. Series with mixed index types:
a    1000
1    2000
c    3000
dtype: int64

1. DataFrame with custom row and column labels:
    A   B
x  10  40
y  20  50
z  30  60

2. DataFrame with default row labels and custom column labels:
     B    A
0  400  100
1  500  200
2  600  300

3. DataFrame with custom column labels and default row labels:
      name  age
0    Alice   25
1      Bob   30
2  Charlie   35

4. DataFrame with mixed index labels:
      X  Y
row1  1  3
2     2  4

5. DataFrame with MultiIndex:
               Value
letter number       
A      1          10

### Accessing Data Using Pandas Labels 

In [14]:
import pandas as pd

# -------------------- Series Accessing --------------------

# Creating a Series with custom index labels
series = pd.Series([10, 20, 30], index=['a', 'b', 'c'])

# Accessing data using label-based indexing
print(f"Accessing data using label 'b': {series.loc['b']}")
# Output: 20

# Accessing data using integer-based indexing (position)
print(f"Accessing data using position 1: {series.iloc[1]}")
# Output: 20

# Accessing multiple labels using .loc[]
print(f"Accessing multiple labels ['a', 'c']: {series.loc[['a', 'c']]}")
# Output:
# a    10
# c    30
# dtype: int64

# Accessing multiple positions using .iloc[]
print(f"Accessing multiple positions [0, 2]: {series.iloc[[0, 2]]}")
# Output:
# a    10
# c    30
# dtype: int64

# -------------------- DataFrame Accessing --------------------

# Creating a DataFrame with custom row and column labels
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data, index=['x', 'y', 'z'])

# Accessing a specific row by label
print(f"Accessing row 'y': \n{df.loc['y']}")
# Output:
# A    2
# B    5
# Name: y, dtype: int64

# Accessing a specific row by position
print(f"Accessing row at position 1: \n{df.iloc[1]}")
# Output:
# A    2
# B    5
# Name: y, dtype: int64

# Accessing a specific column by label
print(f"Accessing column 'B': \n{df['B']}")
# Output:
# x    4
# y    5
# z    6
# Name: B, dtype: int64

# Accessing a specific column using .loc[]
print(f"Accessing column 'A' using .loc: \n{df.loc[:, 'A']}")
# Output:
# x    1
# y    2
# z    3
# Name: A, dtype: int64

# Accessing multiple rows and columns using .loc[]
print(f"Accessing rows 'x' and 'z' and columns 'A' and 'B': \n{df.loc[['x', 'z'], ['A', 'B']]}")
# Output:
#    A  B
# x  1  4
# z  3  6

# Accessing specific elements using .loc[]
print(f"Accessing element in row 'x' and column 'A': {df.loc['x', 'A']}")
# Output: 1

# Accessing data by position using .iloc[]
print(f"Accessing element in row at position 0 and column at position 1: {df.iloc[0, 1]}")
# Output: 4

# Accessing multiple rows and columns using .iloc[]
print(f"Accessing rows 0 and 2, and columns 0 and 1 using .iloc[]: \n{df.iloc[[0, 2], [0, 1]]}")
# Output:
#    A  B
# x  1  4
# z  3  6


Accessing data using label 'b': 20
Accessing data using position 1: 20
Accessing multiple labels ['a', 'c']: a    10
c    30
dtype: int64
Accessing multiple positions [0, 2]: a    10
c    30
dtype: int64
Accessing row 'y': 
A    2
B    5
Name: y, dtype: int64
Accessing row at position 1: 
A    2
B    5
Name: y, dtype: int64
Accessing column 'B': 
x    4
y    5
z    6
Name: B, dtype: int64
Accessing column 'A' using .loc: 
x    1
y    2
z    3
Name: A, dtype: int64
Accessing rows 'x' and 'z' and columns 'A' and 'B': 
   A  B
x  1  4
z  3  6
Accessing element in row 'x' and column 'A': 1
Accessing element in row at position 0 and column at position 1: 4
Accessing rows 0 and 2, and columns 0 and 1 using .iloc[]: 
   A  B
x  1  4
z  3  6


### Renaming/Modifying/Setting/Resetting Labels in Pandas

In [11]:
import pandas as pd
import numpy as np

# -------------------- Series Operations --------------------

# 1. Creating a Series with custom index labels
series = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(f"Original Series:\n{series}")
# Output:
# a    10
# b    20
# c    30
# dtype: int64

# 2. Renaming index labels using .rename()
renamed_series = series.rename({'a': 'x', 'b': 'y', 'c': 'z'})
print(f"\nSeries after renaming labels using .rename():\n{renamed_series}")
# Output:
# x    10
# y    20
# z    30
# dtype: int64

# 3. Setting a new index directly
series.index = ['x', 'y', 'z']
print(f"\nSeries after setting custom index labels:\n{series}")
# Output:
# x    10
# y    20
# z    30
# dtype: int64

# 4. Resetting index in Series
reset_series = series.reset_index(drop=True)
print(f"\nSeries after resetting index:\n{reset_series}")
# Output:
# 0    10
# 1    20
# 2    30
# dtype: int64

# -------------------- DataFrame Operations --------------------

# 1. Creating a DataFrame with custom index and columns
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['x', 'y', 'z'])
print(f"\nOriginal DataFrame:\n{df}")
# Output:
#    A  B
# x  1  4
# y  2  5
# z  3  6

# 2. Renaming index and columns using .rename()
renamed_df = df.rename(index={'x': 'alpha', 'y': 'beta'}, columns={'A': 'Alpha', 'B': 'Beta'})
print(f"\nDataFrame after renaming rows and columns using .rename():\n{renamed_df}")
# Output:
#        Alpha  Beta
# alpha      1     4
# beta       2     5
# z          3     6

# 3. Setting a column as the index using .set_index()
df_with_new_index = df.set_index('A')
print(f"\nDataFrame after setting column 'A' as index:\n{df_with_new_index}")
# Output:
#       B
# A      
# 1  4
# 2  5
# 3  6

# 4. Resetting the index of DataFrame to default integers
reset_df = df_with_new_index.reset_index()
print(f"\nDataFrame after resetting index:\n{reset_df}")
# Output:
#    A  B
# 0  1  4
# 1  2  5
# 2  3  6


Original Series:
a    10
b    20
c    30
dtype: int64

Series after renaming labels using .rename():
x    10
y    20
z    30
dtype: int64

Series after setting custom index labels:
x    10
y    20
z    30
dtype: int64

Series after resetting index:
0    10
1    20
2    30
dtype: int64

Original DataFrame:
   A  B
x  1  4
y  2  5
z  3  6

DataFrame after renaming rows and columns using .rename():
       Alpha  Beta
alpha      1     4
beta       2     5
z          3     6

DataFrame after setting column 'A' as index:
   B
A   
1  4
2  5
3  6

DataFrame after resetting index:
   A  B
0  1  4
1  2  5
2  3  6


In [19]:
### Pandas MultiIndex 

In [20]:
import pandas as pd

# -----------------------------------
# Creating a Series with MultiIndex (Hierarchical Indexing)
arrays = [['A', 'A', 'B', 'B'], [1, 2, 1, 2]]
multi_index = pd.MultiIndex.from_arrays(arrays, names=('Letter', 'Number'))
series_multiindex = pd.Series([10, 20, 30, 40], index=multi_index)

print(f"Series with MultiIndex:\n{series_multiindex}")
# Output:
# Letter  Number
# A       1          10
#         2          20
# B       1          30
#         2          40
# dtype: int64

# Accessing data for specific multi-index
print(f"\nAccessing data for ('A', 1): {series_multiindex.loc[('A', 1)]}")
# Output: 10

print(f"\nAccessing data for ('B', 2): {series_multiindex.loc[('B', 2)]}")
# Output: 40

# -----------------------------------
# Creating a DataFrame with MultiIndex (Hierarchical Indexing for Rows)
index = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 1), ('B', 2)], names=['Letter', 'Number'])
df_multiindex = pd.DataFrame({'Value': [10, 20, 30, 40]}, index=index)

print(f"\nDataFrame with MultiIndex (Rows):\n{df_multiindex}")
# Output:
#               Value
# Letter Number       
# A      1          10
#        2          20
# B      1          30
#        2          40

# Accessing data for specific multi-index (row index)
print(f"\nAccessing data for ('A', 1): \n{df_multiindex.loc[('A', 1)]}")
# Output:
# Value    10
# Name: (A, 1), dtype: int64

# Accessing rows for 'A' using MultiIndex (row labels)
print(f"\nAccessing rows for 'A': \n{df_multiindex.loc['A']}")
# Output:
#        Value
# Number       
# 1          10
# 2          20

# Accessing data for multiple levels in multi-index row
print(f"\nAccessing data for 'B' and Numbers 1 and 2: \n{df_multiindex.loc[('B', 1), 'Value']}")
# Output: 30


Series with MultiIndex:
Letter  Number
A       1         10
        2         20
B       1         30
        2         40
dtype: int64

Accessing data for ('A', 1): 10

Accessing data for ('B', 2): 40

DataFrame with MultiIndex (Rows):
               Value
Letter Number       
A      1          10
       2          20
B      1          30
       2          40

Accessing data for ('A', 1): 
Value    10
Name: (A, 1), dtype: int64

Accessing rows for 'A': 
        Value
Number       
1          10
2          20

Accessing data for 'B' and Numbers 1 and 2: 
30


### Handling Missing Labels 

In [24]:
import pandas as pd
import numpy as np

# =======================
# Handling Missing Labels, Reindexing, and Filling Missing Labels in Series
# =======================
# Creating a Series with some data
series = pd.Series([10, 20, 30], index=['a', 'b', 'c'])

# Accessing a missing label (this will raise a KeyError)
try:
    print(series['d'])
except KeyError:
    print("Label 'd' not found in the Series")

# Using .get() method to safely access a missing label (returns None instead of raising an error)
print(f"Accessing missing label 'd' using .get(): {series.get('d', 'Label not found')}")

# Using .reindex() to handle missing labels
new_series = series.reindex(['a', 'b', 'c', 'd'])
print(f"\nReindexed Series with a missing label 'd':\n{new_series}")
# Output: NaN for missing 'd'

# Filling missing labels using .fillna()
filled_series = new_series.fillna(0)
print(f"\nReindexed Series with NaN values filled with 0:\n{filled_series}")

# =======================
# Handling Missing Labels, Reindexing, and Filling Missing Labels in DataFrame
# =======================
# Creating a DataFrame with some data
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
df = pd.DataFrame(data, index=['x', 'y', 'z'])

# Accessing a missing row label (this will raise a KeyError)
try:
    print(df.loc['a'])
except KeyError:
    print("Row 'a' not found in the DataFrame")

# Accessing a missing column label (this will raise a KeyError)
try:
    print(df['C'])
except KeyError:
    print("Column 'C' not found in the DataFrame")

# Using .reindex() to handle missing rows
new_df = df.reindex(['x', 'y', 'z', 'a'])
print(f"\nReindexed DataFrame with a missing row 'a':\n{new_df}")
# Output: NaN for missing row 'a'

# Using .reindex() to handle missing columns
new_df_columns = df.reindex(columns=['A', 'B', 'C'])
print(f"\nReindexed DataFrame with a missing column 'C':\n{new_df_columns}")
# Output: NaN for missing column 'C'

# Filling missing values in the DataFrame with .fillna()
filled_df = new_df.fillna(0)
print(f"\nReindexed DataFrame with NaN values filled with 0:\n{filled_df}")

# =======================
# Handling NaN Values in Series
# =======================
# Creating a Series with NaN values
series_with_nan = pd.Series([10, np.nan, 30, np.nan, 50], index=['a', 'b', 'c', 'd', 'e'])
print(f"\nOriginal Series with NaN values:\n{series_with_nan}")

# Filling NaN values with a specific value (e.g., 0)
filled_series_with_nan = series_with_nan.fillna(0)
print(f"\nSeries with NaN values filled with 0:\n{filled_series_with_nan}")

# =======================
# Handling NaN Values in DataFrame
# =======================
# Creating a DataFrame with NaN values
df_with_nan = pd.DataFrame({'A': [1, np.nan, 3], 'B': [4, 5, np.nan]}, index=['x', 'y', 'z'])
print(f"\nDataFrame with NaN values:\n{df_with_nan}")

# Filling NaN values in the DataFrame with a specific value (e.g., 0)
filled_df_with_nan = df_with_nan.fillna(0)
print(f"\nDataFrame with NaN values filled with 0:\n{filled_df_with_nan}")

# =======================
# Reindexing and Filling NaN in DataFrame
# =======================
# Reindexing DataFrame with a missing row and filling NaN with specific values
reindexed_filled_df = df_with_nan.reindex(['x', 'y', 'z', 'a']).fillna(0)
print(f"\nReindexed DataFrame with missing row 'a' and NaN values filled with 0:\n{reindexed_filled_df}")

# Reindexing DataFrame with a missing column and filling NaN with specific values
reindexed_filled_df_columns = df_with_nan.reindex(columns=['A', 'B', 'C']).fillna(0)
print(f"\nReindexed DataFrame with missing column 'C' and NaN values filled with 0:\n{reindexed_filled_df_columns}")


Label 'd' not found in the Series
Accessing missing label 'd' using .get(): Label not found

Reindexed Series with a missing label 'd':
a    10.0
b    20.0
c    30.0
d     NaN
dtype: float64

Reindexed Series with NaN values filled with 0:
a    10.0
b    20.0
c    30.0
d     0.0
dtype: float64
Row 'a' not found in the DataFrame
Column 'C' not found in the DataFrame

Reindexed DataFrame with a missing row 'a':
     A    B
x  1.0  4.0
y  2.0  5.0
z  3.0  6.0
a  NaN  NaN

Reindexed DataFrame with a missing column 'C':
   A  B   C
x  1  4 NaN
y  2  5 NaN
z  3  6 NaN

Reindexed DataFrame with NaN values filled with 0:
     A    B
x  1.0  4.0
y  2.0  5.0
z  3.0  6.0
a  0.0  0.0

Original Series with NaN values:
a    10.0
b     NaN
c    30.0
d     NaN
e    50.0
dtype: float64

Series with NaN values filled with 0:
a    10.0
b     0.0
c    30.0
d     0.0
e    50.0
dtype: float64

DataFrame with NaN values:
     A    B
x  1.0  4.0
y  NaN  5.0
z  3.0  NaN

DataFrame with NaN values filled with 0

In [26]:
### Indexing with Boolean

In [27]:
import pandas as pd
import numpy as np

# =======================
# Indexing with Boolean Masks for Series
# =======================
# Creating a Series with some data
series = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])

# Creating a Boolean mask (True for values greater than 20)
mask = series > 20

# Using the mask to index the Series
filtered_series = series[mask]
print(f"Series with values greater than 20:\n{filtered_series}")
# Output: Only 'c', 'd', 'e' should be selected as they have values > 20

# =======================
# Indexing with Boolean Masks for DataFrame
# =======================
# Creating a DataFrame with some data
data = {'A': [1, 2, 3, 4, 5], 'B': [10, 20, 30, 40, 50]}
df = pd.DataFrame(data, index=['x', 'y', 'z', 'w', 'v'])

# Creating a Boolean mask for column 'A' (values greater than 2)
mask_df_A = df['A'] > 2

# Using the mask to filter rows in the DataFrame
filtered_df = df[mask_df_A]
print(f"\nDataFrame with rows where column 'A' > 2:\n{filtered_df}")
# Output: Rows where column 'A' values are greater than 2

# Creating a Boolean mask for both columns (where 'A' > 2 and 'B' < 40)
mask_df_both = (df['A'] > 2) & (df['B'] < 40)

# Using the mask to filter rows in the DataFrame
filtered_df_both = df[mask_df_both]
print(f"\nDataFrame with rows where 'A' > 2 and 'B' < 40:\n{filtered_df_both}")
# Output: Rows where both conditions are true

# =======================
# Indexing with Boolean Masks for NaN values in DataFrame
# =======================
# Creating a DataFrame with NaN values
df_with_nan = pd.DataFrame({'A': [1, 2, np.nan, 4, 5], 'B': [10, np.nan, 30, 40, 50]})
print(f"\nDataFrame with NaN values:\n{df_with_nan}")

# Creating a Boolean mask to filter rows with non-NaN values in column 'A'
mask_non_nan_A = df_with_nan['A'].notna()

# Using the mask to filter rows in the DataFrame
filtered_non_nan_A = df_with_nan[mask_non_nan_A]
print(f"\nDataFrame with rows where column 'A' is not NaN:\n{filtered_non_nan_A}")

# =======================
# Indexing with Boolean Masks for NaN values in Series
# =======================
# Creating a Series with NaN values
series_with_nan = pd.Series([1, 2, np.nan, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
print(f"\nSeries with NaN values:\n{series_with_nan}")

# Creating a Boolean mask to filter non-NaN values
mask_non_nan = series_with_nan.notna()

# Using the mask to filter non-NaN values in the Series
filtered_non_nan_series = series_with_nan[mask_non_nan]
print(f"\nSeries with non-NaN values:\n{filtered_non_nan_series}")


Series with values greater than 20:
c    30
d    40
e    50
dtype: int64

DataFrame with rows where column 'A' > 2:
   A   B
z  3  30
w  4  40
v  5  50

DataFrame with rows where 'A' > 2 and 'B' < 40:
   A   B
z  3  30

DataFrame with NaN values:
     A     B
0  1.0  10.0
1  2.0   NaN
2  NaN  30.0
3  4.0  40.0
4  5.0  50.0

DataFrame with rows where column 'A' is not NaN:
     A     B
0  1.0  10.0
1  2.0   NaN
3  4.0  40.0
4  5.0  50.0

Series with NaN values:
a    1.0
b    2.0
c    NaN
d    4.0
e    5.0
dtype: float64

Series with non-NaN values:
a    1.0
b    2.0
d    4.0
e    5.0
dtype: float64


### Using .at[] and .iat[] for Fast Scalar Access

In [29]:
import pandas as pd

# =======================
# Using .at[] and .iat[] for Series
# =======================
# Creating a Series with custom labels
series = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])

# Using .at[] to access an element by label (Access by Label)
value_at_label = series.at['b']
print(f"Value at label 'b': {value_at_label}")
# Output: 20

# Using .iat[] to access an element by integer location (Access by Integer Location)
value_at_position = series.iat[1]
print(f"Value at position 1: {value_at_position}")
# Output: 20

# =======================
# Using .at[] and .iat[] for DataFrame
# =======================
# Creating a DataFrame with custom row and column labels
data = {'A': [1, 2, 3, 4], 'B': [10, 20, 30, 40]}
df = pd.DataFrame(data, index=['x', 'y', 'z', 'w'])

# Using .at[] to access a specific element by row and column labels (Access by Label)
value_at_label_df = df.at['y', 'B']
print(f"Value at row 'y' and column 'B': {value_at_label_df}")
# Output: 20

# Using .iat[] to access a specific element by row and column positions (Access by Integer Location)
value_at_position_df = df.iat[1, 1]
print(f"Value at row position 1 and column position 1: {value_at_position_df}")
# Output: 20


Value at label 'b': 20
Value at position 1: 20
Value at row 'y' and column 'B': 20
Value at row position 1 and column position 1: 20


In [31]:
###  Indexing with Conditions Based on Labels

In [32]:
import pandas as pd

# =======================
# Indexing with Conditions Based on Labels (Series)
# =======================
# Creating a Series with custom labels
series = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])

# Condition: Select elements greater than 20
condition_series = series[series > 20]
print(f"Series elements greater than 20:\n{condition_series}")
# Output:
# c    30
# d    40
# e    50
# dtype: int64

# Condition: Select elements where label is 'b' or 'd'
condition_label_series = series[series.index.isin(['b', 'd'])]
print(f"\nSeries elements where label is 'b' or 'd':\n{condition_label_series}")
# Output:
# b    20
# d    40
# dtype: int64

# =======================
# Indexing with Conditions Based on Labels (DataFrame)
# =======================
# Creating a DataFrame with custom row and column labels
data = {'A': [10, 20, 30, 40], 'B': [50, 60, 70, 80]}
df = pd.DataFrame(data, index=['x', 'y', 'z', 'w'])

# Condition: Select rows where column 'A' is greater than 20
condition_df = df[df['A'] > 20]
print(f"\nDataFrame rows where column 'A' is greater than 20:\n{condition_df}")
# Output:
#    A   B
# z  30  70
# w  40  80

# Condition: Select rows where the label is 'y' or 'w'
condition_label_df = df[df.index.isin(['y', 'w'])]
print(f"\nDataFrame rows where index is 'y' or 'w':\n{condition_label_df}")
# Output:
#    A   B
# y  20  60
# w  40  80

# Condition: Select rows where both column 'A' > 15 and column 'B' > 50
condition_multi_df = df[(df['A'] > 15) & (df['B'] > 50)]
print(f"\nDataFrame rows where 'A' > 15 and 'B' > 50:\n{condition_multi_df}")
# Output:
#    A   B
# z  30  70
# w  40  80


Series elements greater than 20:
c    30
d    40
e    50
dtype: int64

Series elements where label is 'b' or 'd':
b    20
d    40
dtype: int64

DataFrame rows where column 'A' is greater than 20:
    A   B
z  30  70
w  40  80

DataFrame rows where index is 'y' or 'w':
    A   B
y  20  60
w  40  80

DataFrame rows where 'A' > 15 and 'B' > 50:
    A   B
y  20  60
z  30  70
w  40  80


### Using .query() for Label-Based Querying

In [34]:
import pandas as pd

# =======================
# Using .query() for Label-Based Querying (Series)
# =======================
# Creating a Series with custom labels
series = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])

# Using .query() for label-based querying (it works with Series but requires DataFrame-like syntax)
# Note: Using `.query()` directly on Series isn't as intuitive as on DataFrames.
# We will convert the Series to a DataFrame-like structure for querying.

series_df = series.to_frame('values')  # Convert Series to DataFrame-like structure

# Query for values greater than 20
query_series = series_df.query('values > 20')
print(f"Series values greater than 20 using .query():\n{query_series}")
# Output:
#    values
# c      30
# d      40
# e      50

# =======================
# Using .query() for Label-Based Querying (DataFrame)
# =======================
# Creating a DataFrame with custom row and column labels
data = {'A': [10, 20, 30, 40], 'B': [50, 60, 70, 80]}
df = pd.DataFrame(data, index=['x', 'y', 'z', 'w'])

# Query for rows where column 'A' is greater than 20
query_df_a = df.query('A > 20')
print(f"\nDataFrame rows where column 'A' is greater than 20 using .query():\n{query_df_a}")
# Output:
#    A   B
# z  30  70
# w  40  80

# Query for rows where column 'B' is greater than 60
query_df_b = df.query('B > 60')
print(f"\nDataFrame rows where column 'B' is greater than 60 using .query():\n{query_df_b}")
# Output:
#    A   B
# z  30  70
# w  40  80

# Query for rows where column 'A' is greater than 15 and column 'B' is less than 80
query_df_multi = df.query('A > 15 and B < 80')
print(f"\nDataFrame rows where 'A' > 15 and 'B' < 80 using .query():\n{query_df_multi}")
# Output:
#    A   B
# z  30  70


Series values greater than 20 using .query():
   values
c      30
d      40
e      50

DataFrame rows where column 'A' is greater than 20 using .query():
    A   B
z  30  70
w  40  80

DataFrame rows where column 'B' is greater than 60 using .query():
    A   B
z  30  70
w  40  80

DataFrame rows where 'A' > 15 and 'B' < 80 using .query():
    A   B
y  20  60
z  30  70


### Advanced Pandas Label Handling with Alignment

In [38]:
import pandas as pd

# ============================
# Label Duplication in Index (Handling Duplicates)
# ============================
# Series with duplicate labels
series_dup = pd.Series([10, 20, 30, 40], index=['a', 'b', 'b', 'd'])
print(f"Series with duplicate labels:\n{series_dup}")
# Output:
# a    10
# b    20
# b    30
# d    40
# dtype: int64

# Handling duplicate labels: selecting all occurrences using .loc[]
print(f"\nAccessing duplicate label 'b':\n{series_dup.loc['b']}")
# Output:
# b    20
# b    30
# dtype: int64

# ============================
# Using .get() for Label Access
# ============================
# Accessing labels safely with .get(), returns NaN if label doesn't exist
print(f"\nUsing .get() to access 'b': {series_dup.get('b')}")
# Output:
# b    20
# b    30
# dtype: int64

# Using .get() to access non-existing label (safe way)
print(f"Using .get() to access non-existing 'e': {series_dup.get('e')}")
# Output:
# None

# ============================
# Label-Based Sorting
# ============================
# Sorting Series by index labels
sorted_series = series_dup.sort_index()
print(f"\nSeries sorted by index labels:\n{sorted_series}")
# Output:
# a    10
# b    20
# b    30
# d    40
# dtype: int64

# DataFrame sorting by index labels
df_dup = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['z', 'x', 'y'])
sorted_df = df_dup.sort_index()
print(f"\nDataFrame sorted by index labels:\n{sorted_df}")
# Output:
#    A  B
# x  2  5
# y  3  6
# z  1  4

# Sorting DataFrame by values (for column 'A')
sorted_by_values_df = df_dup.sort_values(by='A')
print(f"\nDataFrame sorted by values in column 'A':\n{sorted_by_values_df}")
# Output:
#    A  B
# z  1  4
# x  2  5
# y  3  6

# ============================
# Label Alignment
# ============================
# Creating two Series with different indices
series1 = pd.Series([10, 20], index=['a', 'b'])
series2 = pd.Series([30, 40], index=['b', 'c'])

# Adding the two Series with automatic label alignment
aligned_series = series1 + series2
print(f"\nAligned Series (using addition between two Series):\n{aligned_series}")
# Output:
# a     NaN
# b    50.0
# c     NaN
# dtype: float64

# ============================
# Indexing with Conditions Based on Labels
# ============================
# Accessing Series using conditions based on labels
condition_series = series_dup[series_dup > 20]
print(f"\nSeries with condition '> 20':\n{condition_series}")
# Output:
# b    30
# d    40
# dtype: int64

# Accessing DataFrame with condition based on labels
df_condition = df_dup[df_dup['A'] > 1]
print(f"\nDataFrame with condition 'A > 1':\n{df_condition}")
# Output:
#    A  B
# x  2  5
# y  3  6

# ============================
# Handling Missing Labels and Reindexing
# ============================
# Reindexing Series with new labels
new_index = ['a', 'b', 'c', 'd']

# Handling duplicate index before reindexing:
# Drop duplicate indices or use unique indices for reindexing
if series_dup.index.is_unique == False:
    series_dup = series_dup.groupby(level=0).first()  # Keep first occurrence in case of duplicates

reindexed_series = series_dup.reindex(new_index)
print(f"\nReindexed Series:\n{reindexed_series}")
# Output:
# a    10.0
# b    20.0
# c     NaN
# d    40.0
# dtype: float64

# Filling missing labels using .fillna() for Series
filled_series = reindexed_series.fillna(0)
print(f"\nFilled Series (missing values filled with 0):\n{filled_series}")
# Output:
# a    10.0
# b    20.0
# c     0.0
# d    40.0
# dtype: float64

# Reindexing DataFrame with new labels
new_columns = ['A', 'B', 'C']

# Handling duplicate column labels
if df_dup.columns.is_unique == False:
    df_dup = df_dup.loc[:, ~df_dup.columns.duplicated()]  # Remove duplicated columns

reindexed_df = df_dup.reindex(columns=new_columns)
print(f"\nReindexed DataFrame:\n{reindexed_df}")
# Output:
#    A  B   C
# x  2  5 NaN
# y  3  6 NaN
# z  1  4 NaN


Series with duplicate labels:
a    10
b    20
b    30
d    40
dtype: int64

Accessing duplicate label 'b':
b    20
b    30
dtype: int64

Using .get() to access 'b': b    20
b    30
dtype: int64
Using .get() to access non-existing 'e': None

Series sorted by index labels:
a    10
b    20
b    30
d    40
dtype: int64

DataFrame sorted by index labels:
   A  B
x  2  5
y  3  6
z  1  4

DataFrame sorted by values in column 'A':
   A  B
z  1  4
x  2  5
y  3  6

Aligned Series (using addition between two Series):
a     NaN
b    50.0
c     NaN
dtype: float64

Series with condition '> 20':
b    30
d    40
dtype: int64

DataFrame with condition 'A > 1':
   A  B
x  2  5
y  3  6

Reindexed Series:
a    10.0
b    20.0
c     NaN
d    40.0
dtype: float64

Filled Series (missing values filled with 0):
a    10.0
b    20.0
c     0.0
d    40.0
dtype: float64

Reindexed DataFrame:
   A  B   C
z  1  4 NaN
x  2  5 NaN
y  3  6 NaN
