### map()Method on Series

In [2]:
import pandas as pd
# New sample Series
data = pd.Series(['mango', 'kiwi', 'mango', 'pear', 'kiwi'])
# New mapping dictionary
mapping = {
    'mango': 'orange',
    'kiwi': 'green',
    'pear': 'yellow'
}
# Apply the map function
result = data.map(mapping)
print(result)


0    orange
1     green
2    orange
3    yellow
4     green
dtype: object


### map()Method on Series
#### Pass a function to map().

In [9]:
import pandas as pd
# Sample Series with new values
data = pd.Series(['lion', 'tiger', 'leopard', 'cheetah'])
# Apply a lambda function to convert each string to uppercase
result = data.map(lambda x: x.upper())
print(result)


0       LION
1      TIGER
2    LEOPARD
3    CHEETAH
dtype: object


### map()Stage1:Import Required	Libraries

In [12]:
import	pandas	as	pd
import	numpy	as	np

### map() Stage	2:Create the DataFrame

In [16]:
import pandas as pd
import numpy as np
# Create a DataFrame with random values
df = pd.DataFrame(np.random.randn(4, 3),
                  columns=['X', 'Y', 'Z'],
                  index=['first', 'second', 'third', 'fourth'])
print(df)

               X         Y         Z
first  -0.243774  0.849517 -0.366171
second  0.072425 -0.587080  1.752831
third  -0.892557  0.709059 -0.422391
fourth -0.754652 -1.245145 -1.510275


### map()Stage3:Define and Apply a Function to Each	Column

In [20]:
import pandas as pd
import numpy as np
# Sample DataFrame with random values
df = pd.DataFrame(np.random.randn(4, 3),
                  columns=['X', 'Y', 'Z'],
                  index=['row1', 'row2', 'row3', 'row4'])
# Define function to compute range
f = lambda x: x.max() - x.min()
# Apply the function to each column
print("Range of each column:")
print(df.apply(f))


Range of each column:
X    1.658005
Y    2.634975
Z    3.333607
dtype: float64


### map()Stage4:Apply the Same Function	Across	Rows

In [23]:
import pandas as pd
import numpy as np
# Create a DataFrame with random numbers
df = pd.DataFrame(np.random.randn(4, 3),
                  columns=['X', 'Y', 'Z'],
                  index=['row1', 'row2', 'row3', 'row4'])
# Format each element to 2 decimal places
formatted_df = df.applymap(lambda x: '%.2f' % x)
# Display the formatted DataFrame
print("\nFormatted DataFrame (2 decimal places):")
print(formatted_df)



Formatted DataFrame (2 decimal places):
          X     Y      Z
row1  -2.52  0.12   0.17
row2   0.49  0.43   0.79
row3   0.11  0.91   0.12
row4   1.77  0.20  -0.30


  formatted_df = df.applymap(lambda x: '%.2f' % x)


### map()Stage5:Apply a	Formatting Function	to Each	Element

In [26]:
import pandas as pd
import numpy as np
# Create a DataFrame with random values
df = pd.DataFrame(np.random.randn(4, 3),
                  columns=['A', 'B', 'C'],
                  index=['one', 'two', 'three', 'four'])
# Define a function to calculate range (max - min)
f = lambda x: x.max() - x.min()
# Apply function across rows (axis=1)
print("\nRange of each row:")
print(df.apply(f, axis='columns'))  # or axis=1



Range of each row:
one      2.833216
two      1.505699
three    1.872743
four     0.861865
dtype: float64


### In Class Practice

In [31]:
import pandas as pd
df = pd.DataFrame({
    'name': ['David', 'Emma', 'Fiona'],
    'score': [76, 88, 94]
})
# Assign grades
df['grade'] = df['score'].map(lambda s: 'A' if s >= 90 else ('B' if s >= 80 else 'C'))
# Round numeric columns to 1 decimal place (without warning)
df[df.select_dtypes(include='number').columns] = df.select_dtypes(include='number').round(1)
print(df)

    name  score grade
0  David     76     C
1   Emma     88     B
2  Fiona     94     A


### Replacing Value

In [34]:
import pandas as pd
import numpy as np
# Create a Series with custom values
data = pd.Series([4, -1, 5, -888, 6, -1])
# Replace -1 and -888 with NaN
data_cleaned = data.replace([-1, -888], np.nan)
print(data_cleaned)


0    4.0
1    NaN
2    5.0
3    NaN
4    6.0
5    NaN
dtype: float64


In [38]:
import pandas as pd
import numpy as np
# Create a sample Series
data = pd.Series([1, -999, 2, -999, -1000, 3])

In [40]:
data.replace([-999, -1000], np.nan)

0    1.0
1    NaN
2    2.0
3    NaN
4    NaN
5    3.0
dtype: float64

### rename()Method

In [43]:
import pandas as pd
import numpy as np
# Create a DataFrame
df = pd.DataFrame(
    np.arange(12).reshape((3, 4)),
    index=['OHIO', 'COLORADO', 'NEW YORK'],
    columns=['one', 'two', 'three', 'four']
)
# Rename index to lowercase, columns to uppercase
df_renamed = df.rename(index=str.lower, columns=str.upper)
print(df_renamed)


          ONE  TWO  THREE  FOUR
ohio        0    1      2     3
colorado    4    5      6     7
new york    8    9     10    11


### Using Dictionary: Selective	renaming

In [46]:
import pandas as pd
import numpy as np
# Create the original DataFrame
df = pd.DataFrame(
    np.arange(12).reshape((3, 4)),
    index=['OHIO', 'COLORADO', 'NEW YORK'],
    columns=['one', 'two', 'three', 'four']
)
# Selective renaming
df_renamed = df.rename(
    index={'OHIO': 'INDIANA'},
    columns={'three': 'peekaboo'}
)
print(df_renamed)

          one  two  peekaboo  four
INDIANA     0    1         2     3
COLORADO    4    5         6     7
NEW YORK    8    9        10    11


### IN_Class Practice

In [51]:
import pandas as pd
# Step 1: Create the original DataFrame
df = pd.DataFrame({
    'C1': [100, -5, 200, -50],
    'C2': ['A', 'B', 'A', 'C']
}, index=['R1', 'R2', 'R3', 'R4'])
# Step 2: Replace -5 with 0 and -50 with -1
df.replace({-5: 0, -50: -1}, inplace=True)
# Step 3: Rename index labels to uppercase (they already are, but for practice)
df.index = df.index.str.upper()
# Step 4: Rename columns using a dictionary (modify in place)
df.rename(columns={'C1': 'value', 'C2': 'category'}, inplace=True)
# Display the final DataFrame
print(df)

    value category
R1    100        A
R2      0        B
R3    200        A
R4     -1        C


### Discretization and	Binning
#### labels	for	the	bins

In [54]:
import pandas as pd
# Step 1: Define a new list of ages
ages = [12, 15, 19, 23, 29, 34, 38, 41, 47, 53, 60, 67]
# Step 2: Define new bin edges
bins = [10, 20, 30, 50, 70]
# Step 3: Use pd.cut to bin the ages (default labels)
cats = pd.cut(ages, bins)
print("Binned Ages with Default Labels:")
print(cats)
# Step 4: Assign custom labels to the bins
group_names = ['Teen', 'Young Adult', 'Adult', 'Senior']
labeled_cats = pd.cut(ages, bins, labels=group_names)
print("\nBinned Ages with Custom Labels:")
print(labeled_cats)

Binned Ages with Default Labels:
[(10, 20], (10, 20], (10, 20], (20, 30], (20, 30], ..., (30, 50], (30, 50], (50, 70], (50, 70], (50, 70]]
Length: 12
Categories (4, interval[int64, right]): [(10, 20] < (20, 30] < (30, 50] < (50, 70]]

Binned Ages with Custom Labels:
['Teen', 'Teen', 'Teen', 'Young Adult', 'Young Adult', ..., 'Adult', 'Adult', 'Senior', 'Senior', 'Senior']
Length: 12
Categories (4, object): ['Teen' < 'Young Adult' < 'Adult' < 'Senior']


#### pd.cut() Function

In [56]:
import pandas as pd
import numpy as np
# Step 1: Generate new random data (200 values)
Data = np.random.randn(200)
# Step 2: Use qcut to divide into 5 quantile-based bins
cats = pd.qcut(Data, 5)
# Step 3: Count the number of values in each bin
bin_counts = cats.value_counts(cats)
print("Quintile Bins and Their Counts:")
print(bin_counts)

Quintile Bins and Their Counts:
(-2.737, -0.963]    40
(-0.963, -0.451]    40
(-0.451, 0.228]     40
(0.228, 0.841]      40
(0.841, 2.77]       40
Name: count, dtype: int64


### In Class Pretice

In [60]:
import pandas as pd
import numpy as np
# Step 1: Define the array of scores
scores = np.array([78, 92, 85, 65, 98, 70, 88, 75, 80, 95])
# Step 2: Define bin edges and labels for pd.cut()
cut_bins = [0, 70, 85, 95, np.inf]
cut_labels = ['Fail', 'Pass', 'Merit', 'Distinction']
# Step 3: Categorize scores using pd.cut()
cut_categories = pd.cut(scores, bins=cut_bins, labels=cut_labels, right=False)
print("Score Categories using pd.cut():")
print(cut_categories)
# Step 4: Use pd.qcut() to split into 4 equal-sized bins
qcut_categories = pd.qcut(scores, q=4)
print("\nScore Quantile Bins using pd.qcut():")
print(qcut_categories)
# Step 5: Count number of scores in each quantile bin
qcut_counts = qcut_categories.value_counts()
print("\nNumber of scores in each quantile-based bin:")
print(qcut_counts)

Score Categories using pd.cut():
['Pass', 'Merit', 'Merit', 'Fail', 'Distinction', 'Pass', 'Merit', 'Pass', 'Pass', 'Distinction']
Categories (4, object): ['Fail' < 'Pass' < 'Merit' < 'Distinction']

Score Quantile Bins using pd.qcut():
[(75.75, 82.5], (91.0, 98.0], (82.5, 91.0], (64.999, 75.75], (91.0, 98.0], (64.999, 75.75], (82.5, 91.0], (64.999, 75.75], (75.75, 82.5], (91.0, 98.0]]
Categories (4, interval[float64, right]): [(64.999, 75.75] < (75.75, 82.5] < (82.5, 91.0] < (91.0, 98.0]]

Number of scores in each quantile-based bin:
(64.999, 75.75]    3
(75.75, 82.5]      2
(82.5, 91.0]       2
(91.0, 98.0]       3
Name: count, dtype: int64


### Filtering Outliers (Standard Deviation)

In [64]:
import numpy as np
import pandas as pd
# Step 1: Create a DataFrame of Random Numbers (1000 rows, 4 columns)
np.random.seed(42)  # for reproducibility
data = pd.DataFrame(np.random.randn(1000, 4))
# Step 2: Describe the Data (Basic Statistics)
print("Original Data Description:")
print(data.describe())

# Step 3: Identify Outliers in the First Column (values > 3 std deviations from mean)
col = data[0]
outliers_col0 = col[np.abs(col) > 3]

print("\nOutliers in the first column (abs > 3):")
print(outliers_col0)

Original Data Description:
                 0            1            2            3
count  1000.000000  1000.000000  1000.000000  1000.000000
mean      0.030624     0.024828    -0.008255     0.030086
std       0.963919     1.011884     1.006075     1.006964
min      -3.019512    -2.896255    -3.241267    -2.991136
25%      -0.612942    -0.677037    -0.675299    -0.670871
50%       0.056187     0.020210    -0.007509     0.021158
75%       0.664881     0.693881     0.642282     0.695878
max       3.243093     3.852731     3.152057     3.926238

Outliers in the first column (abs > 3):
506   -3.019512
929    3.243093
Name: 0, dtype: float64


In [66]:
import numpy as np
import pandas as pd
# Step 1: Generate random data
np.random.seed(42)
data = pd.DataFrame(np.random.randn(1000, 4))
# Step 4: Select rows where any column has abs(value) > 3
rows_with_outliers = data[(np.abs(data) > 3).any(axis=1)]
print(f"\nRows with any value exceeding |3| (Total: {len(rows_with_outliers)} rows):")
print(rows_with_outliers)
# Step 5: Cap values to +/-3 using np.sign to preserve direction
data[np.abs(data) > 3] = np.sign(data) * 3

# Step 6: Describe the capped data
print("\nData Description After Capping Outliers to ±3:")
print(data.describe())


Rows with any value exceeding |3| (Total: 10 rows):
            0         1         2         3
52   0.515048  3.852731  0.570891  1.135566
65  -0.926930 -0.059525 -3.241267 -1.024388
119  0.576557  0.311250  3.078881  1.119575
403  0.883110 -0.077837 -0.180480  3.193108
489 -2.135674  3.137749  1.056057  0.223239
506 -3.019512  0.183850  1.800511  1.238946
576  1.995667  3.109919  0.606723 -0.183197
723  0.768207  0.215397  0.508269  3.926238
929  3.243093  2.307916 -0.181449 -0.106337
995  1.362563  1.640615  3.152057 -1.123494

Data Description After Capping Outliers to ±3:
                 0            1            2            3
count  1000.000000  1000.000000  1000.000000  1000.000000
mean      0.030400     0.023728    -0.008245     0.028967
std       0.963077     1.008264     1.004621     1.003207
min      -3.000000    -2.896255    -3.000000    -2.991136
25%      -0.612942    -0.677037    -0.675299    -0.670871
50%       0.056187     0.020210    -0.007509     0.021158
75%      

### In_class Pratice7

In [69]:
import numpy as np
import pandas as pd
# Step 1: Create a pandas Series with artificially introduced outliers
data = pd.Series(np.concatenate([np.random.randn(95), [10, -8, 12, -15, 9]]))
# Step 2: Identify outliers (values > 2 std deviations from the mean)
mean = data.mean()
std = data.std()
# Outlier condition: values beyond ±2 standard deviations
outliers = data[(np.abs(data - mean) > 2 * std)]
print("Outliers detected:")
print(outliers)
# Step 3: Filter out the outliers from the Series
cleaned_data = data[np.abs(data - mean) <= 2 * std]
print("\nData after removing outliers:")
print(cleaned_data)

Outliers detected:
95    10.0
96    -8.0
97    12.0
98   -15.0
99     9.0
dtype: float64

Data after removing outliers:
0    -0.863494
1    -0.031203
2     0.018017
3     0.472630
4    -1.366858
        ...   
90   -1.098620
91    1.420504
92   -0.113481
93    0.221558
94    1.234752
Length: 95, dtype: float64


### np.random.permutation()	and	take()/.sample()

In [72]:
import numpy as np
import pandas as pd
# Step 1: Create a 5x4 DataFrame
df = pd.DataFrame(np.arange(5 * 4).reshape((5, 4)))
print("Original DataFrame:")
print(df)

# Step 2: Shuffle the row indices using numpy's permutation
sampler = np.random.permutation(5)
print("\nRandom permutation of row indices:")
print(sampler)

# Step 3: Use take() to reorder the rows according to the permutation
shuffled_df = df.take(sampler)
print("\nDataFrame after applying take() with permutation:")
print(shuffled_df)

# Step 4: Use sample() to randomly select rows
# Randomly select 3 rows
print("\nRandomly sample 3 rows:")
print(df.sample(n=3))

# Randomly select 50% of the rows
print("\nRandomly sample 50% of the rows:")
print(df.sample(frac=0.5))

# Sample 10 rows with replacement (allows duplicates)
print("\nRandomly sample 10 rows with replacement:")
print(df.sample(n=10, replace=True))


Original DataFrame:
    0   1   2   3
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15
4  16  17  18  19

Random permutation of row indices:
[0 1 2 3 4]

DataFrame after applying take() with permutation:
    0   1   2   3
0   0   1   2   3
1   4   5   6   7
2   8   9  10  11
3  12  13  14  15
4  16  17  18  19

Randomly sample 3 rows:
    0   1   2   3
3  12  13  14  15
1   4   5   6   7
4  16  17  18  19

Randomly sample 50% of the rows:
    0   1   2   3
3  12  13  14  15
1   4   5   6   7

Randomly sample 10 rows with replacement:
    0   1   2   3
2   8   9  10  11
1   4   5   6   7
3  12  13  14  15
3  12  13  14  15
1   4   5   6   7
3  12  13  14  15
0   0   1   2   3
3  12  13  14  15
1   4   5   6   7
3  12  13  14  15


### In_Class Practice8

In [75]:
import pandas as pd
import numpy as np
# Step 1: Create a DataFrame with 10 rows and 3 columns of random numbers
np.random.seed(42)  # For reproducibility
df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
print("Original DataFrame:\n", df)
# Step 2: Randomly permute the order of the rows
permuted_df = df.take(np.random.permutation(len(df)))
print("\nRandomly Permuted DataFrame:\n", permuted_df)
# Step 3: Randomly sample 5 rows without replacement
sampled_5 = df.sample(n=5, replace=False)
print("\nRandomly Sampled 5 Rows (without replacement):\n", sampled_5)
# Step 4: Randomly sample 7 rows with replacement
sampled_7 = df.sample(n=7, replace=True)
print("\nRandomly Sampled 7 Rows (with replacement):\n", sampled_7)

Original DataFrame:
           A         B         C
0  0.496714 -0.138264  0.647689
1  1.523030 -0.234153 -0.234137
2  1.579213  0.767435 -0.469474
3  0.542560 -0.463418 -0.465730
4  0.241962 -1.913280 -1.724918
5 -0.562288 -1.012831  0.314247
6 -0.908024 -1.412304  1.465649
7 -0.225776  0.067528 -1.424748
8 -0.544383  0.110923 -1.150994
9  0.375698 -0.600639 -0.291694

Randomly Permuted DataFrame:
           A         B         C
8 -0.544383  0.110923 -1.150994
5 -0.562288 -1.012831  0.314247
1  1.523030 -0.234153 -0.234137
7 -0.225776  0.067528 -1.424748
0  0.496714 -0.138264  0.647689
2  1.579213  0.767435 -0.469474
4  0.241962 -1.913280 -1.724918
9  0.375698 -0.600639 -0.291694
6 -0.908024 -1.412304  1.465649
3  0.542560 -0.463418 -0.465730

Randomly Sampled 5 Rows (without replacement):
           A         B         C
2  1.579213  0.767435 -0.469474
0  0.496714 -0.138264  0.647689
4  0.241962 -1.913280 -1.724918
9  0.375698 -0.600639 -0.291694
8 -0.544383  0.110923 -1.150994

Ra

### Computing Indicator/Dummy Variables

In [82]:
import pandas as pd
# Step 1: Create a DataFrame
df = pd.DataFrame({
    'Key': ['b', 'b', 'a', 'c', 'a', 'b'],
    'Data1': range(6)
})
# Step 2: Create dummy variables from the 'Key' column
dummies = pd.get_dummies(df['Key'], prefix='Key')

# Step 3: Join dummy variables back to original Data1 column
df_with_dummy = df[['Data1']].join(dummies)

# Step 4: Display the result
print(df_with_dummy)

   Data1  Key_a  Key_b  Key_c
0      0  False   True  False
1      1  False   True  False
2      2   True  False  False
3      3  False  False   True
4      4   True  False  False
5      5  False   True  False


### In_CLass Practice9

In [79]:
import pandas as pd
# Step 1: Create the original DataFrame
data = pd.DataFrame({
    'color': ['blue', 'green', 'blue', 'red', 'green'],
    'value': [10, 20, 30, 40, 50]
})
print("Original DataFrame:\n", data)

# Step 2: Create dummy variables for the 'color' column
dummies = pd.get_dummies(data['color'])
print("\nDummy Variables:\n", dummies)
# Step 3: Add dummy variables to the original DataFrame
data_with_dummies = pd.concat([data, dummies], axis=1)
print("\nFinal DataFrame with Dummy Variables:\n", data_with_dummies)

Original DataFrame:
    color  value
0   blue     10
1  green     20
2   blue     30
3    red     40
4  green     50

Dummy Variables:
     blue  green    red
0   True  False  False
1  False   True  False
2   True  False  False
3  False  False   True
4  False   True  False

Final DataFrame with Dummy Variables:
    color  value   blue  green    red
0   blue     10   True  False  False
1  green     20  False   True  False
2   blue     30   True  False  False
3    red     40  False  False   True
4  green     50  False   True  False


### Regular	Expressions

In [86]:
import pandas as pd
import re
# Step 1: Create the Series
data = pd.Series(['Dave van Ronk', 'Bob Dylan', 'Bob Marley', 'Kurt Cobain'])
print("Original Series:")
print(data)

# Step 2: Find all occurrences of the string 'Bob'
matches = data.str.findall(r'Bob')
print("\nFind all occurrences of 'Bob':")
print(matches)

# Step 3: Replace all occurrences of 'Bob' with 'Robert'
replaced = data.str.replace(r'Bob', 'Robert', regex=True)
print("\nReplace 'Bob' with 'Robert':")
print(replaced)

Original Series:
0    Dave van Ronk
1        Bob Dylan
2       Bob Marley
3      Kurt Cobain
dtype: object

Find all occurrences of 'Bob':
0       []
1    [Bob]
2    [Bob]
3       []
dtype: object

Replace 'Bob' with 'Robert':
0    Dave van Ronk
1     Robert Dylan
2    Robert Marley
3      Kurt Cobain
dtype: object


### String Functions in	pandas (.str	accessor)

In [89]:
import pandas as pd
# Step 1: Create a pandas Series with comma-separated strings
data = pd.Series(['one,two,three', 'four,five', 'six'])
print("Original Series:")
print(data)

# Step 2: Split each string by comma — returns a Series of lists
split_series = data.str.split(',')
print("\nSplit strings (as lists):")
print(split_series)

# Step 3: Get the second element (index 1) from each list
second_elements = data.str.get(1)
print("\nSecond element from each string (using str.get(1)):")
print(second_elements)

# Step 4: Split the strings into multiple columns — returns a DataFrame
split_df = data.str.split(',', expand=True)
print("\nSplit strings into separate columns (DataFrame):")
print(split_df)

Original Series:
0    one,two,three
1        four,five
2              six
dtype: object

Split strings (as lists):
0    [one, two, three]
1         [four, five]
2                [six]
dtype: object

Second element from each string (using str.get(1)):
0    n
1    o
2    i
dtype: object

Split strings into separate columns (DataFrame):
      0     1      2
0   one   two  three
1  four  five   None
2   six  None   None


### In_Class Pratice

In [92]:
import pandas as pd
# Step 1: Create a pandas Series of email addresses using Pakistani names
emails = pd.Series([
    'ahmed.khan@gmail.com',
    'fatima.bibi@yahoo.com',
    'usman.ali@nu.edu.pk',
    'zainab.naqvi@charity.com',
    'bilal.siddiq@organization.com'
])
print("Original Emails:")
print(emails)

# Step 2: Extract the username and domain
split_df = emails.str.split('@', expand=True)
split_df.columns = ['username', 'domain']
print("\nSplit into Username and Domain:")
print(split_df)

# Step 3: Count occurrences of the letter 'a' (case-insensitive)
a_counts = emails.str.lower().str.count('a')
print("\nCount of 'a' in each email address:")
print(a_counts)

# Step 4: Replace '.com' with '.org'
updated_emails = emails.str.replace('.com', '.org', regex=False)
print("\nEmails after replacing '.com' with '.org':")
print(updated_emails)

Original Emails:
0             ahmed.khan@gmail.com
1            fatima.bibi@yahoo.com
2              usman.ali@nu.edu.pk
3         zainab.naqvi@charity.com
4    bilal.siddiq@organization.com
dtype: object

Split into Username and Domain:
       username            domain
0    ahmed.khan         gmail.com
1   fatima.bibi         yahoo.com
2     usman.ali         nu.edu.pk
3  zainab.naqvi       charity.com
4  bilal.siddiq  organization.com

Count of 'a' in each email address:
0    3
1    3
2    2
3    4
4    3
dtype: int64

Emails after replacing '.com' with '.org':
0             ahmed.khan@gmail.org
1            fatima.bibi@yahoo.org
2              usman.ali@nu.edu.pk
3         zainab.naqvi@charity.org
4    bilal.siddiq@organization.org
dtype: object


### Categorical	Extension Type in pandas

In [95]:
import pandas as pd
# Step 1: Create a Series with dtype='category'
data = pd.Series(['foo', 'bar', 'baz', 'foo', 'bar', 'foo'], dtype='category')
print("Original categorical Series:")
print(data)

# Step 2: Define a custom order for the categories
categories = ['foo', 'bar', 'baz']
ordered_data = data.astype(
    pd.CategoricalDtype(categories=categories, ordered=True)
)
print("\nCategorical Series with custom order:")
print(ordered_data)

Original categorical Series:
0    foo
1    bar
2    baz
3    foo
4    bar
5    foo
dtype: category
Categories (3, object): ['bar', 'baz', 'foo']

Categorical Series with custom order:
0    foo
1    bar
2    baz
3    foo
4    bar
5    foo
dtype: category
Categories (3, object): ['foo' < 'bar' < 'baz']


### Computations with Categorical &	Categorical	Methods

In [98]:
import pandas as pd
# Step 1: Create a Series with categorical data
s = pd.Series(pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']))
print("Original Series:")
print(s)

# Step 2: Show the category labels
print("\nCategories:")
print(s.cat.categories)

# Step 3: Show the underlying integer codes for each category
print("\nCategory Codes:")
print(s.cat.codes)

# Step 4: Rename the categories to more descriptive labels
renamed = s.cat.rename_categories(['Group 1', 'Group 2', 'Group 3'])
print("\nRenamed Categories:")
print(renamed)

Original Series:
0    a
1    b
2    c
3    a
4    b
5    c
dtype: category
Categories (3, object): ['a', 'b', 'c']

Categories:
Index(['a', 'b', 'c'], dtype='object')

Category Codes:
0    0
1    1
2    2
3    0
4    1
5    2
dtype: int8

Renamed Categories:
0    Group 1
1    Group 2
2    Group 3
3    Group 1
4    Group 2
5    Group 3
dtype: category
Categories (3, object): ['Group 1', 'Group 2', 'Group 3']
