In [None]:
import pandas as pd

# Books data: one (title, author, genre, description) record per book.
book_records = [
    (
        'The Great Gatsby',
        'F. Scott Fitzgerald',
        'Fiction',
        'A novel set in the 1920s about the mysterious Jay Gatsby.',
    ),
    (
        'To Kill a Mockingbird',
        'Harper Lee',
        'Fiction',
        'A novel about the serious issues of rape and racial inequality.',
    ),
    (
        '1984',
        'George Orwell',
        'Dystopian',
        'A dystopian novel set in a totalitarian society under constant surveillance.',
    ),
    (
        'Pride and Prejudice',
        'Jane Austen',
        'Romance',
        'A romantic novel that critiques the British landed gentry at the end of the 18th century.',
    ),
    (
        'The Catcher in the Rye',
        'J.D. Salinger',
        'Fiction',
        'A story about a young boy named Holden Caulfield and his experiences in New York City.',
    ),
]
column_names = ['title', 'author', 'genre', 'description']

# Transpose the row-wise records into the column-oriented mapping
# (column name -> list of values) that the DataFrame is built from.
books_data = {
    name: list(values)
    for name, values in zip(column_names, zip(*book_records))
}

books_df = pd.DataFrame(books_data)
print(books_df)

                    title               author      genre  \
0        The Great Gatsby  F. Scott Fitzgerald    Fiction   
1   To Kill a Mockingbird           Harper Lee    Fiction   
2                    1984        George Orwell  Dystopian   
3     Pride and Prejudice          Jane Austen    Romance   
4  The Catcher in the Rye        J.D. Salinger    Fiction   

                                         description  
0  A novel set in the 1920s about the mysterious ...  
1  A novel about the serious issues of rape and r...  
2  A dystopian novel set in a totalitarian societ...  
3  A romantic novel that critiques the British la...  
4  A story about a young boy named Holden Caulfie...  


**1. Convert a column to lowercase.**

In [None]:
# Lowercase every title with the vectorised string accessor and keep the
# result next to the original in a new column.
lowercased_titles = books_df['title'].str.lower()
books_df['title_lower'] = lowercased_titles
print(books_df.loc[:, ['title', 'title_lower']])

                    title             title_lower
0        The Great Gatsby        the great gatsby
1   To Kill a Mockingbird   to kill a mockingbird
2                    1984                    1984
3     Pride and Prejudice     pride and prejudice
4  The Catcher in the Rye  the catcher in the rye


**2. Split a column into multiple columns.**

In [None]:
# Split each author name into whitespace-separated tokens. The first token is
# the first name, the last token is the last name, and whatever sits between
# them is the middle name. A positional `split(..., expand=True)` fills the
# columns left to right, which puts the surname of a two-part name such as
# "Harper Lee" into the middle-name column and leaves the last name missing.
name_tokens = books_df['author'].str.split()
token_counts = name_tokens.str.len()

books_df['first_name'] = name_tokens.str[0]
# Only names with three or more tokens have a middle part; otherwise it is missing.
books_df['middle_name'] = name_tokens.str[1:-1].str.join(' ').where(token_counts > 2)
# A single-token name has no separate last name.
books_df['last_name'] = name_tokens.str[-1].where(token_counts > 1)
print(books_df[['author', 'first_name', 'middle_name', 'last_name']])

                author first_name middle_name   last_name
0  F. Scott Fitzgerald         F.       Scott  Fitzgerald
1           Harper Lee     Harper         Lee        None
2        George Orwell     George      Orwell        None
3          Jane Austen       Jane      Austen        None
4        J.D. Salinger       J.D.    Salinger        None


**3. Concatenate text columns.**

In [None]:
# Join the name parts with single spaces. `na_rep=''` substitutes an empty
# string for any missing part; with plain `+` concatenation a single missing
# part (for example a missing last name) turns the whole result into NaN.
joined_names = books_df['first_name'].str.cat(
    books_df[['middle_name', 'last_name']], sep=' ', na_rep=''
)
# Splitting on whitespace and re-joining removes the leading, trailing and
# doubled spaces left behind by the empty parts.
books_df['author_full'] = joined_names.str.split().str.join(' ')
print(books_df[['first_name', 'middle_name', 'last_name', 'author_full']])

  first_name middle_name   last_name          author_full
0         F.       Scott  Fitzgerald  F. Scott Fitzgerald
1     Harper         Lee        None                  NaN
2     George      Orwell        None                  NaN
3       Jane      Austen        None                  NaN
4       J.D.    Salinger        None                  NaN


**4. Extract substrings from a column.**

In [None]:
# Keep only the leading five characters of each title; titles shorter than
# five characters are kept whole.
books_df['title_substr'] = books_df['title'].str.slice(start=0, stop=5)
print(books_df.loc[:, ['title', 'title_substr']])

                    title title_substr
0        The Great Gatsby        The G
1   To Kill a Mockingbird        To Ki
2                    1984         1984
3     Pride and Prejudice        Pride
4  The Catcher in the Rye        The C


**5. Remove whitespace from text data.**

In [None]:
# Strip leading and trailing whitespace from each author name and store the
# cleaned value in its own column.
trimmed_authors = books_df['author'].str.strip()
books_df['author_trimmed'] = trimmed_authors
print(books_df.loc[:, ['author', 'author_trimmed']])


                author       author_trimmed
0  F. Scott Fitzgerald  F. Scott Fitzgerald
1           Harper Lee           Harper Lee
2        George Orwell        George Orwell
3          Jane Austen          Jane Austen
4        J.D. Salinger        J.D. Salinger


**6. Replace text in a column.**

In [None]:
# Swap every occurrence of "the" (in any letter case) for "A". The pattern has
# no word boundaries, so "the" inside a longer word would be replaced as well.
replaced_titles = books_df['title'].str.replace(pat='The', repl='A', case=False)
books_df['title_replaced'] = replaced_titles
print(books_df.loc[:, ['title', 'title_replaced']])


                    title         title_replaced
0        The Great Gatsby         A Great Gatsby
1   To Kill a Mockingbird  To Kill a Mockingbird
2                    1984                   1984
3     Pride and Prejudice    Pride and Prejudice
4  The Catcher in the Rye     A Catcher in A Rye


**7. Find the length of text in a column.**

In [None]:
# Character count of each title (spaces included).
title_lengths = books_df['title'].str.len()
books_df['title_length'] = title_lengths
print(books_df.loc[:, ['title', 'title_length']])


                    title  title_length
0        The Great Gatsby            16
1   To Kill a Mockingbird            21
2                    1984             4
3     Pride and Prejudice            19
4  The Catcher in the Rye            22


**8. Count occurrences of a substring in a column.**

In [None]:
# Number of lowercase "a" characters in each description. The match is
# case-sensitive, so an uppercase "A" is not counted.
lowercase_a_counts = books_df['description'].str.count(pat='a')
books_df['a_count'] = lowercase_a_counts
print(books_df.loc[:, ['description', 'a_count']])

                                         description  a_count
0  A novel set in the 1920s about the mysterious ...        3
1  A novel about the serious issues of rape and r...        6
2  A dystopian novel set in a totalitarian societ...        7
3  A romantic novel that critiques the British la...        4
4  A story about a young boy named Holden Caulfie...        5


**9. Filter rows containing a specific substring.**

In [None]:
# Boolean mask marking the rows whose description mentions "dystopian",
# ignoring letter case.
filter_mask = books_df['description'].str.contains(pat='dystopian', case=False)
# Keep only the rows selected by the mask.
dystopian_books = books_df.loc[filter_mask]
print(dystopian_books)


  title         author      genre  \
2  1984  George Orwell  Dystopian   

                                         description title_lower title_substr  \
2  A dystopian novel set in a totalitarian societ...        1984         1984   

  author_trimmed title_replaced  title_length  a_count  
2  George Orwell           1984             4        7  


**10. Apply a custom text function to a column.**

In [None]:
def custom_text_function(text):
    """Return ``text`` with its elements in reverse order.

    Example transformation used to demonstrate applying a custom function to
    a column: a string such as ``'1984'`` comes back as ``'4891'``.
    """
    # A slice with step -1 walks the sequence from the end to the start.
    backwards = slice(None, None, -1)
    return text[backwards]

# Run the custom function on every title and store the results in a new column.
reversed_titles = books_df['title'].apply(custom_text_function)
books_df['title_reversed'] = reversed_titles
print(books_df.loc[:, ['title', 'title_reversed']])



                    title          title_reversed
0        The Great Gatsby        ybstaG taerG ehT
1   To Kill a Mockingbird   dribgnikcoM a lliK oT
2                    1984                    4891
3     Pride and Prejudice     ecidujerP dna edirP
4  The Catcher in the Rye  eyR eht ni rehctaC ehT
