### str.replace(), .contains(), .extract()

In [None]:
import pandas as pd

# Build a small DataFrame of deliberately messy text: stray punctuation,
# padded whitespace, and one row that embeds an email address.
data = {
    'Text': [
        'Hello, World!!',
        'Python is fun!!!',
        '   Data Science is cool   ',
        '  Machine Learning 101   ',
        'Contact: user@example.com',
    ]
}
df = pd.DataFrame(data)

# Simple email pattern with a single capture group (str.extract requires one).
# The top-level-domain class is [A-Za-z]: writing [A-Z|a-z] would also accept
# a literal '|' character, because '|' is not alternation inside [...].
EMAIL_PATTERN = r'(\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b)'

# str.replace(): first drop every character that is neither a word character
# nor whitespace, then collapse whitespace runs to one space and trim the ends.
df['Cleaned_Text'] = df['Text'].str.replace(r'[^\w\s]', '', regex=True)
df['Cleaned_Text'] = (
    df['Cleaned_Text'].str.replace(r'\s+', ' ', regex=True).str.strip()
)

# str.contains(): keep rows whose original text contains the literal substring
# 'Python' (regex=False means plain substring matching). This snapshot is
# taken before the 'Email' column exists, so it holds only the two columns
# present at this point.
python_rows = df[df['Text'].str.contains('Python', regex=False)]

# str.extract(): pull the first email address out of each row; rows without a
# match get NaN. expand=False returns a Series for the single capture group.
df['Email'] = df['Text'].str.extract(EMAIL_PATTERN, expand=False)

print(df)
print("\nRows with 'Python':")
print(python_rows)

# Output (column alignment and the padding inside 'Text' are approximate):
#                          Text            Cleaned_Text             Email
# 0              Hello, World!!             Hello World               NaN
# 1            Python is fun!!!           Python is fun               NaN
# 2     Data Science is cool       Data Science is cool               NaN
# 3     Machine Learning 101       Machine Learning 101               NaN
# 4   Contact: user@example.com  Contact userexamplecom  user@example.com
#
# Rows with 'Python':
#                Text   Cleaned_Text
# 1  Python is fun!!!  Python is fun


#### Categorical Data

In [None]:
import pandas as pd

# Sample data: three text columns and one integer column.
data = {
    'Name': ['John', 'Alice', 'Bob', 'Emma'],
    'Gender': ['Male', 'Female', 'Male', 'Female'],
    'Age': [25, 30, 22, 29],
    'Purchased': ['Yes', 'No', 'Yes', 'No']
}

df = pd.DataFrame(data)

# Step 1: Identify categorical candidates by selecting object and category
# dtypes. Nothing has been converted yet, so only the text columns show up.
categorical_columns = df.select_dtypes(include=['object', 'category'])
print("Categorical Columns:\n", categorical_columns)
# Output:
# Categorical Columns:
#      Name  Gender Purchased
# 0    John    Male      Yes
# 1   Alice  Female       No
# 2     Bob    Male      Yes
# 3    Emma  Female       No

# Record the memory baseline BEFORE any column is converted; measuring after
# Step 2 would already reflect the categorical Gender/Purchased columns and
# make the before/after comparison meaningless.
memory_before = df.memory_usage(deep=True)

# Step 2: Convert specific columns to 'category' type
df['Gender'] = df['Gender'].astype('category')
df['Purchased'] = df['Purchased'].astype('category')

# Step 3: Check the data types after conversion
print("\nData Types After Conversion:\n", df.dtypes)
# Output:
# Data Types After Conversion:
# Name           object
# Gender       category
# Age             int64
# Purchased    category
# dtype: object

# Step 4: Compare memory usage before and after conversion
print("\nMemory Usage Before Conversion:")
print(memory_before)

# Convert 'Name' column to category for further demonstration
df['Name'] = df['Name'].astype('category')

memory_after = df.memory_usage(deep=True)
print("\nMemory Usage After Conversion:")
print(memory_after)
# Both printouts are Series indexed by 'Index', 'Name', 'Gender', 'Age' and
# 'Purchased'. Exact byte counts depend on the pandas/Python version and
# platform, so none are hard-coded here. Expect 'Gender' and 'Purchased'
# (two repeated values each) to shrink and 'Age' to stay the same.
# NOTE: 'Name' has all-unique values, so converting it to category is
# unlikely to save memory and may even cost slightly more - confirm on your
# own setup.
