# Example Use Cases of String Functions in Data Analysis

## 1. Import necessary libraries

In [1]:
import re
import string

import pandas as pd

## 2. Create a sample dataset

In [2]:
# Sample records for the single 'text_column' column. Each string exercises a
# different case: punctuation, plain prose, digits mixed with letters,
# leading/trailing whitespace, and an email address.
data = dict(
    text_column=[
        "Hello, World!",
        "Data analysis is fun.",
        "12345 Python 67890",
        "   Extra spaces   ",
        "email@example.com",
    ],
)

In [5]:
# Echo the dictionary to confirm the sample strings were entered as intended
data

{'text_column': ['Hello, World!',
  'Data analysis is fun.',
  '12345 Python 67890',
  '   Extra spaces   ',
  'email@example.com']}

## 3. Create a DataFrame from the sample data

In [6]:
# Each dictionary key becomes a column; the list under it supplies the rows.
df = pd.DataFrame(data=data)

In [7]:
# Display the frame built from the sample dictionary
df

Unnamed: 0,text_column
0,"Hello, World!"
1,Data analysis is fun.
2,12345 Python 67890
3,Extra spaces
4,email@example.com


## 4. Exploratory Data Analysis (EDA) on string values
### Let's explore and manipulate the 'text_column'


## 5. Extracting specific patterns using regular expressions
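### Example: Extract all words from the text

The pattern `\b\w+\b` matches each maximal run of letters, digits, or underscores, so punctuation is dropped and `email@example.com` is split into `email`, `example`, and `com`.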


In [8]:
# Tokenise every row with the vectorised .str accessor rather than a per-row
# Python lambda. The regex is unchanged (\b\w+\b = maximal runs of letters,
# digits and underscores), so each row still yields a list of strings, but
# missing values are propagated as NaN instead of raising a TypeError.
df['words'] = df['text_column'].str.findall(r'\b\w+\b')

In [9]:
# Display the frame again to inspect the newly added 'words' column
df

Unnamed: 0,text_column,words
0,"Hello, World!","[Hello, World]"
1,Data analysis is fun.,"[Data, analysis, is, fun]"
2,12345 Python 67890,"[12345, Python, 67890]"
3,Extra spaces,"[Extra, spaces]"
4,email@example.com,"[email, example, com]"


In [10]:
# Example: Extract all words from the text without regular expressions

# Sample text
text = "This is a sample text, with punctuation and spaces."

# Split on runs of whitespace, then trim punctuation from both ends of each
# token. string.punctuation covers every ASCII punctuation character, so
# quotes, brackets, semicolons, etc. are trimmed as well as '.,!?'.
# Punctuation inside a token (e.g. the apostrophe in "don't") is left alone.
# Tokens that consisted only of punctuation become empty and are dropped.
words = [
    stripped
    for stripped in (token.strip(string.punctuation) for token in text.split())
    if stripped
]

print("Extracted words:", words)

Extracted words: ['This', 'is', 'a', 'sample', 'text', 'with', 'punctuation', 'and', 'spaces']
