### Python Data Structures - Working with Data in Python

What is a Data Structure in Python?

A data structure is a way of organizing, storing, and managing data so it can be used efficiently.

In Python, data structures help you:

* Group related data

* Access and manipulate data easily

* Write clear and optimized code


**Python String Manipulation: A Complete Guide**

#### What is a String?

A string in Python is an immutable sequence of characters. Strings are one of the most commonly used data types and are defined using single quotes `'`, double quotes `"`, or triple quotes `'''` or `"""` for multi-line strings.



In [1]:

### Basic String Creation
# Single quotes
single_quote = 'Hello World'

# Double quotes
double_quote = "Hello World"

# Triple quotes for multi-line strings
multi_line = """This is a
multi-line
string"""

# Triple single quotes
multi_line2 = '''Another way to
create multi-line
strings'''

# Empty string
empty_string = ""
empty_string2 = str()


In [2]:

## Creating Strings

### String Literals and Escape Characters

# Escape characters
escaped = "He said, \"Hello World!\""
path = "C:\\Users\\Documents\\file.txt"
newline = "Line 1\nLine 2\nLine 3"
tab = "Column1\tColumn2\tColumn3"

# Raw strings: backslashes are kept as literal characters instead of
# starting escape sequences (handy for Windows paths and regex patterns)
raw_path = r"C:\Users\Documents\file.txt"
regex_pattern = r"\d+\.\d+"

# Unicode strings
unicode_string = "Hello 🌍 世界"
unicode_escape = "\u0048\u0065\u006C\u006C\u006F"  # "Hello"



In [3]:

## String Indexing and Slicing

### Indexing

text = "Python"

# Positive indexing (0-based)
first_char = text[0]    # 'P'
second_char = text[1]   # 'y'
last_char = text[-1]    # 'n'
second_last = text[-2]  # 'o'

# String length
length = len(text)      # 6


In [4]:

### Slicing

text = "Hello World"

# Basic slicing [start:end:step] -- `end` is exclusive
substring = text[0:5]       # "Hello"
substring = text[:5]        # "Hello" (start defaults to 0)
substring = text[6:]        # "World" (end defaults to length)
substring = text[:]         # "Hello World" (the whole string)

# Negative indexing
substring = text[-5:]       # "World"
substring = text[:-6]       # "Hello"

# Step parameter
every_second = text[::2]    # "HloWrd"
reversed_text = text[::-1]  # "dlroW olleH"

# Advanced slicing
substring = text[1:8:2]     # "el o"


In [5]:

## String Methods

### Case Manipulation

text = "Hello World Python"

# Case conversion
upper_text = text.upper()           # "HELLO WORLD PYTHON"
lower_text = text.lower()           # "hello world python"
title_text = text.title()           # "Hello World Python"
capitalize_text = text.capitalize() # "Hello world python"
swapcase_text = text.swapcase()     # "hELLO wORLD pYTHON"

# Case checking
print(text.isupper())      # False
print(text.islower())      # False
print(text.istitle())      # True
print("HELLO".isupper())   # True
print("hello".islower())   # True


False
False
True
True
True


In [6]:
### String Searching and Checking
text = "Hello World Python Programming"

# Finding substrings (find/rfind return -1 instead of raising when nothing matches)
index = text.find("World")          # 6 (first occurrence)
index = text.find("xyz")            # -1 (not found)
rindex = text.rfind("o")            # 21 (last occurrence: the "o" in "Programming")

# Index method (raises ValueError if not found)
try:
    index = text.index("Python")    # 12
except ValueError:
    print("Substring not found")

# Checking string properties
print(text.startswith("Hello"))     # True
print(text.endswith("Programming")) # True
print("123".isdigit())              # True
print("abc".isalpha())              # True
print("abc123".isalnum())           # True
print("   ".isspace())              # True
print("Hello World".isascii())      # True


True
True
True
True
True
True
True


In [7]:


### String Cleaning and Formatting

text = "  Hello World Python  "

# Removing whitespace
stripped = text.strip()             # "Hello World Python"
left_stripped = text.lstrip()       # "Hello World Python  "
right_stripped = text.rstrip()      # "  Hello World Python"

# Removing specific characters
text2 = "...Hello World..."
cleaned = text2.strip(".")          # "Hello World"

# Padding strings
padded = "Hello".center(10)         # "  Hello   "
left_padded = "Hello".rjust(10)     # "     Hello"
right_padded = "Hello".ljust(10)    # "Hello     "
zero_padded = "42".zfill(5)         # "00042"


In [8]:


### String Replacement

text = "Hello World Python World"

# Basic replacement
replaced = text.replace("World", "Universe")        # "Hello Universe Python Universe"
replaced = text.replace("World", "Universe", 1)     # "Hello Universe Python World" (max 1 replacement)

# Translation tables
translation_table = str.maketrans("aeiou", "12345")
translated = text.translate(translation_table)      # "H2ll4 W4rld Pyth4n W4rld"

# Removing characters
remove_table = str.maketrans("", "", "aeiou")
no_vowels = text.translate(remove_table)            # "Hll Wrld Pythn Wrld"


In [None]:
### String Splitting and Joining

text = "apple,banana,cherry,date"

# Splitting strings
fruits = text.split(",")                    # ['apple', 'banana', 'cherry', 'date']
words = "Hello World Python".split()       # ['Hello', 'World', 'Python'] (splits on whitespace)
limited_split = text.split(",", 2)         # ['apple', 'banana', 'cherry,date']

# Right split
right_split = text.rsplit(",", 1)          # ['apple,banana,cherry', 'date']

# Split lines
multiline = "Line 1\nLine 2\nLine 3"
lines = multiline.splitlines()             # ['Line 1', 'Line 2', 'Line 3']

# Partition (splits into exactly 3 parts)
before, sep, after = "name@email.com".partition("@")  # ('name', '@', 'email.com')

# Joining strings
fruits_list = ['apple', 'banana', 'cherry']
joined = ",".join(fruits_list)             # "apple,banana,cherry"
spaced = " ".join(fruits_list)             # "apple banana cherry"



In [None]:

## String Formatting

### Old-style Formatting (% operator)

name = "Alice"
age = 25
score = 95.5

# Basic formatting
formatted = "Name: %s, Age: %d" % (name, age)               # "Name: Alice, Age: 25"
formatted = "Score: %.2f%%" % score                         # "Score: 95.50%"

# Named placeholders
formatted = "Name: %(name)s, Age: %(age)d" % {"name": name, "age": age}


In [None]:


### str.format() Method

name = "Alice"
age = 25
score = 95.567

# Positional arguments
formatted = "Name: {}, Age: {}".format(name, age)           # "Name: Alice, Age: 25"
formatted = "Name: {0}, Age: {1}, Name again: {0}".format(name, age)

# Named arguments
formatted = "Name: {name}, Age: {age}".format(name=name, age=age)

# Formatting options
formatted = "Score: {:.2f}".format(score)                  # "Score: 95.57"
formatted = "Number: {:,}".format(1234567)                 # "Number: 1,234,567"
formatted = "Percentage: {:.1%}".format(0.855)             # "Percentage: 85.5%"


In [None]:


### f-strings (Formatted String Literals) - Python 3.6+

name = "Alice"
age = 25
score = 95.567

# Basic f-string
formatted = f"Name: {name}, Age: {age}"                     # "Name: Alice, Age: 25"

# Expressions in f-strings
formatted = f"Next year, {name} will be {age + 1}"         # "Next year, Alice will be 26"

# Formatting options
formatted = f"Score: {score:.2f}"                          # "Score: 95.57"
# The "%" presentation type multiplies by 100, so it expects a fraction:
# 95.567 renders as 9556.70%; use 0.95567 to get "95.57%".
formatted = f"Score: {score:.2%}"                          # "Score: 9556.70%"

# Multi-line f-strings
message = f"""
Name: {name}
Age: {age}
Score: {score:.2f}
"""

# F-string with dictionaries
person = {"name": "Bob", "age": 30}
formatted = f"Person: {person['name']}, Age: {person['age']}"

# F-string with functions
import datetime
now = datetime.datetime.now()
formatted = f"Current time: {now.strftime('%Y-%m-%d %H:%M:%S')}"


In [None]:
## Advanced String Operations

### String Multiplication and Concatenation

# String concatenation
greeting = "Hello" + " " + "World"          # "Hello World"
greeting += "!"                             # "Hello World!"

# String multiplication
separator = "-" * 20                        # "--------------------"
pattern = "abc" * 3                         # "abcabcabc"

# Joining vs concatenation (join is more efficient for multiple strings)
# Less efficient: every += builds a new string object
result = ""
for word in ["Hello", "World", "Python"]:
    result += word + " "
# result is now "Hello World Python " -- note the trailing space

# More efficient: join places the separator only between items
words = ["Hello", "World", "Python"]
result = " ".join(words)
# result is now "Hello World Python"


In [None]:

### String Encoding and Decoding

text = "Hello 世界"

# Encoding to bytes
utf8_bytes = text.encode('utf-8')
ascii_bytes = text.encode('ascii', errors='ignore')  # Ignores non-ASCII characters

# Decoding from bytes
decoded = utf8_bytes.decode('utf-8')

# Handling encoding errors
try:
    problematic = "Hello 世界".encode('ascii')
except UnicodeEncodeError as e:
    print(f"Encoding error: {e}")

# Safe encoding with error handling
safe_encoded = text.encode('ascii', errors='replace')  # Replaces with '?'
safe_encoded = text.encode('ascii', errors='ignore')   # Ignores problematic characters


In [9]:


### Regular Expressions with Strings

import re

text = "Contact us at support@email.com or sales@company.org"

# Finding patterns
# Inside a character class "|" is a literal pipe, not alternation, so the
# top-level-domain class is written [A-Za-z] (not [A-Z|a-z]).
emails = re.findall(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
# ['support@email.com', 'sales@company.org']

# Replacing with regex (\1, \2, \3 refer to the three captured digit groups)
phone_text = "Call us at 123-456-7890 or 987-654-3210"
formatted_phones = re.sub(r'(\d{3})-(\d{3})-(\d{4})', r'(\1) \2-\3', phone_text)
# "Call us at (123) 456-7890 or (987) 654-3210"

# Splitting with regex (one or more of ";", "," or ":" acts as a separator)
mixed_separators = "apple;banana,cherry:date"
fruits = re.split(r'[;,:]+', mixed_separators)         # ['apple', 'banana', 'cherry', 'date']


In [None]:

## String Validation and Parsing

### Input Validation

def validate_email(email):
    """Return True if *email* has the basic shape ``local@domain.tld``.

    This is a simple structural check, not full address validation. It
    requires exactly one "@", a non-empty local part, and a domain made of
    at least two non-empty dot-separated labels.
    """
    local, at_sign, domain = email.partition("@")
    if not at_sign or not local or "@" in domain:
        return False
    # Reject empty labels such as ".com", "example." or "a..b".
    labels = domain.split(".")
    return len(labels) >= 2 and all(labels)

def validate_phone(phone):
    """Return True when *phone* contains exactly ten digits.

    Every non-digit character (dashes, spaces, brackets, ...) is discarded
    before counting.
    """
    digits_only = re.sub(r'\D', '', phone)
    has_ten_digits = len(digits_only) == 10
    return has_ten_digits

def parse_name(full_name):
    """Split a full name into first, middle and last components.

    The name is split on whitespace. The first token is the first name, the
    last token is the last name, and everything in between is joined with
    single spaces as the middle name. A single-token name is returned as the
    first name only; blank input gives three empty strings.

    Returns:
        dict with the keys "first", "last" and "middle".
    """
    parts = full_name.split()
    if not parts:
        return {"first": "", "last": "", "middle": ""}
    if len(parts) == 1:
        # Use the stripped token, not the raw input, so surrounding
        # whitespace never leaks into the result.
        return {"first": parts[0], "last": "", "middle": ""}
    first, *middle, last = parts
    return {"first": first, "last": last, "middle": " ".join(middle)}

# Examples
print(validate_email("user@example.com"))   # True
print(validate_phone("123-456-7890"))       # True
print(parse_name("John Michael Smith"))     # {'first': 'John', 'last': 'Smith', 'middle': 'Michael'}


In [None]:


### String Parsing

# Parsing CSV-like data
csv_line = "John,25,Engineer,New York"
fields = csv_line.split(",")
person = {
    "name": fields[0],
    "age": int(fields[1]),
    "job": fields[2],
    "city": fields[3]
}

# Parsing key-value pairs
config_string = "debug=true;port=8080;host=localhost"
config = {}
for pair in config_string.split(";"):
    # Split on the first "=" only, so values may themselves contain "="
    key, value = pair.split("=", 1)
    config[key] = value

# Parsing URLs
url = "https://example.com:8080/path/to/resource?param1=value1&param2=value2"
from urllib.parse import urlparse, parse_qs

parsed_url = urlparse(url)
query_params = parse_qs(parsed_url.query)
print(f"Host: {parsed_url.hostname}")       # Host: example.com
print(f"Port: {parsed_url.port}")           # Port: 8080
print(f"Path: {parsed_url.path}")           # Path: /path/to/resource



In [None]:

## String Algorithms and Patterns

### String Comparison

# Case-sensitive comparison
print("Hello" == "hello")                   # False
print("Hello" == "Hello")                   # True

# Case-insensitive comparison
print("Hello".lower() == "hello".lower())   # True

# Lexicographic comparison
print("apple" < "banana")                   # True (alphabetical order)
print("10" < "2")                          # True (string comparison, not numeric)

# Case-insensitive sorting: str.lower as the key ignores case, but it is NOT
# locale-aware. For locale-aware collation, set a locale and sort with
# key=locale.strxfrm, for example:
import locale
# locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
# sorted_words = sorted(words, key=locale.strxfrm)
words = ["apple", "Banana", "cherry"]
sorted_words = sorted(words, key=str.lower) # ['apple', 'Banana', 'cherry']


In [None]:


### String Algorithms

def is_palindrome(s):
    """Return True if *s* reads the same forwards and backwards.

    Only alphanumeric characters are compared, and letter case is ignored.
    """
    alphanumeric = filter(str.isalnum, s)
    cleaned = ''.join(map(str.lower, alphanumeric))
    return cleaned == ''.join(reversed(cleaned))

def count_words(text):
    """Count how often each word occurs in *text*, ignoring case.

    The text is lower-cased and split on whitespace. The punctuation
    characters . , ! ? ; " are stripped from both ends of every token.
    Tokens that consist only of punctuation are skipped.

    Returns:
        dict mapping each word to its number of occurrences.
    """
    word_count = {}
    for token in text.lower().split():
        word = token.strip('.,!?;"')  # Remove surrounding punctuation
        if not word:
            # e.g. "..." or "!" -- nothing left to count
            continue
        word_count[word] = word_count.get(word, 0) + 1
    return word_count

def longest_common_substring(str1, str2):
    """Return the longest substring of *str1* that also occurs in *str2*.

    If several substrings share the maximum length, the one that starts
    earliest in *str1* is returned. Returns "" when nothing is shared.
    """
    longest = ""
    total = len(str1)
    for start in range(total):
        # Only candidates strictly longer than the current best can win, so
        # begin at that length (the range is fixed before the loop mutates
        # `longest`).
        for end in range(start + len(longest) + 1, total + 1):
            candidate = str1[start:end]
            if candidate not in str2:
                # Every longer substring from `start` contains this
                # candidate, so it cannot occur in str2 either.
                break
            longest = candidate
    return longest

# Examples
print(is_palindrome("A man a plan a canal Panama"))  # True
print(count_words("hello world hello python"))       # {'hello': 2, 'world': 1, 'python': 1}
print(longest_common_substring("GeeksforGeeks", "GeeksQuiz"))  # "Geeks"



In [None]:

## String Performance Considerations

### Efficient String Building

import time

# Inefficient: string concatenation in loop
def inefficient_join(words):
    """Join *words* with spaces by repeated concatenation.

    Kept as the deliberately slow counter-example to ``" ".join(words)``.
    Leading and trailing whitespace of the final result is stripped.
    """
    accumulated = ""
    for token in words:
        accumulated = accumulated + token + " "
    return accumulated.strip()

# Efficient: using join
def efficient_join(words):
    """Join *words* with single spaces in one ``str.join`` call."""
    separator = " "
    return separator.join(words)

# Efficient: using list and join for complex operations
def build_html(items):
    """Render *items* as an HTML unordered list without extra whitespace.

    Each item is formatted into its own ``<li>`` element. Items are inserted
    verbatim; no HTML escaping is applied.
    """
    list_items = [f"<li>{item}</li>" for item in items]
    return "".join(["<ul>", *list_items, "</ul>"])

# Using StringIO for complex string building
from io import StringIO

def complex_string_building(data):
    """Build a minimal HTML page with one ``<p>`` element per item in *data*.

    The pieces are written to an in-memory text buffer, and the accumulated
    text is returned as one string. Items are inserted verbatim.
    """
    with StringIO() as buffer:
        buffer.write("<html><body>")
        buffer.writelines(f"<p>{item}</p>" for item in data)
        buffer.write("</body></html>")
        return buffer.getvalue()


In [None]:

## Common String Patterns and Use Cases

### Text Processing

def clean_text(text):
    """Normalize *text* to lower-case ASCII letters, digits and single spaces.

    Steps:
      1. Remove every character that is not an ASCII letter, digit or
         whitespace.
      2. Collapse whitespace runs to one space and trim both ends.
      3. Convert to lower case.

    Symbols are removed before whitespace is collapsed. Otherwise a
    free-standing symbol ("a - b") would leave a double space behind.
    """
    import re

    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    # split() with no arguments drops leading/trailing whitespace and
    # treats any whitespace run as one separator.
    return ' '.join(text.split()).lower()

def extract_hashtags(text):
    """Return every hashtag found in *text*, lower-cased, in order of appearance.

    A hashtag is a "#" followed by one or more word characters. The "#" is
    kept in each result.
    """
    import re
    return [found.group().lower() for found in re.finditer(r'#\w+', text)]

def mask_sensitive_data(text):
    """Mask 16-digit card numbers and email addresses found in *text*.

    Card numbers (four groups of four digits, optionally separated by a dash
    or whitespace) keep their first and last four characters. Everything in
    between, separators included, becomes "*", so the length is preserved.

    Email addresses keep at most the first two characters of the local part,
    followed by "***@" and the unchanged domain.
    """
    import re

    def _mask_card(match):
        card = match.group()
        return card[:4] + '*' * (len(card) - 8) + card[-4:]

    def _mask_email(match):
        # The pattern admits exactly one "@", so partition is unambiguous.
        local, _, domain = match.group().partition('@')
        return local[:2] + '***@' + domain

    # Mask credit card numbers
    text = re.sub(r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b', _mask_card, text)

    # Mask email addresses. The TLD class is [A-Za-z]: inside a character
    # class "|" would be a literal pipe, not alternation.
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', _mask_email, text)

    return text

# Examples
messy_text = "  Hello   WORLD!!!   This is    Python...  "
print(clean_text(messy_text))  # "hello world this is python"

social_post = "Learning #Python and #DataScience today! #coding"
print(extract_hashtags(social_post))  # ['#python', '#datascience', '#coding']

sensitive = "My card is 1234-5678-9012-3456 and email is john.doe@example.com"
# The dashes are masked too, so the 19-character card keeps 4 + 4 characters and gets 11 asterisks
print(mask_sensitive_data(sensitive))  # "My card is 1234***********3456 and email is jo***@example.com"


In [None]:


### Template Processing

class SimpleTemplate:
    """Simple template engine that fills ``{name}`` placeholders.

    Placeholders without a matching keyword argument are left untouched.
    """

    def __init__(self, template):
        # Raw template text containing "{key}" placeholders
        self.template = template

    def render(self, **kwargs):
        """Return the template with each ``{key}`` replaced by ``str(value)``.

        All placeholders are substituted in a single pass over the template.
        Replacement text is never scanned again, so a value that happens to
        contain "{other_key}" is inserted literally and the result does not
        depend on keyword order.
        """
        import re

        if not kwargs:
            return self.template

        replacements = {f"{{{key}}}": str(value) for key, value in kwargs.items()}
        # Longest placeholder first so an alternative can never be shadowed
        # by a shorter one that is its prefix.
        alternatives = sorted(replacements, key=len, reverse=True)
        pattern = re.compile("|".join(map(re.escape, alternatives)))
        return pattern.sub(lambda match: replacements[match.group()], self.template)

# Advanced template with conditionals
def render_template_with_conditions(template, data):
    """Render *template* with simple conditionals and ``{key}`` placeholders.

    Conditional blocks have the form ``{{if name}}content{{endif}}``. The
    content is kept when ``data[name]`` exists and is truthy, otherwise the
    whole block is removed. Blocks are matched non-greedily and may span
    several lines.

    After that, every ``{key}`` for a key in *data* is replaced by
    ``str(value)`` in a single pass. Inserted values are never scanned
    again. Placeholders without a matching key are left as they are.
    """
    import re

    def replace_if(match):
        # group(1) is already the bare variable name captured by (\w+)
        var_name, content = match.group(1), match.group(2)
        return content if data.get(var_name) else ""

    # Process if statements
    template = re.sub(r'\{\{if\s+(\w+)\}\}(.*?)\{\{endif\}\}', replace_if, template, flags=re.DOTALL)

    if not data:
        return template

    # Replace variables in one pass, longest placeholder first
    replacements = {f"{{{key}}}": str(value) for key, value in data.items()}
    alternatives = sorted(replacements, key=len, reverse=True)
    pattern = re.compile("|".join(map(re.escape, alternatives)))
    return pattern.sub(lambda match: replacements[match.group()], template)

# Examples
email_template = SimpleTemplate("""
Dear {name},

Thank you for your order #{order_id}.
Your total is ${total}.

Best regards,
{company}
""")

rendered = email_template.render(
    name="John Doe",
    order_id="12345",
    total="99.99",
    company="Example Corp"
)

# Advanced template example
advanced_template = """
Hello {name}!

{{if premium}}
Welcome to our premium service!
{{endif}}

Your account balance is ${balance}.
"""

result = render_template_with_conditions(advanced_template, {
    "name": "Alice",
    "premium": True,
    "balance": "150.00"
})


In [10]:


## String Security Considerations

### SQL Injection Prevention

# BAD: String concatenation (vulnerable to SQL injection)
def bad_query(user_id):
    """Counter-example: build a SQL statement by interpolating *user_id*.

    The value is placed into the SQL text as-is, which is what makes this
    pattern open to SQL injection. Shown for contrast only.
    """
    return f"SELECT * FROM users WHERE id = {user_id}"

# GOOD: Parameterized queries (safe)
def safe_query(user_id):
    """Return a parameterized SQL statement and its parameter tuple.

    The statement uses a "?" placeholder and *user_id* travels separately
    as a one-element tuple, so it is never spliced into the SQL text.
    """
    params = (user_id,)
    return "SELECT * FROM users WHERE id = ?", params

# Input sanitization
def sanitize_input(user_input):
    """Strip the characters < > " ' ; from *user_input* and trim whitespace.

    NOTE(review): removing a fixed character set is a basic measure only;
    it presumably should not replace parameterized queries or output
    escaping -- confirm how callers use the result.
    """
    import re
    risky_chars = re.compile(r'[<>"\';]')
    return risky_chars.sub('', user_input).strip()



In [None]:

### Data Validation

def validate_username(username):
    """Validate the format of *username*.

    Rules, checked in this order:
      * must not be empty,
      * must be 3 to 20 characters long,
      * may contain only ASCII letters, digits and underscores.

    Returns:
        (is_valid, message) -- a bool and a human-readable explanation.
    """
    import re

    if not username:
        return False, "Username cannot be empty"

    if len(username) < 3 or len(username) > 20:
        return False, "Username must be 3-20 characters"

    # fullmatch, not match with "$": "$" also matches just before a
    # trailing newline, which would let "alice\n" through.
    if not re.fullmatch(r'[a-zA-Z0-9_]+', username):
        return False, "Username can only contain letters, numbers, and underscores"

    return True, "Valid username"

def validate_password_strength(password):
    """Check *password* against a fixed set of strength rules.

    Rules: at least 8 characters, one uppercase letter, one lowercase
    letter, one digit and one special character from the listed set.

    Returns:
        (is_strong, issues) -- a bool and the list of failed-rule messages
        in rule order; the list is empty when the password passes.
    """
    issues = []

    if len(password) < 8:
        issues.append("Password must be at least 8 characters")

    # (pattern that must be found, message reported when it is missing)
    required_patterns = (
        (r'[A-Z]', "Password must contain uppercase letter"),
        (r'[a-z]', "Password must contain lowercase letter"),
        (r'\d', "Password must contain number"),
        (r'[!@#$%^&*(),.?":{}|<>]', "Password must contain special character"),
    )
    issues.extend(
        message
        for pattern, message in required_patterns
        if not re.search(pattern, password)
    )

    return not issues, issues



In [None]:

## Best Practices

### 1. Use appropriate string methods

# Good: Use built-in methods
text = "  hello world  "
cleaned = text.strip().title()

# Avoid: Manual implementation
cleaned = ""
start = 0
end = len(text) - 1
while start < len(text) and text[start] == ' ':
    start += 1
while end >= 0 and text[end] == ' ':
    end -= 1
cleaned = text[start:end+1].title()



In [None]:

### 2. Use f-strings for formatting (Python 3.6+)

name = "Alice"
age = 25

# Good: f-strings (readable and efficient)
message = f"Hello {name}, you are {age} years old"

# Okay: str.format()
message = "Hello {}, you are {} years old".format(name, age)

# Avoid: % formatting (older style)
message = "Hello %s, you are %d years old" % (name, age)



In [None]:

### 3. Use join() for multiple concatenations

words = ["Hello", "World", "Python", "Programming"]

# Good: Use join()
sentence = " ".join(words)

# Inefficient: Multiple concatenations
sentence = ""
for word in words:
    sentence += word + " "
sentence = sentence.strip()



In [None]:

### 4. Handle encoding properly

# Good: Explicit encoding
with open('file.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# Specify encoding when working with bytes
text_bytes = "Hello 世界".encode('utf-8')
text = text_bytes.decode('utf-8')



In [None]:

## Common Pitfalls

### 1. String immutability confusion

# Strings are immutable - this creates a new string
text = "Hello"
text += " World"  # Creates new string, doesn't modify original

# For multiple modifications, use list and join
parts = ["Hello"]
parts.append(" World")
parts.append("!")
result = "".join(parts)



In [None]:

### 2. Unicode and encoding issues

# Be aware of Unicode normalization
import unicodedata

text1 = "café"  # é as single character
text2 = "cafe\u0301"  # e + combining acute accent

print(text1 == text2)  # False - different representations
print(unicodedata.normalize('NFC', text1) == unicodedata.normalize('NFC', text2))  # True


In [None]:

### 3. Locale-dependent operations

# String ordering can be locale-dependent
import locale

# NOTE: no locale is set here. key=str.lower only gives a case-insensitive
# sort. For locale-aware ordering, call locale.setlocale(...) first and
# sort with key=locale.strxfrm.
words = ["apple", "Banana", "cherry"]
sorted_words = sorted(words, key=str.lower)  # Case-insensitive sort
