# Data Structures

Basic data structures in Python:
- **List** - An ordered mutable sequence of items
- **Tuple** - An ordered *immutable* sequence of items
- **Set** - An *unordered* collection with no duplicate items (items cannot be accessed by index)
- **Dictionary** - A collection of *key-value* pairs where *keys are unique and must be hashable (typically immutable)*. A value is accessed through its key, and values can be modified.

## List

In [None]:
# Creating a list
my_list = [1, 2, 3, 4, 5]

# Accessing elements (indices start at 0; negative indices count from the end)
print(my_list[0])  # Output: 1
print(my_list[-1]) # Output: 5

# Slicing a list (the start index is included, the stop index is excluded)
print(my_list[1:3]) # Output: [2, 3]
print(my_list[:3]) # Output: [1, 2, 3]
print(my_list[3:]) # Output: [4, 5]
print(my_list[:-1]) # Output: [1, 2, 3, 4]

# Adding elements (append adds a single item to the end)
my_list.append(6)
print(my_list) # Output: [1, 2, 3, 4, 5, 6]

# Inserting elements (insert(index, item) places the item before that index)
my_list.insert(2, 'a')
print(my_list) # Output: [1, 2, 'a', 3, 4, 5, 6]

# Removing elements (remove deletes the first item equal to the given value)
my_list.remove('a')
print(my_list) # Output: [1, 2, 3, 4, 5, 6]

# Popping elements (pop() with no argument removes and returns the last item)
popped_element = my_list.pop()
print(popped_element) # Output: 6
print(my_list)        # Output: [1, 2, 3, 4, 5]

# Reversing a list (in place)
my_list.reverse()
print(my_list) # Output: [5, 4, 3, 2, 1]

# Sorting a list (in place, ascending)
my_list.sort()
print(my_list) # Output: [1, 2, 3, 4, 5]

# Sorting a list in reverse order
my_list.sort(reverse=True)
print(my_list) # Output: [5, 4, 3, 2, 1]

# Counting elements
print(my_list.count(3)) # Output: 1

# Finding index of element (the list is currently [5, 4, 3, 2, 1])
print(my_list.index(3)) # Output: 2

# Extending a list (adds every item of the given iterable to the end)
my_list.extend([6, 7, 8])
print(my_list) # Output: [5, 4, 3, 2, 1, 6, 7, 8]

# Copying a list (shallow copy; the copy is unaffected by the clear() below)
my_list_copy = my_list.copy()
print(my_list_copy) # Output: [5, 4, 3, 2, 1, 6, 7, 8]

# Removing all elements
my_list.clear()
print(my_list) # Output: []

# Creating a list with repeated elements
my_list_repeated = [0] * 5
print(my_list_repeated) # Output: [0, 0, 0, 0, 0]

# Creating a list with a range of numbers
my_list_range = list(range(5))
print(my_list_range) # Output: [0, 1, 2, 3, 4]

# Creating a list from a string (one item per character)
my_list_string = list('hello')
print(my_list_string) # Output: ['h', 'e', 'l', 'l', 'o']

# Creating a list from a tuple
my_list_tuple = list((1, 2, 3))
print(my_list_tuple) # Output: [1, 2, 3]

# Creating a list from a set (sets are unordered, so the item order is not guaranteed)
my_list_set = list({1, 2, 3})
print(my_list_set) # Output: [1, 2, 3]

# Creating a list from a dictionary (only the keys are used)
my_list_dict = list({'a': 1, 'b': 2, 'c': 3})
print(my_list_dict) # Output: ['a', 'b', 'c']

## Tuple

In [None]:
# Creating a tuple
my_tuple = (1, 2, 3, 4, 5)

# Accessing elements (indices start at 0; negative indices count from the end)
print(my_tuple[0])  # Output: 1
print(my_tuple[-1]) # Output: 5

# Slicing a tuple (the result is a new tuple)
print(my_tuple[1:3]) # Output: (2, 3)

# Concatenating tuples (builds a new tuple; my_tuple itself is unchanged)
new_tuple = my_tuple + (6, 7)
print(new_tuple) # Output: (1, 2, 3, 4, 5, 6, 7)

# Repeating tuples
repeated_tuple = my_tuple * 2
print(repeated_tuple) # Output: (1, 2, 3, 4, 5, 1, 2, 3, 4, 5)

# Creating a new tuple where each item is multiplied by 2
# (tuple() consumes the generator expression; new_tuple is rebound here)
new_tuple = tuple(x * 2 for x in my_tuple)
print(new_tuple) # Output: (2, 4, 6, 8, 10)

# Checking for an element
print(3 in my_tuple) # Output: True

# Finding the length of a tuple
print(len(my_tuple)) # Output: 5

# Finding the index of an element (position of the first occurrence)
print(my_tuple.index(3)) # Output: 2

# Counting occurrences of an element
print(my_tuple.count(3)) # Output: 1

## Set

In [None]:
# NOTE: sets are unordered, so the element order shown in the outputs below
# is illustrative and may differ when the code is run.

# Creating a set
my_set = {1, 2, 3, 4, 5}

# Adding elements (adding an element that is already present has no effect)
my_set.add(6)
print(my_set) # Output: {1, 2, 3, 4, 5, 6}

# Removing elements (remove raises KeyError if the element is not present)
my_set.remove(3)
print(my_set) # Output: {1, 2, 4, 5, 6}

# Discarding elements (no error if element not found)
my_set.discard(10) # No error
print(my_set) # Output: {1, 2, 4, 5, 6}

# Popping elements (removes and returns an arbitrary element)
popped_element = my_set.pop()
print(popped_element) # Output: 1 (or any other element)
print(my_set)         # Output: {2, 4, 5, 6} (if 1 was the element removed)

# Clearing a set
my_set.clear()
print(my_set) # Output: set()

# Set operations
set_a = {1, 2, 3}
set_b = {3, 4, 5}

# Union (elements in either set)
print(set_a | set_b) # Output: {1, 2, 3, 4, 5}

# Intersection (elements in both sets)
print(set_a & set_b) # Output: {3}

# Difference (elements in set_a that are not in set_b)
print(set_a - set_b) # Output: {1, 2}

# Symmetric difference (elements in exactly one of the two sets)
print(set_a ^ set_b) # Output: {1, 2, 4, 5}

# Checking for subset (is every element of set_a also in set_b?)
print(set_a <= set_b) # Output: False

# Checking for superset (is every element of set_b also in set_a?)
print(set_a >= set_b) # Output: False

# Checking for disjoint sets (False here because both sets contain 3)
print(set_a.isdisjoint(set_b)) # Output: False

## Dictionary

In [None]:
# Creating a dictionary
my_dict = {'a': 1, 'b': 2, 'c': 3}

# Accessing elements (indexing with a missing key raises KeyError)
print(my_dict['a'])  # Output: 1

# Adding or updating elements
my_dict['d'] = 4
print(my_dict) # Output: {'a': 1, 'b': 2, 'c': 3, 'd': 4}

# Removing elements
del my_dict['b']
print(my_dict) # Output: {'a': 1, 'c': 3, 'd': 4}

# Using pop to remove and return an element
value = my_dict.pop('c')
print(value)   # Output: 3
print(my_dict) # Output: {'a': 1, 'd': 4}

# Using popitem to remove and return the last inserted element
key, value = my_dict.popitem()
print(key, value) # Output: d 4
print(my_dict)    # Output: {'a': 1}

# Checking if a key exists
print('a' in my_dict) # Output: True
print('b' in my_dict) # Output: False

# Iterating over keys (my_dict currently holds only the key 'a')
for key in my_dict:
    print(key) # Output: a

# Iterating over values
for value in my_dict.values():
    print(value) # Output: 1

# Iterating over key-value pairs
for key, value in my_dict.items():
    print(key, value) # Output: a 1

# Getting a value with a default (the default is returned when the key is missing)
print(my_dict.get('a', 0)) # Output: 1
print(my_dict.get('b', 0)) # Output: 0

# Merging dictionaries (update modifies my_dict in place)
other_dict = {'e': 5, 'f': 6}
my_dict.update(other_dict)
print(my_dict) # Output: {'a': 1, 'e': 5, 'f': 6}

# String Manipulation


**String** characters are indexed. Slicing can be used as well.

Strings are *immutable*.


In [None]:
# NOTE: the quotes in the "Output" comments below only mark where each string
# starts and ends; print() displays the text without the quotes.

# Creating a string
my_string = "Hello, World!"

# Accessing characters
print(my_string[0])  # Output: 'H'
print(my_string[-1]) # Output: '!'

# Slicing a string (the start index is included, the stop index is excluded)
print(my_string[0:5]) # Output: 'Hello'

# Concatenating strings (creates a new string; my_string is unchanged)
new_string = my_string + " How are you?"
print(new_string) # Output: 'Hello, World! How are you?'

# Repeating strings
repeated_string = my_string * 2
print(repeated_string) # Output: 'Hello, World!Hello, World!'

# Changing case (each method returns a new string)
# capitalize() upper-cases the first character and lower-cases the rest
print(my_string.upper()) # Output: 'HELLO, WORLD!'
print(my_string.lower()) # Output: 'hello, world!'
print(my_string.capitalize()) # Output: 'Hello, world!'

# Splitting a string (on the separator ", ")
split_string = my_string.split(", ")
print(split_string) # Output: ['Hello', 'World!']

# Joining a list of strings (the separator is placed between the items)
joined_string = ", ".join(split_string)
print(joined_string) # Output: 'Hello, World!'

# Stripping whitespace (strip: both ends, lstrip: left end, rstrip: right end)
whitespace_string = "   Hello, World!   "
print(whitespace_string.strip()) # Output: 'Hello, World!'
print(whitespace_string.lstrip()) # Output: 'Hello, World!   '
print(whitespace_string.rstrip()) # Output: '   Hello, World!'

# Replacing substrings
replaced_string = my_string.replace("World", "Python")
print(replaced_string) # Output: 'Hello, Python!'

# Finding substrings (find returns the index of the first match, or -1 if not found)
index = my_string.find("World")
print(index) # Output: 7

# Checking if a string starts or ends with a substring
print(my_string.startswith("Hello")) # Output: True
print(my_string.endswith("!"))       # Output: True

# Formatting strings (f-string: expressions inside {} are evaluated and inserted)
name = "Alice"
age = 30
formatted_string = f"My name is {name} and I am {age} years old."
print(formatted_string) # Output: 'My name is Alice and I am 30 years old.'

# Regular Expressions

Explanation of Regular Expression Components:
- `r"pattern"`: Raw string literal, used to define the regular expression pattern.
- `re.findall(pattern, text)`: Finds all non-overlapping matches of the pattern in the text.
- `re.sub(pattern, replacement, text)`: Replaces all occurrences of the pattern in the text with the replacement string.
- `re.split(pattern, text)`: Splits the text into a list of strings based on the pattern.

Metacharacters:
- `.`: Matches any character except newline.
- `^`: Matches the beginning of the string.
- `$`: Matches the end of the string.
- `*`: Matches zero or more occurrences of the preceding character or group.
- `+`: Matches one or more occurrences of the preceding character or group.
- `?`: Matches zero or one occurrence of the preceding character or group.
- `[]`: Defines a character set.
- `|`: Represents an OR condition.
- `()`: Defines a capturing group.
- `\`: Escapes a metacharacter or represents a special sequence.

Character Sets:
- `[abc]`: Matches any of the characters a, b, or c.
- `[a-z]`: Matches any lowercase letter.
- `[A-Z]`: Matches any uppercase letter.
- `[0-9]`: Matches any digit.
- `[^abc]`: Matches any character except a, b, or c.

Special Sequences:
- `\d`: Matches any digit (equivalent to [0-9]).
- `\D`: Matches any non-digit character.
- `\s`: Matches any whitespace character.
- `\S`: Matches any non-whitespace character.
- `\w`: Matches any word character (letters, digits, and the underscore).
- `\W`: Matches any non-word character (anything not matched by `\w`).

In [None]:
import re

# Sample text
text = "The rain in Spain falls mainly in the plain."

# Finding all matches
# \b is a word boundary, so only the standalone word "in" matches,
# not the "in" inside "rain", "Spain", "mainly" or "plain"
matches = re.findall(r'\bin\b', text)
print(matches) # Output: ['in', 'in']

# Searching for a pattern (re.search returns the first match, or None if there is none)
match = re.search(r'Spain', text)
if match:
    print(f"Found '{match.group()}' at position {match.start()}") # Output: Found 'Spain' at position 12

# Splitting a string by a pattern (here: at every single whitespace character)
split_text = re.split(r'\s', text)
print(split_text) # Output: ['The', 'rain', 'in', 'Spain', 'falls', 'mainly', 'in', 'the', 'plain.']

# Replacing a pattern
replaced_text = re.sub(r'rain', 'snow', text)
print(replaced_text) # Output: The snow in Spain falls mainly in the plain.

# Compiling a regular expression for reuse
pattern = re.compile(r'\bin\b')
matches = pattern.findall(text)
print(matches) # Output: ['in', 'in']

# Using groups (group(1), group(2), ... return the text captured by each pair of parentheses)
text_with_numbers = "My phone number is 123-456-7890."
match = re.search(r'(\d{3})-(\d{3})-(\d{4})', text_with_numbers)
if match:
    print(f"Area code: {match.group(1)}") # Output: Area code: 123
    print(f"Main number: {match.group(2)}-{match.group(3)}") # Output: Main number: 456-7890

# Finding all matches with groups
# With several groups in the pattern, findall returns one tuple of strings per match.
# Note: the loop variable below rebinds `match`, replacing the match object from above.
text_with_dates = "Today's date is 2023-10-01. Tomorrow's date is 2023-10-02."
matches = re.findall(r'(\d{4})-(\d{2})-(\d{2})', text_with_dates)
for match in matches:
    print(f"Year: {match[0]}, Month: {match[1]}, Day: {match[2]}")
# Output:
# Year: 2023, Month: 10, Day: 01
# Year: 2023, Month: 10, Day: 02