# Master Python for LLMs - Part 2

## Lists and Dictionaries for Text Processing

### 1. Operations with Token Lists

#### Basic List Manipulation

In [1]:
# Example token sequence: two content tokens wrapped in start/end
# sentinel markers. Later cells slice and filter this list.
tokens = [
    '[START]',
    'Hello',
    'world',
    '[END]',
]

#### Slicing to Remove Special Tokens

In [2]:
# tokens[1:-1] keeps everything between the first and last elements,
# which drops the marker token sitting at each end of the list.
clean_tokens = tokens[1:-1]
print("Tokens without markers:", clean_tokens)
# Result: Tokens without markers: ['Hello', 'world']

Tokens without markers: ['Hello', 'world']


#### List Comprehensions for Processing

In [3]:
# Upper-case each cleaned token with a list comprehension; the source
# list `clean_tokens` is left untouched.
uppercase_tokens = [token.upper() for token in clean_tokens]
print("Tokens in uppercase:", uppercase_tokens)
# Expected output: Tokens in uppercase: ['HELLO', 'WORLD']

Tokens in uppercase: ['HELLO', 'WORLD']


#### Filtering with Conditions

In [4]:
# Keep only the tokens longer than 3 characters. The filter runs over the
# original `tokens` list (markers included), and every one of its four
# entries is longer than 3 characters, so nothing is dropped here.
long_tokens = [t for t in tokens if len(t) > 3]
print("Tokens with more than 3 characters:", long_tokens)
# Result: Tokens with more than 3 characters: ['[START]', 'Hello', 'world', '[END]']

Tokens with more than 3 characters: ['[START]', 'Hello', 'world', '[END]']


### 2. Dictionaries for Configuration and Metadata

In [5]:
# Flat configuration for an LLM call, built with keyword arguments so each
# setting reads as name=value. Key order matches insertion order.
config = dict(
    model='gpt-3.5-turbo',
    temperature=0.7,
    max_tokens=100,
)

#### Nested Dictionaries

In [6]:
# Two-section configuration: `model` describes the model and carries its
# own nested `parameters` mapping; `api` holds the request settings.
complete_config = dict(
    model=dict(
        name='gpt-4',
        version='2024',
        parameters=dict(
            temperature=0.7,
            presence_penalty=0.1,
        ),
    ),
    api=dict(
        timeout=30,
        retries=3,
    ),
)

### 3. Sets for Efficient Filtering

In [7]:
# Stop words live in a set so each membership check below is a hash lookup
# rather than a scan.
stop_words = {'of', 'the', 'in', 'and', 'a', 'to'}

# Tokenise a sample sentence on whitespace, then keep only the tokens that
# are not stop words (original order preserved).
text_tokens = 'the model processes the text and generates responses'.split()
filtered_tokens = [
    token
    for token in text_tokens
    if token not in stop_words
]

### 4. Tuples for Immutable Data

In [8]:
from collections import namedtuple

# Immutable configuration record. namedtuple accepts the field names as a
# single whitespace-separated string.
Config = namedtuple('Config', 'model temperature max_tokens')

# Default settings, passed by keyword so each value is tied to its field.
default_config = Config(model='gpt-4', temperature=0.7, max_tokens=100)