In [13]:
import sys
sys.path.insert(0, '..')

from src.data.load_data import load_cleaned

# Clear any cached imports
if 'src.data.load_data' in sys.modules:
    del sys.modules['src.data.load_data']
    
# Load the cleaned dataset
df = load_cleaned()

print(f"Cleaned dataset loaded!")
print(f"Shape: {df.shape}")
print(f"\nColumn names: {df.columns}")
print(f"\nData types:")
print(df.schema)
print(f"\nFirst few rows:")
print(df.head())

Cleaned dataset loaded!
Shape: (30688, 13)

Column names: ['id', 'session', 'electoralTerm', 'firstName', 'lastName', 'politicianId', 'speechContent', 'factionId', 'documentUrl', 'positionShort', 'positionLong', 'date', 'speech_length']

Data types:
Schema([('id', Int64), ('session', Int64), ('electoralTerm', Int64), ('firstName', String), ('lastName', String), ('politicianId', Int64), ('speechContent', String), ('factionId', Int64), ('documentUrl', String), ('positionShort', String), ('positionLong', String), ('date', String), ('speech_length', Int64)])

First few rows:
shape: (5, 13)
┌─────────┬─────────┬────────────┬───────────┬───┬────────────┬────────────┬───────────┬───────────┐
│ id      ┆ session ┆ electoralT ┆ firstName ┆ … ┆ positionSh ┆ positionLo ┆ date      ┆ speech_le │
│ ---     ┆ ---     ┆ erm        ┆ ---       ┆   ┆ ort        ┆ ng         ┆ ---       ┆ ngth      │
│ i64     ┆ i64     ┆ ---        ┆ str       ┆   ┆ ---        ┆ ---        ┆ str       ┆ ---       │
│  

In [14]:
# Text Segmentation: Split speeches into paragraphs
import polars as pl
import re

# Test different splitting methods on the first speech
test_speech = df['speechContent'][0]

print("=" * 80)
print("COMPARING PARAGRAPH SPLITTING METHODS")
print("=" * 80)

# Method 1: Split by double newline (\n\n)
paragraphs_double_newline = test_speech.split('\n\n')
paragraphs_double_newline = [p.strip() for p in paragraphs_double_newline if p.strip()]

print(f"\nMethod 1: Split by \\n\\n")
print(f"  Number of paragraphs: {len(paragraphs_double_newline)}")
print(f"  Min length: {min(len(p) for p in paragraphs_double_newline) if paragraphs_double_newline else 0}")
print(f"  Max length: {max(len(p) for p in paragraphs_double_newline) if paragraphs_double_newline else 0}")
print(f"  Avg length: {sum(len(p) for p in paragraphs_double_newline) / len(paragraphs_double_newline) if paragraphs_double_newline else 0:.0f}")

# Method 2: Split by single newline (\n)
paragraphs_single_newline = test_speech.split('\n')
paragraphs_single_newline = [p.strip() for p in paragraphs_single_newline if p.strip()]

print(f"\nMethod 2: Split by \\n")
print(f"  Number of paragraphs: {len(paragraphs_single_newline)}")
print(f"  Min length: {min(len(p) for p in paragraphs_single_newline) if paragraphs_single_newline else 0}")
print(f"  Max length: {max(len(p) for p in paragraphs_single_newline) if paragraphs_single_newline else 0}")
print(f"  Avg length: {sum(len(p) for p in paragraphs_single_newline) / len(paragraphs_single_newline) if paragraphs_single_newline else 0:.0f}")

# Method 3: Split by regex (one or more newlines/spaces)
paragraphs_regex = re.split(r'\n\s*\n+', test_speech)
paragraphs_regex = [p.strip() for p in paragraphs_regex if p.strip()]

print(f"\nMethod 3: Split by regex (\\n\\s*\\n+)")
print(f"  Number of paragraphs: {len(paragraphs_regex)}")
print(f"  Min length: {min(len(p) for p in paragraphs_regex) if paragraphs_regex else 0}")
print(f"  Max length: {max(len(p) for p in paragraphs_regex) if paragraphs_regex else 0}")
print(f"  Avg length: {sum(len(p) for p in paragraphs_regex) / len(paragraphs_regex) if paragraphs_regex else 0:.0f}")

# Sample paragraphs
print(f"\n" + "=" * 80)
print("SAMPLE PARAGRAPHS (Method 1: \\n\\n)")
print("=" * 80)
for i, para in enumerate(paragraphs_double_newline[:3]):
    print(f"\nParagraph {i+1} ({len(para)} chars):")
    print(para[:200] + "..." if len(para) > 200 else para)

COMPARING PARAGRAPH SPLITTING METHODS

Method 1: Split by \n\n
  Number of paragraphs: 17
  Min length: 5
  Max length: 1021
  Avg length: 263

Method 2: Split by \n
  Number of paragraphs: 17
  Min length: 5
  Max length: 1021
  Avg length: 263

Method 3: Split by regex (\n\s*\n+)
  Number of paragraphs: 17
  Min length: 5
  Max length: 1021
  Avg length: 263

SAMPLE PARAGRAPHS (Method 1: \n\n)

Paragraph 1 (248 chars):
Sehr geehrter Herr Präsident Friedrich! Kolleginnen und Kollegen! Die letzten zwei Sätze des Kollegen Hunko kann ich nur begrüßen. Da möchte ich mich ausdrücklich anschließen. Das war mal was Gescheit...

Paragraph 2 (5 chars):
({0})

Paragraph 3 (1021 chars):
Aber lassen Sie mich, bevor wir die einzelnen Themen des bevorstehenden Europäischen Rats besprechen, kurz noch grundsätzlich etwas zur europapolitischen Ausrichtung sagen. Für uns als CSU ist Europa ...


In [15]:
# Apply Method 1 (split by \n\n) to all speeches
import polars as pl
from tqdm import tqdm

def split_speech_into_paragraphs(speech_content: str) -> list:
    """Split speech into paragraphs by double newline."""
    paragraphs = speech_content.split('\n\n')
    paragraphs = [p.strip() for p in paragraphs if p.strip()]
    return paragraphs

# Create a new dataframe with paragraphs as separate rows
rows_list = []

for row in tqdm(df.iter_rows(named=True), total=df.shape[0], desc="Splitting speeches into paragraphs"):
    speech_content = row['speechContent']
    paragraphs = split_speech_into_paragraphs(speech_content)
    
    # Create a row for each paragraph
    for para_num, paragraph in enumerate(paragraphs, 1):
        new_row = {
            **row,  # Copy all metadata from original speech
            'speechContent': paragraph,  # Replace with paragraph
            'paragraph_number': para_num,  # Add paragraph number
            'paragraph_length': len(paragraph)  # Add paragraph length
        }
        rows_list.append(new_row)

# Create new dataframe with paragraphs
df_paragraphs = pl.DataFrame(rows_list)

print("\nParagraph Segmentation Complete!")
print(f"\nOriginal dataset: {df.shape[0]} speeches")
print(f"New dataset: {df_paragraphs.shape[0]} paragraphs")
print(f"Average paragraphs per speech: {df_paragraphs.shape[0] / df.shape[0]:.1f}")

print(f"\nNew columns: {df_paragraphs.columns}")
print(f"\nParagraph length statistics:")
print(f"  Min: {df_paragraphs['paragraph_length'].min()}")
print(f"  Max: {df_paragraphs['paragraph_length'].max()}")
print(f"  Mean: {df_paragraphs['paragraph_length'].mean():.0f}")
print(f"  Median: {df_paragraphs['paragraph_length'].median():.0f}")

print(f"\nFirst few rows:")
print(df_paragraphs.head())

Splitting speeches into paragraphs: 100%|██████████| 30688/30688 [00:00<00:00, 31450.80it/s]



Paragraph Segmentation Complete!

Original dataset: 30688 speeches
New dataset: 588776 paragraphs
Average paragraphs per speech: 19.2

New columns: ['id', 'session', 'electoralTerm', 'firstName', 'lastName', 'politicianId', 'speechContent', 'factionId', 'documentUrl', 'positionShort', 'positionLong', 'date', 'speech_length', 'paragraph_number', 'paragraph_length']

Paragraph length statistics:
  Min: 1
  Max: 5527
  Mean: 221
  Median: 142

First few rows:
shape: (5, 15)
┌─────────┬─────────┬────────────┬───────────┬───┬────────────┬────────────┬───────────┬───────────┐
│ id      ┆ session ┆ electoralT ┆ firstName ┆ … ┆ date       ┆ speech_len ┆ paragraph ┆ paragraph │
│ ---     ┆ ---     ┆ erm        ┆ ---       ┆   ┆ ---        ┆ gth        ┆ _number   ┆ _length   │
│ i64     ┆ i64     ┆ ---        ┆ str       ┆   ┆ str        ┆ ---        ┆ ---       ┆ ---       │
│         ┆         ┆ i64        ┆           ┆   ┆            ┆ i64        ┆ i64       ┆ i64       │
╞═════════╪═══════

In [16]:
# Save segmented paragraphs to interim folder
from pathlib import Path

# Define output path
interim_dir = Path('../data/interim')
interim_dir.mkdir(exist_ok=True)

output_file = interim_dir / 'df_sample_split.csv'

# Save as CSV
df_paragraphs.write_csv(output_file)

print(f"Segmented paragraphs saved!")
print(f"Output file: {output_file}")
print(f"File size: {output_file.stat().st_size / 1024:.2f} KB")
print(f"Total rows (paragraphs): {df_paragraphs.shape[0]}")
print(f"Total columns: {df_paragraphs.shape[1]}")

Segmented paragraphs saved!
Output file: ..\data\interim\df_sample_split.csv
File size: 218219.05 KB
Total rows (paragraphs): 588776
Total columns: 15


In [17]:
# Convert all speeches to lowercase
import polars as pl

# Convert speechContent to lowercase
df_paragraphs_lowercase = df_paragraphs.with_columns(
    pl.col('speechContent').str.to_lowercase().alias('speechContent')
)

print("Lowercase conversion complete!")
print(f"\nAll {df_paragraphs_lowercase.shape[0]} paragraphs converted to lowercase")
print(f"\nSample (first paragraph, first 200 chars):")
print(df_paragraphs_lowercase['speechContent'][0][:200])

Lowercase conversion complete!

All 588776 paragraphs converted to lowercase

Sample (first paragraph, first 200 chars):
sehr geehrter herr präsident friedrich! kolleginnen und kollegen! die letzten zwei sätze des kollegen hunko kann ich nur begrüßen. da möchte ich mich ausdrücklich anschließen. das war mal was gescheit


In [18]:
# Tokenize all paragraphs using spaCy (OPTIMIZED with nlp.pipe)
import spacy
import polars as pl
from tqdm import tqdm

# Load German language model
try:
    nlp = spacy.load('de_core_news_sm')
    print("spaCy model 'de_core_news_sm' loaded successfully!")
except OSError:
    print("Model not found. Installing de_core_news_sm...")
    import subprocess
    subprocess.run(['python', '-m', 'spacy', 'download', 'de_core_news_sm'])
    nlp = spacy.load('de_core_news_sm')

# OPTIMIZATION: Disable unused pipeline components for faster tokenization
# We only need the tokenizer at this stage
nlp.disable_pipes(['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])
print(f"Active pipeline components: {nlp.pipe_names}")

# Get texts as list
texts = df_paragraphs['speechContent'].to_list()

# OPTIMIZATION: Use nlp.pipe() for batch processing (10-100x faster than individual nlp() calls)
# batch_size controls memory vs speed tradeoff
tokens_list = []
for doc in tqdm(nlp.pipe(texts, batch_size=100), total=len(texts), desc="Tokenizing paragraphs"):
    tokens = [token.text for token in doc]
    tokens_list.append(tokens)

# Re-enable all components for later use
nlp.enable_pipe('tagger')
nlp.enable_pipe('parser')
nlp.enable_pipe('ner')
nlp.enable_pipe('attribute_ruler')
nlp.enable_pipe('lemmatizer')

# Add tokens column to dataframe
df_paragraphs = df_paragraphs.with_columns(
    pl.Series('tokens', tokens_list)
)

# Add token count column
df_paragraphs = df_paragraphs.with_columns(
    pl.col('tokens').list.len().alias('token_count')
)

print("\nTokenization complete!")
print(f"\nDataframe shape: {df_paragraphs.shape}")
print(f"New columns: {df_paragraphs.columns}")

print(f"\nToken count statistics:")
print(f"  Min: {df_paragraphs['token_count'].min()}")
print(f"  Max: {df_paragraphs['token_count'].max()}")
print(f"  Mean: {df_paragraphs['token_count'].mean():.1f}")
print(f"  Median: {df_paragraphs['token_count'].median():.0f}")

print(f"\nSample (first paragraph tokens):")
print(f"  Tokens: {df_paragraphs['tokens'][0]}")
print(f"  Token count: {df_paragraphs['token_count'][0]}")

spaCy model 'de_core_news_sm' loaded successfully!
Active pipeline components: ['tok2vec', 'morphologizer']


Tokenizing paragraphs: 100%|██████████| 588776/588776 [20:36<00:00, 475.98it/s]



Tokenization complete!

Dataframe shape: (588776, 17)
New columns: ['id', 'session', 'electoralTerm', 'firstName', 'lastName', 'politicianId', 'speechContent', 'factionId', 'documentUrl', 'positionShort', 'positionLong', 'date', 'speech_length', 'paragraph_number', 'paragraph_length', 'tokens', 'token_count']

Token count statistics:
  Min: 1
  Max: 885
  Mean: 39.6
  Median: 25

Sample (first paragraph tokens):
  Tokens: shape: (44,)
Series: '' [str]
[
	"Sehr"
	"geehrter"
	"Herr"
	"Präsident"
	"Friedrich"
	…
	"Ich"
	"bin"
	"ganz"
	"begeistert"
	"."
]
  Token count: 44


In [19]:
# Remove stopwords from tokens (MEMORY OPTIMIZED)
import polars as pl
import gc

# Use stopwords from the already-loaded nlp model (don't reload!)
# nlp should already be in memory from the tokenization cell
german_stopwords = set(nlp.Defaults.stop_words)  # Use set for O(1) lookup

print("Stopword Removal (Memory Optimized)")
print(f"Number of German stopwords: {len(german_stopwords)}")
print(f"Sample stopwords: {list(german_stopwords)[:20]}")

# Process in batches to avoid memory issues
tokens_list = df_paragraphs['tokens'].to_list()
tokens_no_stopwords_list = []

batch_size = 10000
for i in range(0, len(tokens_list), batch_size):
    batch = tokens_list[i:i+batch_size]
    for tokens in batch:
        filtered = [token for token in tokens if token.lower() not in german_stopwords]
        tokens_no_stopwords_list.append(filtered)
    
    # Progress indicator
    if (i + batch_size) % 50000 == 0 or i + batch_size >= len(tokens_list):
        print(f"  Processed {min(i + batch_size, len(tokens_list))}/{len(tokens_list)} paragraphs...")

# Clear the original list to free memory
del tokens_list
gc.collect()

# Add new columns
df_paragraphs = df_paragraphs.with_columns([
    pl.Series('tokens_no_stopwords', tokens_no_stopwords_list),
])

# Add count column separately to avoid memory spike
df_paragraphs = df_paragraphs.with_columns(
    pl.col('tokens_no_stopwords').list.len().alias('token_count_no_stopwords')
)

# Clean up
del tokens_no_stopwords_list
gc.collect()

print("\nStopword removal complete!")
print(f"\nDataframe shape: {df_paragraphs.shape}")

print(f"\nToken count comparison:")
print(f"  Original tokens - Mean: {df_paragraphs['token_count'].mean():.1f}")
print(f"  After stopword removal - Mean: {df_paragraphs['token_count_no_stopwords'].mean():.1f}")
print(f"  Reduction: {(1 - df_paragraphs['token_count_no_stopwords'].mean() / df_paragraphs['token_count'].mean()) * 100:.1f}%")

print(f"\nSample comparison (first paragraph):")
print(f"  Original tokens ({df_paragraphs['token_count'][0]}): {df_paragraphs['tokens'][0]}")
print(f"  After stopword removal ({df_paragraphs['token_count_no_stopwords'][0]}): {df_paragraphs['tokens_no_stopwords'][0]}")

Stopword Removal (Memory Optimized)
Number of German stopwords: 543
Sample stopwords: ['wenige', 'am', 'weil', 'dürfen', 'statt', 'seines', 'dank', 'vom', 'eigenes', 'damals', 'wenigstens', 'zuerst', 'zwanzig', 'neben', 'manchen', 'würden', 'gehabt', 'jemanden', 'ab', 'drin']
  Processed 50000/588776 paragraphs...
  Processed 100000/588776 paragraphs...
  Processed 150000/588776 paragraphs...
  Processed 200000/588776 paragraphs...
  Processed 250000/588776 paragraphs...
  Processed 300000/588776 paragraphs...
  Processed 350000/588776 paragraphs...
  Processed 400000/588776 paragraphs...
  Processed 450000/588776 paragraphs...
  Processed 500000/588776 paragraphs...
  Processed 550000/588776 paragraphs...
  Processed 588776/588776 paragraphs...

Stopword removal complete!

Dataframe shape: (588776, 19)

Token count comparison:
  Original tokens - Mean: 39.6
  After stopword removal - Mean: 21.3
  Reduction: 46.2%

Sample comparison (first paragraph):
  Original tokens (44): shape: (44

In [20]:
# Remove punctuation and numbers from tokens (MEMORY OPTIMIZED)
import polars as pl
import string
import re
import gc

# Don't reload spaCy - it's already in memory!

print("Punctuation and Number Removal (Memory Optimized)")
print(f"Punctuation characters: {string.punctuation}")

# Pre-compile regex for speed
punct_num_pattern = re.compile(r'[\d\W]')
punct_digits_set = set(string.punctuation + string.digits)

# Process in batches
tokens_list = df_paragraphs['tokens_no_stopwords'].to_list()
tokens_clean_list = []

batch_size = 10000
for i in range(0, len(tokens_list), batch_size):
    batch = tokens_list[i:i+batch_size]
    for tokens in batch:
        cleaned = []
        for token in tokens:
            if all(c in punct_digits_set for c in token):
                continue
            cleaned_token = punct_num_pattern.sub('', token)
            if cleaned_token:
                cleaned.append(cleaned_token)
        tokens_clean_list.append(cleaned)
    
    if (i + batch_size) % 50000 == 0 or i + batch_size >= len(tokens_list):
        print(f"  Processed {min(i + batch_size, len(tokens_list))}/{len(tokens_list)} paragraphs...")

del tokens_list
gc.collect()

# Add columns
df_paragraphs = df_paragraphs.with_columns(
    pl.Series('tokens_clean', tokens_clean_list)
)
df_paragraphs = df_paragraphs.with_columns(
    pl.col('tokens_clean').list.len().alias('token_count_clean')
)

del tokens_clean_list
gc.collect()

print("\nPunctuation and number removal complete!")
print(f"Dataframe shape: {df_paragraphs.shape}")

print(f"\nToken count progression:")
print(f"  Original tokens - Mean: {df_paragraphs['token_count'].mean():.1f}")
print(f"  After stopword removal - Mean: {df_paragraphs['token_count_no_stopwords'].mean():.1f}")
print(f"  After punct/number removal - Mean: {df_paragraphs['token_count_clean'].mean():.1f}")

Punctuation and Number Removal (Memory Optimized)
Punctuation characters: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
  Processed 50000/588776 paragraphs...
  Processed 100000/588776 paragraphs...
  Processed 150000/588776 paragraphs...
  Processed 200000/588776 paragraphs...
  Processed 250000/588776 paragraphs...
  Processed 300000/588776 paragraphs...
  Processed 350000/588776 paragraphs...
  Processed 400000/588776 paragraphs...
  Processed 450000/588776 paragraphs...
  Processed 500000/588776 paragraphs...
  Processed 550000/588776 paragraphs...
  Processed 588776/588776 paragraphs...

Punctuation and number removal complete!
Dataframe shape: (588776, 21)

Token count progression:
  Original tokens - Mean: 39.6
  After stopword removal - Mean: 21.3
  After punct/number removal - Mean: 12.5


In [21]:
# Lemmatization (MEMORY OPTIMIZED - smaller batches)
import polars as pl
from tqdm import tqdm
import gc

# Don't reload spaCy! Reuse nlp from tokenization cell

print("Lemmatization (Memory Optimized)")
print("Converting tokens to their base/lemma form...")

# Disable components we don't need
nlp.disable_pipes(['parser', 'ner'])
print(f"Active pipeline components: {nlp.pipe_names}")

# Process in smaller batches to avoid OOM
tokens_clean = df_paragraphs['tokens_clean'].to_list()
tokens_lemma_list = []

# Use smaller batch size for memory efficiency
batch_size = 5000
nlp_batch_size = 50  # Smaller nlp.pipe batch

for i in range(0, len(tokens_clean), batch_size):
    batch = tokens_clean[i:i+batch_size]
    texts_batch = [' '.join(tokens) if tokens else '' for tokens in batch]
    
    for doc in nlp.pipe(texts_batch, batch_size=nlp_batch_size):
        lemmas = [token.lemma_ for token in doc]
        tokens_lemma_list.append(lemmas)
    
    # Free memory after each batch
    del texts_batch
    gc.collect()
    
    print(f"  Processed {min(i + batch_size, len(tokens_clean))}/{len(tokens_clean)} paragraphs...")

del tokens_clean
gc.collect()

# Re-enable components
nlp.enable_pipe('parser')
nlp.enable_pipe('ner')

# Add to dataframe
df_paragraphs = df_paragraphs.with_columns(
    pl.Series('tokens_lemma', tokens_lemma_list)
)
df_paragraphs = df_paragraphs.with_columns(
    pl.col('tokens_lemma').list.len().alias('token_count_lemma')
)

del tokens_lemma_list
gc.collect()

print("\nLemmatization complete!")
print(f"Dataframe shape: {df_paragraphs.shape}")

print(f"\nToken count at each stage:")
print(f"  Original: {df_paragraphs['token_count'].mean():.1f}")
print(f"  After stopwords: {df_paragraphs['token_count_no_stopwords'].mean():.1f}")
print(f"  After punct/num: {df_paragraphs['token_count_clean'].mean():.1f}")
print(f"  After lemma: {df_paragraphs['token_count_lemma'].mean():.1f}")

Lemmatization (Memory Optimized)
Converting tokens to their base/lemma form...
Active pipeline components: ['tok2vec', 'tagger', 'morphologizer', 'lemmatizer', 'attribute_ruler']
  Processed 5000/588776 paragraphs...
  Processed 10000/588776 paragraphs...
  Processed 15000/588776 paragraphs...
  Processed 20000/588776 paragraphs...
  Processed 25000/588776 paragraphs...
  Processed 30000/588776 paragraphs...
  Processed 35000/588776 paragraphs...
  Processed 40000/588776 paragraphs...
  Processed 45000/588776 paragraphs...
  Processed 50000/588776 paragraphs...
  Processed 55000/588776 paragraphs...
  Processed 60000/588776 paragraphs...
  Processed 65000/588776 paragraphs...
  Processed 70000/588776 paragraphs...
  Processed 75000/588776 paragraphs...
  Processed 80000/588776 paragraphs...
  Processed 85000/588776 paragraphs...
  Processed 90000/588776 paragraphs...
  Processed 95000/588776 paragraphs...
  Processed 100000/588776 paragraphs...
  Processed 105000/588776 paragraphs...
 

In [22]:
# Save preprocessed data to processed folder
from pathlib import Path

# Define output path
processed_dir = Path('../data/processed')
processed_dir.mkdir(exist_ok=True)

# Save as Parquet (supports nested data like lists)
output_file = processed_dir / 'df_sample_split_preprocessed.parquet'
df_paragraphs.write_parquet(output_file)

print(f"Preprocessed data saved!")
print(f"Output file: {output_file}")
print(f"File size: {output_file.stat().st_size / 1024:.2f} KB")
print(f"Total rows (paragraphs): {df_paragraphs.shape[0]}")
print(f"Total columns: {df_paragraphs.shape[1]}")
print(f"\nDataframe columns:")
for col in df_paragraphs.columns:
    print(f"  - {col}")

Preprocessed data saved!
Output file: ..\data\processed\df_sample_split_preprocessed.parquet
File size: 204685.89 KB
Total rows (paragraphs): 588776
Total columns: 23

Dataframe columns:
  - id
  - session
  - electoralTerm
  - firstName
  - lastName
  - politicianId
  - speechContent
  - factionId
  - documentUrl
  - positionShort
  - positionLong
  - date
  - speech_length
  - paragraph_number
  - paragraph_length
  - tokens
  - token_count
  - tokens_no_stopwords
  - token_count_no_stopwords
  - tokens_clean
  - token_count_clean
  - tokens_lemma
  - token_count_lemma
