In [1]:
import pandas as pd
import re

In [2]:
# Load the event -> scripture-reference sheet. The join step further down reads
# its 'EventID' and 'Reference' columns.
# Forward slashes keep the relative path usable on both Windows and POSIX.
ref_df = pd.read_excel('../acts_and_miracles/acts_and_miracles_preprep.xlsx', sheet_name='ref_to_text')

In [3]:
# Load the verse table (one row per verse: 'Book Name', 'Chapter', 'Verse', 'Text').
# Forward slashes keep the relative path usable on both Windows and POSIX.
scripture_df = pd.read_csv('../scripture/asv.csv')

In [4]:
# Display the loaded verse table to check its columns before joining.
scripture_df

Unnamed: 0,Verse ID,Book Name,Book Number,Chapter,Verse,Text
0,1,Genesis,1,1,1,In the beginning God created the heavens and t...
1,2,Genesis,1,1,2,And the earth was waste and void; and darkness...
2,3,Genesis,1,1,3,"And God said, Let there be light: and there wa..."
3,4,Genesis,1,1,4,"And God saw the light, that it was good: and G..."
4,5,Genesis,1,1,5,"And God called the light Day, and the darkness..."
...,...,...,...,...,...,...
31097,31098,Revelation,66,22,17,"And the Spirit and the bride say, Come. And he..."
31098,31099,Revelation,66,22,18,I testify unto every man that heareth the word...
31099,31100,Revelation,66,22,19,and if any man shall take away from the words ...
31100,31101,Revelation,66,22,20,"He who testifieth these things saith, Yea: I c..."


In [None]:
import pandas as pd
import re

def expand_reference(ref_str, max_verses_per_chapter=200):
    """Parse a scripture reference string and expand it into individual verses.

    Supported forms (comma-separated after a single leading book name):
        "Genesis 38:7"            -> one verse
        "Genesis 37:5-11"         -> verse range within one chapter
        "Exodus 3:4-4:17"         -> range spanning chapters
        "1 Samuel 3:4, 3:10"      -> books with a leading ordinal

    Args:
        ref_str: The reference text. Non-string values (e.g. NaN/None coming
            from a blank spreadsheet cell) yield an empty list.
        max_verses_per_chapter: Upper bound used when a cross-chapter range
            must run "to the end of the chapter" and the real chapter length
            is unknown here. Verse numbers past the real end of a chapter are
            therefore generated and are expected to be discarded by whoever
            looks the verses up.

    Returns:
        A list of (book, chapter, verse) tuples in reference order. ``book``
        is "" when the string does not start with a book name. Comma-separated
        parts that contain no ':' are skipped.

    Raises:
        ValueError: If a part containing ':' is not of the form
            chapter:verse[-[chapter:]verse] with integer numbers.
    """
    # Blank cells are read as NaN/None rather than text: nothing to expand.
    if not isinstance(ref_str, str):
        return []

    # Spreadsheets commonly auto-replace the range hyphen with an en/em dash.
    cleaned = ref_str.strip().replace('\u2013', '-').replace('\u2014', '-')

    # Book name = optional ordinal prefix that is followed by a letter
    # ("1 Samuel", "2 Kings"), then everything up to the first digit.
    book_match = re.match(r'^((?:[1-3]\s*(?=[^\W\d_]))?[^0-9]+)', cleaned)
    if book_match:
        book = book_match.group(1).strip()
        # Slice at the match end so surrounding whitespace cannot shift the cut.
        verses_str = cleaned[book_match.end():].strip()
    else:
        book = ""
        verses_str = cleaned

    result = []

    for part in verses_str.split(','):
        part = part.strip()

        # Parts without an explicit chapter (or empty parts) are ignored.
        if ':' not in part:
            continue

        start_text, is_range, end_text = part.partition('-')
        start_ch, start_v = (int(piece) for piece in start_text.split(':'))

        if not is_range:
            result.append((book, start_ch, start_v))
            continue

        if ':' in end_text:
            # Full end reference (chapter:verse)
            end_ch, end_v = (int(piece) for piece in end_text.split(':'))
        else:
            # Only an end verse was given: the range stays in the start chapter.
            end_ch, end_v = start_ch, int(end_text)

        for ch in range(start_ch, end_ch + 1):
            first_v = start_v if ch == start_ch else 1
            last_v = end_v if ch == end_ch else max_verses_per_chapter
            result.extend((book, ch, v) for v in range(first_v, last_v + 1))

    return result

def join_references_with_text(ref_df, scripture_df):
    """Expand every reference in ``ref_df`` and attach the matching verse text.

    Args:
        ref_df: DataFrame with 'EventID' and 'Reference' columns.
        scripture_df: DataFrame with 'Book Name', 'Chapter', 'Verse' and
            'Text' columns (one row per verse).

    Returns:
        DataFrame with columns 'EventID', 'Reference', 'Reference_Code'
        ("<book> <chapter>:<verse>") and 'Reference_Text', one row per
        expanded verse that has text in ``scripture_df``. The columns are
        present even when no row matches.
    """
    output_columns = ['EventID', 'Reference', 'Reference_Code', 'Reference_Text']

    # (book, chapter, verse) -> text. Zipping the columns avoids materialising
    # a Series for each of the scripture rows.
    scripture_dict = dict(zip(
        zip(scripture_df['Book Name'], scripture_df['Chapter'], scripture_df['Verse']),
        scripture_df['Text'],
    ))

    rows = []
    for event_id, reference in zip(ref_df['EventID'], ref_df['Reference']):
        # Blank cells are read as NaN; skip them instead of failing the whole join.
        if not isinstance(reference, str):
            continue

        for book, chapter, verse in expand_reference(reference):
            text = scripture_dict.get((book, chapter, verse), "")
            # Keep only verses that exist and carry real text (NaN is truthy,
            # so a plain truthiness check would let missing text through).
            if isinstance(text, str) and text:
                rows.append({
                    'EventID': event_id,
                    'Reference': reference,
                    'Reference_Code': f"{book} {chapter}:{verse}",
                    'Reference_Text': text
                })

    # Passing columns explicitly keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=output_columns)

# Main execution
def process_files(ref_file='ref_to_text.csv', scripture_file='SCRIPTURE.csv', output_file='expanded_references.csv'):
    """Run the CSV pipeline end to end: read both inputs, join, write the output.

    Reads ``ref_file`` and ``scripture_file`` with ``pd.read_csv``, joins them
    via ``join_references_with_text``, writes the result to ``output_file``
    without the index, prints a row count and a ten-row sample, and returns
    the joined DataFrame.
    """
    # Lazy generator: the reference file is read first, then the scripture file.
    references, scripture = (pd.read_csv(path) for path in (ref_file, scripture_file))

    result_df = join_references_with_text(references, scripture)
    result_df.to_csv(output_file, index=False)

    summary_lines = (
        f"Created {len(result_df)} rows",
        f"Sample output:\n{result_df.head(10)}",
    )
    for line in summary_lines:
        print(line)

    return result_df

# Test with examples
def test_examples():
    """Print the expansion of a fixed set of sample references.

    For each sample the reference itself is printed, followed by either the
    full expansion (three verses or fewer) or the verse count plus the first
    three verses.
    """
    test_refs = [
        "Genesis 37:5-11",
        "Genesis 38:7",
        "Genesis 40:5-11, 40:16-17",
        "Genesis 40:8, 40:12-13, 40:18-19",
        "Genesis 41:1-7",
        "Exodus 3:4-4:17",
        "Exodus 4:21, 7:3, 7:13, 8:15, 8:19, 8:32, 9:7, 9:12, 9:34-35, 10:1, 10:20, 10:27, 11:10, 14:4, 14:8"
    ]

    print("Testing reference expansion:")
    for ref in test_refs:
        expanded = expand_reference(ref)
        print(f"\n{ref}")
        # Long expansions are abbreviated to a count and the first three verses.
        if len(expanded) > 3:
            print(f"  → {len(expanded)} verses: {expanded[:3]}...")
        else:
            print(f"  → {expanded}")

# Run one of these:
# process_files()  # For actual CSV files
# test_examples()  # To test the parsing logic

In [None]:
# Expand every reference and attach verse text. The join function defined in
# this notebook is join_references_with_text; no `expand_references` exists.
result_df = join_references_with_text(ref_df, scripture_df)

In [None]:
# Display the joined table for a visual check.
result_df

In [None]:
# Build the expanded reference table and write it to the working directory.
# The join function defined in this notebook is join_references_with_text;
# no `expand_references` exists.
result_df = join_references_with_text(ref_df, scripture_df)
result_df.to_csv('expanded_references.csv', index=False)

print(f"Created {len(result_df)} rows")
print(result_df.head(10))