In [1]:
import polars as pl

# Load the data
df_child = pl.read_excel("data/policy_tracker.xlsx", sheet_name="4. US State - Current Child")

# Get the 4 passed bills
passed_bills = df_child.filter(
    pl.col("Status (optional)").str.contains("(?i)passed|act |chaptered")
)

print("=== PASSED BILLS - FULL TEXT ===\n")

for i, row in enumerate(passed_bills.iter_rows(named=True)):
    print(f"\n{'='*80}")
    print(f"BILL #{i+1}")
    print(f"State: {row['US State']}")
    print(f"Name: {row['Name']}")
    print(f"Intro Date: {row['Intro Date']}")
    print(f"Status: {row['Status (optional)']}")
    print(f"\nDescription:")
    print(row['Description'])
    print(f"{'='*80}")

print("\n\n=== SAMPLE OF PROPOSED BILLS ===\n")

# Get 5 random proposed bills
sample_bills = df_child.sample(5)

for i, row in enumerate(sample_bills.iter_rows(named=True)):
    print(f"\n{'='*80}")
    print(f"SAMPLE #{i+1}")
    print(f"State: {row['US State']}")
    print(f"Name: {row['Name']}")
    print(f"\nDescription:")
    print(row['Description'])
    print(f"{'='*80}")

Could not determine dtype for column 14, falling back to string


=== PASSED BILLS - FULL TEXT ===


BILL #1
State: Arkansas
Name: To create the protection of minors from distribution of harmful material act;  to establish liability for the publication or distribution of material harmful to minors on the internet;  and to require reasonable age verification.
Intro Date: 2023-01-17
Status: 4/11/2023 - Passed, now Act 612 - 100% progression

Description:
Requires age verification to access websites that are more than one-third pornography. Retaining ID information prohibited.

BILL #2
State: Arkansas
Name: To create the social media safety act;  to require age verification for use of social media;  and to clarify liability for failure to perform age verification for use of social media and illegal retention of data.
Intro Date: 2023-03-09
Status: 4/11/2023 - Passed, now Act 689 - 100% progression

Description:
Requires age verification / parental consent to open social media accounts, with significant exclusions. Retaining ID information prohibited.

B

In [None]:
import polars as pl
from langchain_ollama import OllamaLLM
import json

llm = OllamaLLM(model="qwen2.5:3b", temperature=0)

# Load bills
df_child = pl.read_excel("data/policy_tracker.xlsx", sheet_name="4. US State - Current Child")

features_list = []

for i, bill in enumerate(df_child.to_dicts()):
    print(f"Processing {i+1}/146: {bill['US State']} - {bill['Name'][:40]}...")
    
    prompt = f"""Does this bill include these features? Answer with valid JSON only.

Bill: {bill['Description']}

{{
  "age_verification": true/false,           // Requires proving user age
  "parental_consent": true/false,           // Requires parent permission/notification
  "data_collection_limits": true/false,     // Restricts what data can be collected from minors
  "algorithmic_restrictions": true/false,   // Regulates recommendation algorithms, autoplay, etc.
  "duty_of_care": true/false,              // Establishes duty of care / safety by design standard
  "risk_assessment_required": true/false,   // Requires platforms to assess/report risks
  "default_privacy_settings": true/false,   // Mandates privacy-protective defaults
  "time_limits": true/false,                // Screen time restrictions or break requirements
  "targets_all_platforms": true/false,      // Applies broadly vs specific platforms
  "school_based": true/false                // Focused on schools/educational contexts
}}"""
    
    try:
        response = llm.invoke(prompt)
        features = json.loads(response)
        features['state'] = bill['US State']
        features['name'] = bill['Name']
        features_list.append(features)
    except:
        print(f"  ✗ Failed to parse")

# Convert to DataFrame
features_df = pl.DataFrame(features_list)

print("\n=== FEATURE FREQUENCIES ===")
for col in ['age_verification', 'pornography_focus', 'social_media_focus', 
            'data_limits', 'parental_consent', 'platform_design', 'duty_of_care']:
    count = features_df[col].sum()
    print(f"{col}: {count}/146 bills ({count/146*100:.1f}%)")

Could not determine dtype for column 14, falling back to string


Processing 1/146: Alabama - Relating to consumer protection;  to req...
  ✗ Failed to parse
Processing 2/146: Arizona - To create the protection of minors from ...
  ✗ Failed to parse
Processing 3/146: Arkansas - To create the protection of minors from ...
  ✗ Failed to parse
Processing 4/146: Arkansas - To create the social media safety act;  ...
  ✗ Failed to parse
Processing 5/146: California - Business regulations: sexually explicit ...
  ✗ Failed to parse
Processing 6/146: California - The California Age-Appropriate Design Co...
  ✗ Failed to parse
Processing 7/146: California - Features that harm child users: civil pe...
  ✗ Failed to parse
Processing 8/146: California - Social media platforms: minor users: civ...
  ✗ Failed to parse
Processing 9/146: Connecticut - An act concerning minors and social medi...
  ✗ Failed to parse
Processing 10/146: Connecticut - An act concerning minors and digital pri...
  ✗ Failed to parse
Processing 11/146: Connecticut - An act establishing a co

ColumnNotFoundError: "age_verification" not found