In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the tree under /kaggle/input and print the full path of every file found.
# The second item yielded by os.walk (the sub-directory names) is not needed here, hence "_".
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the containing directory with the file name to get the complete path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/csqa-dataset/csqa_second_stage-5000.csv


In [4]:
# Load the CSQA second-stage CSV into a DataFrame so the following cells can inspect it.
df2 = pd.read_csv('/kaggle/input/csqa-dataset/csqa_second_stage-5000.csv')

In [5]:
# Preview the first rows of the frame to check its columns.
df2.head()

Unnamed: 0,question,refined_correct_options,refined_incorrect_options
0,The sanctions against the school were a punish...,"['undermine', 'invalidate', 'negate']","['enforce rules', 'promote discipline', 'facil..."
1,Sammy wanted to go to where the people were. ...,"['community gatherings', 'local events', 'soci...","['sports arenas', 'quiet retreats', 'train pla..."
2,To locate a choker not located in a jewelry bo...,"['artisan market', 'vintage fair', 'online mar...","['fashion outlet', 'home decor store', 'person..."
3,Google Maps and other highway and street GPS s...,"['printed road maps', 'manual navigation techn...","['local traffic reports', 'historical navigati..."
4,"The fox walked from the city into the forest, ...","['natural habitat', 'suitable prey', 'shelter ...","['urban garden', 'city park', 'farm fields', '..."


In [6]:
# Spot-check a single row (positional index 42): print its question and both option columns.
print(df2.iloc[42]['question'])
print(df2.iloc[42]['refined_correct_options'])
print(df2.iloc[42]['refined_incorrect_options'])

What kind of driving leads to accidents?
['aggressive lane changes', 'excessive speed relative', 'distracted driving']
['stressful conditions', 'driving under influence', 'cautious driving', 'passenger distractions']


In [37]:
import pandas as pd
import ast
import itertools
import json
from typing import List, Dict, Tuple

import pandas as pd
import ast
import itertools
import json
from typing import List, Dict

def generate_logical_combinations(csv_file_path: str, output_file_path: str = None) -> Dict:
    """
    Build AND, OR and NEITHER/NOR statements for every question in a CSV file.

    For each row, every unordered pair of options is emitted once per
    connective and classified as follows:
      * AND     - correct only when both options are correct.
      * OR      - correct when at least one option is correct.
      * NEITHER - correct only when both options are incorrect.

    Args:
        csv_file_path: Path to a CSV with the columns 'question',
            'refined_correct_options' and 'refined_incorrect_options'.
            The two option columns hold stringified Python lists.
        output_file_path: Optional path; when given, the result is also
            written there as UTF-8 JSON.

    Returns:
        A dict with 'total_questions_processed' (int) and 'questions'
        (one entry per CSV row holding the question, the parsed option
        lists and the generated combinations).

    Raises:
        ValueError: If one of the required columns is missing.
    """
    required_columns = ['question', 'refined_correct_options', 'refined_incorrect_options']

    def parse_facts_string(facts_str: str) -> List[str]:
        """Parse a stringified list into a Python list, tolerating malformed text."""
        try:
            parsed = ast.literal_eval(facts_str)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only the errors literal_eval documents are handled; anything
            # else (e.g. KeyboardInterrupt) must propagate.
            parsed = None
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        # Fallback for text that is not a valid list literal: drop the wrapping
        # brackets/quotes first so they do not stick to the first and last item,
        # then split on commas.
        inner = facts_str.strip().strip("[]{}()'\" ")
        pieces = (piece.strip().strip("'\"").strip() for piece in inner.split(","))
        return [piece for piece in pieces if piece]

    def cell_to_facts(cell) -> List[str]:
        """Convert one CSV cell to a list of facts; a missing cell yields []."""
        # An empty CSV cell is read as NaN; str(NaN) would otherwise become
        # the bogus option 'nan'.
        if pd.isna(cell):
            return []
        return parse_facts_string(str(cell))

    def generate_and_combinations(correct: List[str], incorrect: List[str]) -> Dict[str, List[str]]:
        """AND is correct only for correct+correct pairs."""
        valid_and = [f"{c1} AND {c2}" for c1, c2 in itertools.combinations(correct, 2)]
        invalid_and = [f"{c} AND {i}" for c in correct for i in incorrect]
        invalid_and += [f"{i1} AND {i2}" for i1, i2 in itertools.combinations(incorrect, 2)]
        return {"correct": valid_and, "incorrect": invalid_and}

    def generate_or_combinations(correct: List[str], incorrect: List[str]) -> Dict[str, List[str]]:
        """OR is correct as soon as one side is correct."""
        valid_or = [f"{c1} OR {c2}" for c1, c2 in itertools.combinations(correct, 2)]
        valid_or += [f"{c} OR {i}" for c in correct for i in incorrect]  # only one order
        invalid_or = [f"{i1} OR {i2}" for i1, i2 in itertools.combinations(incorrect, 2)]
        return {"correct": valid_or, "incorrect": invalid_or}

    def generate_neither_combinations(correct: List[str], incorrect: List[str]) -> Dict[str, List[str]]:
        """NEITHER/NOR is correct only for incorrect+incorrect pairs."""
        valid_neither = [f"NEITHER {i1} NOR {i2}" for i1, i2 in itertools.combinations(incorrect, 2)]
        # Any pair containing at least one correct option makes the statement false.
        incorrect_neither = [f"NEITHER {c1} NOR {c2}" for c1, c2 in itertools.combinations(correct, 2)]
        incorrect_neither += [f"NEITHER {c} NOR {i}" for c in correct for i in incorrect]  # only one order
        return {"correct": valid_neither, "incorrect": incorrect_neither}

    # Read CSV and fail early if the expected schema is not present.
    df = pd.read_csv(csv_file_path)
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"Missing column: {col}")

    results = []
    for _, row in df.iterrows():
        correct_facts = cell_to_facts(row['refined_correct_options'])
        incorrect_facts = cell_to_facts(row['refined_incorrect_options'])

        results.append({
            "question": row['question'],
            "original_correct_facts": correct_facts,
            "original_incorrect_facts": incorrect_facts,
            "logical_combinations": {
                "AND_combinations": generate_and_combinations(correct_facts, incorrect_facts),
                "OR_combinations": generate_or_combinations(correct_facts, incorrect_facts),
                "NEITHER_combinations": generate_neither_combinations(correct_facts, incorrect_facts)
            }
        })

    final_result = {
        "total_questions_processed": len(results),
        "questions": results
    }

    if output_file_path:
        with open(output_file_path, 'w', encoding='utf-8') as f:
            json.dump(final_result, f, indent=2, ensure_ascii=False)
        print(f"Results saved to {output_file_path}")

    return final_result

# Example usage and testing function
def test_function():
    """
    Smoke-run generate_logical_combinations on the Kaggle CSV.

    Writes the full result to 'logical_combinations_output.json', then prints
    the number of processed questions and the AND / OR / NEITHER statements of
    the first question. Any exception is caught and reported on stdout.
    """
    # (heading printed before the lists, key inside 'logical_combinations')
    sections = (
        ("\nAND Combinations:", "AND_combinations"),
        ("\nOR Combinations:", "OR_combinations"),
        ("\nNeither Combinations:", "NEITHER_combinations"),
    )

    try:
        results = generate_logical_combinations('/kaggle/input/csqa-dataset/csqa_second_stage-5000.csv', 'logical_combinations_output.json')

        print("Sample Results:")
        print(f"Processed {results['total_questions_processed']} questions")

        # Only the first question is shown.
        for position, question_data in enumerate(results['questions'][:1], start=1):
            for heading, combo_key in sections:
                # The question line is repeated above every section.
                print(f"\nQuestion {position}: {question_data['question']}")
                print(heading)
                for label in ("Correct", "Incorrect"):
                    print(f"  {label}: {question_data['logical_combinations'][combo_key][label.lower()]}")

    except Exception as e:
        print(f"Test failed: {e}")

# Run the demo when this code is executed as the top-level module (__name__ == "__main__").
if __name__ == "__main__":
    test_function()

Results saved to logical_combinations_output.json
Sample Results:
Processed 4999 questions

Question 1: The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?

AND Combinations:
  Correct: ['undermine AND invalidate', 'undermine AND negate', 'invalidate AND negate']
  Incorrect: ['undermine AND enforce rules', 'undermine AND promote discipline', 'undermine AND facilitate growth', 'undermine AND accelerate change', 'invalidate AND enforce rules', 'invalidate AND promote discipline', 'invalidate AND facilitate growth', 'invalidate AND accelerate change', 'negate AND enforce rules', 'negate AND promote discipline', 'negate AND facilitate growth', 'negate AND accelerate change', 'enforce rules AND promote discipline', 'enforce rules AND facilitate growth', 'enforce rules AND accelerate change', 'promote discipline AND facilitate growth', 'promote discipline AND accelerate change', 'facilitate growth AND accelerate change']

In [10]:
import pandas as pd
import ast
import itertools
import json
import random
from typing import List, Dict, Tuple

def generate_meta_logical_combinations(csv_file_path: str, output_file_path: str = None, seed: int = None) -> Dict:
    """
    Generate meta-logical answer statements for every question in a CSV file.

    For each row the correct and incorrect options are merged, shuffled and
    labelled with lower-case roman numerals ('i' .. 'vii'). Three families of
    statements are produced, each split into "correct" and "incorrect" lists:
    1. Simple Combinations (AND, OR, NEITHER/NOR over every pair of options)
    2. Exact Quantifiers (Exactly X, Only X, All, None, More than X, Fewer than X)
    3. Exclusion Sets (All except X)

    Args:
        csv_file_path: Path to a CSV with the columns 'question',
            'refined_correct_options' and 'refined_incorrect_options'
            (the two option columns hold stringified Python lists).
        output_file_path: Optional path; when given, the result is also
            written there as UTF-8 JSON.
        seed: Optional seed. When given, shuffling and down-sampling use a
            private random.Random(seed) so the output is reproducible. When
            omitted, the module-level `random` generator is used.

    Returns:
        A dict with 'total_questions_processed' (int) and 'questions' (one
        formatted entry per successfully processed row). A row that raises
        while being processed is reported on stdout and skipped.

    Raises:
        Exception: If the CSV file cannot be read.
    """
    max_options = 7  # one roman numeral label exists per option
    romans = ['i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii']
    # Fall back to the module-level generator so callers that seed it
    # globally (random.seed) keep getting the same behaviour as before.
    rng = random if seed is None else random.Random(seed)

    def parse_facts_string(facts_str: str) -> List[str]:
        """Parse a stringified list into a Python list, tolerating malformed text."""
        try:
            parsed = ast.literal_eval(facts_str)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only the errors literal_eval documents are handled here.
            parsed = None
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        # Fallback for text that is not a valid list literal: drop the wrapping
        # brackets/quotes first so they do not stick to the first and last item.
        inner = facts_str.strip().strip("[]{}()'\" ")
        pieces = (piece.strip().strip("'\"").strip() for piece in inner.split(","))
        return [piece for piece in pieces if piece]

    def cell_to_facts(cell) -> List[str]:
        """Convert one CSV cell to a list of facts; a missing cell yields []."""
        # An empty CSV cell is read as NaN; str(NaN) would otherwise become
        # the bogus option 'nan'.
        if pd.isna(cell):
            return []
        return parse_facts_string(str(cell))

    def create_option_mapping(correct: List[str], incorrect: List[str]) -> Tuple[List[str], List[int], Dict[int, str]]:
        """
        Creates shuffled options, tracks correct indices, and builds a roman numeral mapping.
        """
        # Tag each option before shuffling so correctness follows the option
        # even when the same text appears in both input lists.
        tagged = [(opt, True) for opt in correct] + [(opt, False) for opt in incorrect]
        rng.shuffle(tagged)

        options = [opt for opt, _ in tagged]
        correct_indices = [i for i, (_, is_correct) in enumerate(tagged) if is_correct]
        roman_mapping = {i: romans[i] for i in range(len(options))}

        return options, correct_indices, roman_mapping

    def generate_simple_combinations(options: List[str], correct_indices: List[int], roman_mapping: Dict[int, str]) -> Dict:
        """Generate AND, OR, NEITHER combinations with proper bifurcation"""
        results = {
            "AND_combinations": {"correct": [], "incorrect": []},
            "OR_combinations": {"correct": [], "incorrect": []},
            "NEITHER_combinations": {"correct": [], "incorrect": []}
        }
        is_correct = set(correct_indices)

        def file_under(family: str, truth: bool, label: str) -> None:
            results[family]["correct" if truth else "incorrect"].append(label)

        # Every unordered pair (i < j), in index order.
        for i, j in itertools.combinations(range(len(options)), 2):
            roman_i, roman_j = roman_mapping[i], roman_mapping[j]
            first_ok, second_ok = i in is_correct, j in is_correct

            # AND holds only when both are correct.
            file_under("AND_combinations", first_ok and second_ok, f"{roman_i} and {roman_j}")
            # OR holds when at least one is correct.
            file_under("OR_combinations", first_ok or second_ok, f"Either {roman_i} or {roman_j}")
            # NEITHER holds only when both are incorrect.
            file_under("NEITHER_combinations", not first_ok and not second_ok, f"Neither {roman_i} nor {roman_j}")

        return results

    def generate_exact_quantifiers(options: List[str], correct_indices: List[int]) -> Dict:
        """Generate exact quantifier combinations"""
        results = {"correct": [], "incorrect": []}
        num_correct = len(correct_indices)
        num_total = len(options)

        def file_under(truth: bool, label: str) -> None:
            results["correct" if truth else "incorrect"].append(label)

        # "Exactly X of these"
        for x in range(0, num_total + 1):
            file_under(x == num_correct, f"Exactly {x} of these")

        # "Only X of these" (same logic as exactly, but starting at 1)
        for x in range(1, num_total + 1):
            file_under(x == num_correct, f"Only {x} of these")

        file_under(num_correct == num_total, "All of these")
        file_under(num_correct == 0, "None of these")

        # "More than X of these"
        for x in range(0, num_total):
            file_under(num_correct > x, f"More than {x} of these")

        # "Fewer than X of these"
        for x in range(1, num_total + 2):
            file_under(num_correct < x, f"Fewer than {x} of these")

        return results

    def generate_exclusion_sets(options, correct_indices, roman_mapping):
        """Generate 'All except ...' statements; only excluding exactly the incorrect options is correct."""
        results = {"correct": [], "incorrect": []}

        n = len(options)
        # Sorted for a stable label and for tuple comparison below.
        incorrect_indices = sorted(set(range(n)) - set(correct_indices))

        # With nothing to exclude the label would be the malformed 'All except ',
        # so no correct exclusion statement is produced in that case.
        if incorrect_indices:
            excluded_romans = [roman_mapping[i] for i in incorrect_indices]
            results["correct"].append('All except ' + ' and '.join(excluded_romans))

        # Every other subset of 1 .. n-1 excluded options is an incorrect statement.
        target = tuple(incorrect_indices)
        for r in range(1, n):
            for excluded in itertools.combinations(range(n), r):
                if excluded != target:
                    excluded_romans = [roman_mapping[i] for i in excluded]
                    results["incorrect"].append('All except ' + ' and '.join(excluded_romans))

        return results

    def format_output(question: str, options: List[str], roman_mapping: Dict[int, str],
                      simple_combos: Dict, quantifier_combos: Dict, exclusion_combos: Dict,
                      num_correct: int) -> Dict:
        """Format the final output structure"""

        def size_of(bucket: Dict) -> int:
            return len(bucket["correct"]) + len(bucket["incorrect"])

        return {
            "question": question,
            "options": {roman_mapping[i]: options[i] for i in range(len(options))},
            "combinations": {
                "simple_combinations": {
                    "AND_combinations": simple_combos["AND_combinations"],
                    "OR_combinations": simple_combos["OR_combinations"],
                    "NEITHER_combinations": simple_combos["NEITHER_combinations"]
                },
                "exact_quantifiers": quantifier_combos,
                "exclusion_sets": exclusion_combos
            },
            "statistics": {
                "total_options": len(options),
                # The real number of correct options, taken from the tracked indices.
                "correct_options": num_correct,
                "total_and_combinations": size_of(simple_combos["AND_combinations"]),
                "total_or_combinations": size_of(simple_combos["OR_combinations"]),
                "total_neither_combinations": size_of(simple_combos["NEITHER_combinations"]),
                "total_quantifier_combinations": size_of(quantifier_combos),
                "total_exclusion_combinations": size_of(exclusion_combos)
            }
        }

    # Read CSV
    try:
        df = pd.read_csv(csv_file_path)
    except Exception as e:
        # Chain the original error so its traceback is not lost.
        raise Exception(f"Error reading CSV file: {e}") from e

    results = []

    for idx, row in df.iterrows():
        try:
            question = row['question']
            correct_facts = cell_to_facts(row['refined_correct_options'])
            incorrect_facts = cell_to_facts(row['refined_incorrect_options'])

            # Limit total options to 7 for manageability. Correct options are
            # never dropped, so a row with more than 7 of them cannot be labelled.
            if len(correct_facts) > max_options:
                raise ValueError(
                    f"{len(correct_facts)} correct options exceed the supported maximum of {max_options}"
                )
            if len(correct_facts) + len(incorrect_facts) > max_options:
                keep_incorrect = max_options - len(correct_facts)
                incorrect_facts = rng.sample(incorrect_facts, keep_incorrect)

            options, correct_indices, roman_mapping = create_option_mapping(correct_facts, incorrect_facts)

            # Generate all three types
            simple_combos = generate_simple_combinations(options, correct_indices, roman_mapping)
            quantifier_combos = generate_exact_quantifiers(options, correct_indices)
            exclusion_combos = generate_exclusion_sets(options, correct_indices, roman_mapping)

            results.append(format_output(question, options, roman_mapping,
                                         simple_combos, quantifier_combos, exclusion_combos,
                                         len(correct_indices)))

        except Exception as e:
            # Best effort: report the bad row and carry on with the rest.
            print(f"Error processing row {idx}: {e}")
            continue

    final_result = {
        "total_questions_processed": len(results),
        "questions": results
    }

    # Save results
    if output_file_path:
        with open(output_file_path, 'w', encoding='utf-8') as f:
            json.dump(final_result, f, indent=2, ensure_ascii=False)
        print(f"Results saved to {output_file_path}")

    return final_result

def sample_random_combinations(results: Dict, num_samples_per_type: int = 3) -> Dict:
    """
    Draw a small random sample of statements for every question.

    For each question in results["questions"], up to `num_samples_per_type`
    correct and incorrect statements are drawn (without replacement) from each
    simple combination type and from the exact quantifiers. Exclusion sets are
    not sampled.

    Returns:
        {"sampled_questions": [...]} with one record per question holding the
        question text, its options and the drawn samples.
    """

    def draw(pool):
        # Never ask random.sample for more items than the pool holds.
        return random.sample(pool, min(num_samples_per_type, len(pool)))

    def draw_both(bucket):
        right, wrong = bucket["correct"], bucket["incorrect"]
        return {"correct_sample": draw(right), "incorrect_sample": draw(wrong)}

    sampled_questions = []

    for entry in results["questions"]:
        record = {
            "question": entry["question"],
            "options": entry["options"],
            "sampled_combinations": {}
        }
        families = entry["combinations"]
        picks = record["sampled_combinations"]

        # Pairwise statements first, then the quantifier statements.
        for family in ("AND_combinations", "OR_combinations", "NEITHER_combinations"):
            picks[family] = draw_both(families["simple_combinations"][family])
        picks["exact_quantifiers"] = draw_both(families["exact_quantifiers"])

        sampled_questions.append(record)

    return {"sampled_questions": sampled_questions}

# Test function
def test_meta_logical_system():
    """
    Run the meta-logical generator on the Kaggle CSV and print a preview.

    The full result is written to 'logical_complete_meta_output.json'. For the
    first two questions the options, the AND / OR / NEITHER statements and the
    quantifier statements are printed. Exclusion sets are not printed.
    """
    # (header line, key path below question_data['combinations'])
    sections = (
        ("\n--- AND COMBINATIONS ---", ("simple_combinations", "AND_combinations")),
        ("\n--- OR COMBINATIONS ---", ("simple_combinations", "OR_combinations")),
        ("\n--- Neither COMBINATIONS ---", ("simple_combinations", "NEITHER_combinations")),
        ("\n--- EXACT QUANTIFIERS ---", ("exact_quantifiers",)),
    )

    # Generate combinations
    results = generate_meta_logical_combinations('/kaggle/input/csqa-dataset/csqa_second_stage-5000.csv', 'logical_complete_meta_output.json')

    # Exercise the sampler as well; its result is not displayed.
    samples = sample_random_combinations(results, num_samples_per_type=2)

    print("=== COMPLETE META-LOGICAL COMBINATIONS ===")

    for question_data in results["questions"][:2]:
        print(f"\nQuestion: {question_data['question']}")
        print("\nOptions:")
        for roman, option in question_data["options"].items():
            print(f"  {roman}. {option}")

        for header, key_path in sections:
            print(header)
            for label in ("Correct", "Incorrect"):
                # Walk down to the {"correct": [...], "incorrect": [...]} bucket.
                bucket = question_data['combinations']
                for key in key_path:
                    bucket = bucket[key]
                print(f"{label}: {bucket[label.lower()]}")

# Run the meta-logical demo when this code is executed as the top-level module (__name__ == "__main__").
if __name__ == "__main__":
    test_meta_logical_system()

Results saved to logical_complete_meta_output.json
=== COMPLETE META-LOGICAL COMBINATIONS ===

Question: The sanctions against the school were a punishing blow, and they seemed to what the efforts the school had made to change?

Options:
  i. accelerate change
  ii. facilitate growth
  iii. negate
  iv. invalidate
  v. enforce rules
  vi. undermine
  vii. promote discipline

--- AND COMBINATIONS ---
Correct: ['iii and iv', 'iii and vi', 'iv and vi']
Incorrect: ['i and ii', 'i and iii', 'i and iv', 'i and v', 'i and vi', 'i and vii', 'ii and iii', 'ii and iv', 'ii and v', 'ii and vi', 'ii and vii', 'iii and v', 'iii and vii', 'iv and v', 'iv and vii', 'v and vi', 'v and vii', 'vi and vii']

--- OR COMBINATIONS ---
Correct: ['Either i or iii', 'Either i or iv', 'Either i or vi', 'Either ii or iii', 'Either ii or iv', 'Either ii or vi', 'Either iii or iv', 'Either iii or v', 'Either iii or vi', 'Either iii or vii', 'Either iv or v', 'Either iv or vi', 'Either iv or vii', 'Either v or vi',