In [4]:
# elite_math_reasoning_2_5k_COMPLETE.py - ULTIMATE math dataset for actual reasoning improvement
import random, json, sympy as sp, math
from pathlib import Path
from typing import List, Dict, Tuple
from fractions import Fraction

# Seed the module-level RNG so the random draws made by the generators
# below are reproducible from run to run.
random.seed(42)

# Output locations. The dataset is written as JSON Lines under
# ~/gemma-grpo-project/data/processed/.
BASE_DIR = Path.home() / "gemma-grpo-project"
DATA_DIR = BASE_DIR / "data" / "processed"
OUTPUT_FILE = DATA_DIR / "elite_math_reasoning_2_5k.jsonl"
# Import-time side effect: create the directory tree (no-op if it exists).
DATA_DIR.mkdir(parents=True, exist_ok=True)

class UltimateMathGenerator:
    def __init__(self):
        self.problems = []
        self.used_problems = set()
    
    def add_unique_problem(self, question: str, reasoning: str, answer: str, 
                          topic: str = "general", difficulty: str = "intermediate"):
        problem_hash = hash(question.strip().lower())
        if problem_hash in self.used_problems:
            return False
        
        self.used_problems.add(problem_hash)
        output = f"{reasoning}\n\nTherefore, the answer is {answer}."
        
        self.problems.append({
            "instruction": "Solve this problem showing clear mathematical reasoning.",
            "input": question,
            "output": output,
            "meta": {"topic": topic, "difficulty": difficulty}
        })
        return True

    def generate_strategic_arithmetic(self, n: int = 500):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            problem_type = random.choice([
                'mental_math_tricks', 'estimation_then_exact', 'fraction_concepts',
                'divisibility_rules', 'percentage_intuition', 'number_properties'
            ])
            
            try:
                if problem_type == 'mental_math_tricks':
                    base = random.randint(85, 115)
                    offset = random.randint(3, 20)
                    
                    if random.choice([True, False]):
                        result = base * (100 + offset)
                        question = f"Calculate {base} √ó {100 + offset} using mental math strategies."
                        reasoning = (f"I can use the distributive property to make this easier.\n"
                                   f"{base} √ó {100 + offset} = {base} √ó 100 + {base} √ó {offset}\n"
                                   f"= {base * 100} + {base * offset} = {result}\n\n"
                                   f"This strategy works because multiplication distributes over addition.")
                        answer = str(result)
                    else:
                        a = random.randint(15, 35)
                        result = 25  # a¬≤ - (a-5)(a+5) always equals 25
                        question = f"Calculate {a}¬≤ - ({a-5}) √ó ({a+5}) without full expansion."
                        reasoning = (f"I notice this fits the difference of squares pattern.\n"
                                   f"Using the identity: (a-b)(a+b) = a¬≤ - b¬≤\n"
                                   f"Here: ({a}-5)({a}+5) = {a}¬≤ - 25\n"
                                   f"So: {a}¬≤ - ({a}¬≤ - 25) = 25")
                        answer = "25"
                
                elif problem_type == 'estimation_then_exact':
                    a, b = random.randint(47, 98), random.randint(23, 67)
                    exact = a * b
                    est_a, est_b = round(a, -1), round(b, -1)
                    estimate = est_a * est_b
                    
                    question = f"First estimate, then calculate exactly: {a} √ó {b}"
                    reasoning = (f"Estimation helps verify my final answer is reasonable.\n\n"
                               f"Estimation: {a} ‚âà {est_a}, {b} ‚âà {est_b}\n"
                               f"So {a} √ó {b} ‚âà {est_a} √ó {est_b} = {estimate}\n\n"
                               f"Exact calculation: {a} √ó {b} = {exact}\n\n"
                               f"Verification: {exact} is close to my estimate of {estimate}, confirming correctness.")
                    answer = str(exact)
                    
                elif problem_type == 'fraction_concepts':
                    num1, den1 = random.randint(1, 8), random.randint(2, 12)
                    num2, den2 = random.randint(1, 8), random.randint(2, 12)
                    frac1, frac2 = Fraction(num1, den1), Fraction(num2, den2)
                    
                    if random.choice([True, False]):
                        result = frac1 + frac2
                        question = f"Add these fractions: {frac1} + {frac2}"
                        reasoning = (f"To add fractions, I need a common denominator.\n"
                                   f"Finding LCM of {frac1.denominator} and {frac2.denominator}...\n"
                                   f"Converting and adding: {result}")
                    else:
                        result = frac1 * frac2
                        question = f"Multiply these fractions: {frac1} √ó {frac2}"
                        reasoning = (f"To multiply fractions: multiply numerators, multiply denominators.\n"
                                   f"{frac1} √ó {frac2} = ({frac1.numerator} √ó {frac2.numerator}) / ({frac1.denominator} √ó {frac2.denominator}) = {result}")
                    answer = str(result)
                    
                elif problem_type == 'divisibility_rules':
                    number = random.randint(234, 9876)
                    divisor = random.choice([2, 3, 9])
                    is_divisible = (number % divisor == 0)
                    
                    question = f"Without calculating, determine if {number} is divisible by {divisor}."
                    
                    if divisor == 2:
                        last_digit = number % 10
                        reasoning = (f"A number is divisible by 2 if its last digit is even.\n"
                                   f"The last digit of {number} is {last_digit}.\n"
                                   f"Since {last_digit} is {'even' if last_digit % 2 == 0 else 'odd'}, {number} is {'divisible' if is_divisible else 'not divisible'} by 2.")
                    elif divisor == 3:
                        digit_sum = sum(int(d) for d in str(number))
                        reasoning = (f"A number is divisible by 3 if the sum of its digits is divisible by 3.\n"
                                   f"Sum of digits: {' + '.join(str(number))} = {digit_sum}\n"
                                   f"Since {digit_sum} {'is' if digit_sum % 3 == 0 else 'is not'} divisible by 3, {number} {'is' if is_divisible else 'is not'} divisible by 3.")
                    else:  # divisor == 9
                        digit_sum = sum(int(d) for d in str(number))
                        reasoning = (f"A number is divisible by 9 if the sum of its digits is divisible by 9.\n"
                                   f"Sum of digits: {' + '.join(str(number))} = {digit_sum}\n"
                                   f"Since {digit_sum} {'is' if digit_sum % 9 == 0 else 'is not'} divisible by 9, {number} {'is' if is_divisible else 'is not'} divisible by 9.")
                    answer = 'Yes' if is_divisible else 'No'
                
                if question and reasoning and answer:
                    difficulty = "basic" if problem_type in ['mental_math_tricks', 'estimation_then_exact'] else "intermediate"
                    if self.add_unique_problem(question, reasoning, answer, "arithmetic", difficulty):
                        problems_added += 1
                        
            except Exception:
                continue

    def generate_algebraic_reasoning(self, n: int = 600):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            problem_type = random.choice([
                'linear_equations', 'quadratic_factoring', 'exponential_equations', 
                'system_solving', 'polynomial_analysis'
            ])
            
            try:
                if problem_type == 'linear_equations':
                    a, b, c = random.randint(2, 8), random.randint(5, 25), random.randint(10, 50)
                    x_val = (c - b) / a
                    
                    question = f"Solve: {a}x + {b} = {c}"
                    reasoning = (f"I need to isolate x by undoing operations in reverse order.\n\n"
                               f"Starting: {a}x + {b} = {c}\n"
                               f"Subtract {b}: {a}x = {c} - {b} = {c - b}\n"
                               f"Divide by {a}: x = {c - b} √∑ {a} = {x_val}\n\n"
                               f"Check: {a}({x_val}) + {b} = {a * x_val} + {b} = {c} ‚úì")
                    answer = str(x_val)
                    
                elif problem_type == 'quadratic_factoring':
                    roots = sorted([random.randint(-5, 8) for _ in range(2)])
                    while roots[0] == roots[1]:
                        roots = sorted([random.randint(-5, 8) for _ in range(2)])
                    r1, r2 = roots[0], roots[1]
                    
                    b_coeff = -(r1 + r2)
                    c_coeff = r1 * r2
                    
                    question = f"Factor: x¬≤ {'+' if b_coeff >= 0 else ''}{b_coeff}x + {c_coeff}"
                    reasoning = (f"I need two numbers that multiply to {c_coeff} and add to {b_coeff}.\n"
                               f"Those numbers are {r1} and {r2}:\n"
                               f"{r1} √ó {r2} = {c_coeff} ‚úì, {r1} + {r2} = {b_coeff} ‚úì\n"
                               f"Therefore: x¬≤ {'+' if b_coeff >= 0 else ''}{b_coeff}x + {c_coeff} = (x - {r1})(x - {r2})")
                    answer = f"(x - {r1})(x - {r2})"
                
                elif problem_type == 'exponential_equations':
                    base = random.choice([2, 3, 4, 5])
                    exponent = random.randint(2, 4)
                    result = base ** exponent
                    
                    question = f"Solve: {base}^x = {result}"
                    reasoning = (f"I need to find what power of {base} equals {result}.\n\n"
                               f"Checking powers of {base}:\n")
                    
                    for i in range(1, exponent + 1):
                        reasoning += f"{base}^{i} = {base**i}\n"
                    
                    reasoning += f"\nTherefore, x = {exponent}"
                    answer = str(exponent)
                
                if question and reasoning and answer:
                    if self.add_unique_problem(question, reasoning, answer, "algebra", "intermediate"):
                        problems_added += 1
                        
            except Exception:
                continue

    def generate_geometric_insight(self, n: int = 500):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            problem_type = random.choice([
                'pythagorean_theorem', 'area_calculations', 'circle_properties',
                'volume_reasoning', 'coordinate_geometry'
            ])
            
            try:
                if problem_type == 'pythagorean_theorem':
                    triples = [(3,4,5), (5,12,13), (8,15,17), (6,8,10), (9,12,15)]
                    a, b, c = random.choice(triples)
                    
                    scenario = random.choice(['find_hypotenuse', 'verify_right', 'distance'])
                    
                    if scenario == 'find_hypotenuse':
                        question = f"Find the hypotenuse of a right triangle with legs {a} and {b}."
                        reasoning = (f"Using the Pythagorean theorem: a¬≤ + b¬≤ = c¬≤\n\n"
                                   f"Given: a = {a}, b = {b}\n"
                                   f"Calculating: {a}¬≤ + {b}¬≤ = {a*a} + {b*b} = {a*a + b*b}\n"
                                   f"Therefore: c = ‚àö{a*a + b*b} = {c}\n\n"
                                   f"The geometric meaning: The square on the hypotenuse equals the sum of squares on the legs.")
                        answer = str(c)
                    
                    elif scenario == 'verify_right':
                        question = f"Is a triangle with sides {a}, {b}, {c} a right triangle?"
                        reasoning = (f"To verify, I check if the Pythagorean theorem holds.\n\n"
                                   f"Testing: {a}¬≤ + {b}¬≤ = {c}¬≤?\n"
                                   f"{a*a} + {b*b} = {c*c}?\n"
                                   f"{a*a + b*b} = {c*c}? Yes!\n\n"
                                   f"Since the theorem is satisfied, this is a right triangle.")
                        answer = "Yes"
                
                elif problem_type == 'area_calculations':
                    shape = random.choice(['triangle', 'rectangle', 'circle'])
                    
                    if shape == 'triangle':
                        base, height = random.randint(5, 15), random.randint(4, 12)
                        area = 0.5 * base * height
                        question = f"Find the area of a triangle with base {base} and height {height}."
                        reasoning = (f"Using the triangle area formula: A = ¬Ω √ó base √ó height\n"
                                   f"A = ¬Ω √ó {base} √ó {height} = {area}")
                        answer = f"{area} square units"
                    
                    elif shape == 'rectangle':
                        length, width = random.randint(6, 15), random.randint(4, 12)
                        area = length * width
                        question = f"Find the area of a rectangle with length {length} and width {width}."
                        reasoning = (f"Using the rectangle area formula: A = length √ó width\n"
                                   f"A = {length} √ó {width} = {area}")
                        answer = f"{area} square units"
                
                elif problem_type == 'circle_properties':
                    radius = random.randint(3, 12)
                    circumference = 2 * math.pi * radius
                    area = math.pi * radius * radius
                    
                    question = f"A circle has radius {radius}. Find its circumference and area."
                    reasoning = (f"Using circle formulas:\n\n"
                               f"Circumference: C = 2œÄr = 2œÄ({radius}) = {2 * radius}œÄ ‚âà {circumference:.2f}\n"
                               f"Area: A = œÄr¬≤ = œÄ({radius})¬≤ = {radius * radius}œÄ ‚âà {area:.2f}\n\n"
                               f"œÄ represents the ratio of circumference to diameter.")
                    answer = f"C = {2 * radius}œÄ, A = {radius * radius}œÄ"
                
                if question and reasoning and answer:
                    if self.add_unique_problem(question, reasoning, answer, "geometry", "intermediate"):
                        problems_added += 1
                        
            except Exception:
                continue

    def generate_advanced_applications(self, n: int = 400):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            problem_type = random.choice([
                'word_problems', 'optimization', 'exponential_growth', 'financial_math'
            ])
            
            try:
                if problem_type == 'word_problems':
                    scenario = random.choice(['distance_rate_time', 'mixture', 'age'])
                    
                    if scenario == 'distance_rate_time':
                        speed, time = random.randint(30, 80), random.randint(2, 8)
                        distance = speed * time
                        question = f"A car travels at {speed} mph for {time} hours. How far does it travel?"
                        reasoning = (f"Using the distance formula: Distance = Speed √ó Time\n"
                                   f"Distance = {speed} √ó {time} = {distance} miles")
                        answer = f"{distance} miles"
                
                elif problem_type == 'exponential_growth':
                    initial = random.randint(500, 2000)
                    rate = random.randint(5, 15)
                    time = random.randint(3, 6)
                    final = initial * ((1 + rate/100) ** time)
                    
                    question = f"A population of {initial} grows at {rate}% per year. What is it after {time} years?"
                    reasoning = (f"Using exponential growth: P(t) = P‚ÇÄ(1 + r)·µó\n\n"
                               f"P({time}) = {initial}(1 + {rate/100})^{time}\n"
                               f"P({time}) = {initial}(1.{rate:02d})^{time} ‚âà {final:.0f}")
                    answer = f"{final:.0f}"
                
                elif problem_type == 'financial_math':
                    principal = random.randint(1000, 5000)
                    rate = random.randint(4, 12)
                    time = random.randint(2, 5)
                    amount = principal * ((1 + rate/100) ** time)
                    interest = amount - principal
                    
                    question = f"Find compound interest on ${principal} at {rate}% for {time} years."
                    reasoning = (f"Using compound interest: A = P(1 + r)·µó\n\n"
                               f"A = {principal}(1 + {rate/100})^{time} = ${amount:.2f}\n"
                               f"Interest = ${amount:.2f} - ${principal} = ${interest:.2f}")
                    answer = f"${interest:.2f}"
                
                if question and reasoning and answer:
                    if self.add_unique_problem(question, reasoning, answer, "applications", "advanced"):
                        problems_added += 1
                        
            except Exception:
                continue

    def generate_calculus_basics(self, n: int = 200):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            try:
                problem_type = random.choice(['derivatives', 'integrals', 'applications'])
                
                if problem_type == 'derivatives':
                    a, b = random.randint(2, 6), random.randint(1, 10)
                    question = f"Find the derivative of f(x) = {a}x¬≤ + {b}x."
                    reasoning = (f"Using the power rule: d/dx(x‚Åø) = nx‚Åø‚Åª¬π\n\n"
                               f"f'(x) = d/dx({a}x¬≤) + d/dx({b}x)\n"
                               f"f'(x) = {a}(2x) + {b}(1) = {2*a}x + {b}")
                    answer = f"f'(x) = {2*a}x + {b}"
                
                elif problem_type == 'integrals':
                    a = random.randint(2, 8)
                    question = f"Find ‚à´{a}x dx."
                    reasoning = (f"Using the power rule for integration: ‚à´x‚Åø dx = x‚Åø‚Å∫¬π/(n+1) + C\n\n"
                               f"‚à´{a}x dx = {a} ‚à´x dx = {a} √ó x¬≤/2 + C = {a//2 if a%2==0 else f'{a}/2'}x¬≤ + C")
                    answer = f"{a//2 if a%2==0 else f'{a}/2'}x¬≤ + C"
                
                if question and reasoning and answer:
                    if self.add_unique_problem(question, reasoning, answer, "calculus", "advanced"):
                        problems_added += 1
                        
            except Exception:
                continue

    def generate_stats_reasoning(self, n: int = 200):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            try:
                problem_type = random.choice(['probability', 'statistics', 'data_analysis'])
                
                if problem_type == 'probability':
                    total = random.randint(20, 50)
                    favorable = random.randint(5, total//2)
                    prob = favorable / total
                    
                    question = f"A bag has {total} balls, {favorable} are red. What's the probability of drawing red?"
                    reasoning = (f"Probability = Favorable outcomes / Total outcomes\n"
                               f"P(red) = {favorable}/{total} = {prob:.3f}")
                    answer = f"{favorable}/{total} = {prob:.3f}"
                
                elif problem_type == 'statistics':
                    data = sorted([random.randint(10, 100) for _ in range(5)])
                    mean = sum(data) / len(data)
                    median = data[len(data)//2]
                    
                    question = f"Find the mean and median of: {', '.join(map(str, data))}"
                    reasoning = (f"Mean = Sum / Count = {sum(data)} / {len(data)} = {mean:.1f}\n"
                               f"Median = Middle value = {median}")
                    answer = f"Mean: {mean:.1f}, Median: {median}"
                
                if question and reasoning and answer:
                    if self.add_unique_problem(question, reasoning, answer, "statistics", "intermediate"):
                        problems_added += 1
                        
            except Exception:
                continue

    def generate_error_analysis(self, n: int = 300):
        problems_added = 0
        attempts = 0
        
        while problems_added < n and attempts < n * 3:
            attempts += 1
            question, reasoning, answer = None, None, None
            
            try:
                error_type = random.choice(['order_of_operations', 'algebra_errors', 'fraction_errors'])
                
                if error_type == 'order_of_operations':
                    a, b, c = random.randint(2, 9), random.randint(2, 9), random.randint(2, 9)
                    correct = a + b * c
                    wrong = (a + b) * c
                    
                    question = f"Calculate {a} + {b} √ó {c} and explain why order matters."
                    reasoning = (f"Order of operations (PEMDAS) prevents ambiguity.\n\n"
                               f"Correct: Multiplication before addition\n"
                               f"First: {b} √ó {c} = {b*c}\n"
                               f"Then: {a} + {b*c} = {correct}\n\n"
                               f"Wrong approach: ({a} + {b}) √ó {c} = {wrong}")
                    answer = str(correct)
                
                elif error_type == 'algebra_errors':
                    a = random.randint(2, 8)
                    correct_expansion = f"x¬≤ + {2*a}x + {a*a}"
                    
                    question = f"Expand (x + {a})¬≤ and identify a common error."
                    reasoning = (f"Correct expansion using FOIL:\n"
                               f"(x + {a})¬≤ = (x + {a})(x + {a})\n"
                               f"= x¬≤ + {a}x + {a}x + {a*a} = x¬≤ + {2*a}x + {a*a}\n\n"
                               f"Common error: (x + {a})¬≤ ‚â† x¬≤ + {a*a}\n"
                               f"This misses the middle term {2*a}x!")
                    answer = correct_expansion
                
                if question and reasoning and answer:
                    if self.add_unique_problem(question, reasoning, answer, "error_analysis", "intermediate"):
                        problems_added += 1
                        
            except Exception:
                continue

    def save_ultimate_dataset(self):
        """Shuffle the collected problems, write them as JSONL, print a summary.

        Shuffles ``self.problems`` in place, writes one JSON object per line to
        ``OUTPUT_FILE`` (UTF-8, non-ASCII characters kept as-is), then prints
        per-topic and per-difficulty counts.
        """
        random.shuffle(self.problems)

        with open(OUTPUT_FILE, 'w', encoding='utf-8') as sink:
            for record in self.problems:
                sink.write(json.dumps(record, ensure_ascii=False) + '\n')

        print(f"üèÜ Generated {len(self.problems)} ELITE problems saved to: {OUTPUT_FILE}")

        # Tally both metadata fields in a single pass over the records.
        topics = {}
        difficulties = {}
        for record in self.problems:
            meta = record['meta']
            for tally, label in ((topics, meta['topic']), (difficulties, meta['difficulty'])):
                tally[label] = tally.get(label, 0) + 1

        topic_summary = dict(sorted(topics.items()))
        difficulty_summary = dict(sorted(difficulties.items()))

        print(f"\nüìä QUALITY METRICS:")
        print(f"Topics: {topic_summary}")
        print(f"Difficulty: {difficulty_summary}")
        print(f"Unique problems: {len(self.used_problems)}")
        print(f"\n‚ú® FEATURES: Conceptual understanding, error prevention, natural language")

def main():
    """Run every generator in turn, then save the dataset and print a summary."""
    print("üèÜ ULTIMATE MATH REASONING DATASET (2.5K+)")
    print("üéØ Goal: Actually improve mathematical thinking")
    print("=" * 60)
    
    generator = UltimateMathGenerator()
    
    # (banner, generator method, target count), executed in this order.
    stages = (
        ("üî¢ Generating strategic arithmetic...", generator.generate_strategic_arithmetic, 500),
        ("üî§ Generating algebraic reasoning...", generator.generate_algebraic_reasoning, 600),
        ("üìê Generating geometric insights...", generator.generate_geometric_insight, 500),
        ("üöÄ Generating advanced applications...", generator.generate_advanced_applications, 400),
        ("üìà Generating calculus fundamentals...", generator.generate_calculus_basics, 200),
        ("üìä Generating statistics reasoning...", generator.generate_stats_reasoning, 200),
        ("‚ö†Ô∏è Generating error analysis...", generator.generate_error_analysis, 300),
    )
    for banner, generate, target in stages:
        print(banner)
        generate(target)
    
    generator.save_ultimate_dataset()
    
    print(f"\nüéâ COMPLETE! ~{len(generator.problems)} problems")
    print(f"‚úÖ READY FOR CONSERVATIVE FINE-TUNING!")
    print(f"üìã Settings: LR=3e-5, Epochs=0.5-1, Batch=2-4")


if __name__ == "__main__":
    main()


🏆 ULTIMATE MATH REASONING DATASET (2.5K+)
🎯 Goal: Actually improve mathematical thinking
🔢 Generating strategic arithmetic...
🔤 Generating algebraic reasoning...
📐 Generating geometric insights...
🚀 Generating advanced applications...
📈 Generating calculus fundamentals...
📊 Generating statistics reasoning...
⚠️ Generating error analysis...
🏆 Generated 1981 ELITE problems saved to: /home/ai_pc_user/gemma-grpo-project/data/processed/elite_math_reasoning_2_5k.jsonl

📊 QUALITY METRICS:
Topics: {'algebra': 453, 'applications': 400, 'arithmetic': 500, 'calculus': 55, 'error_analysis': 232, 'geometry': 141, 'statistics': 200}
Difficulty: {'advanced': 455, 'basic': 218, 'intermediate': 1308}
Unique problems: 1981

✨ FEATURES: Conceptual understanding, error prevention, natural language

🎉 COMPLETE! ~1981 problems
✅ READY FOR CONSERVATIVE FINE-TUNING!
📋 Settings: LR=3e-5, Epochs=0.5-1, Batch=2-4


In [6]:
# elite_math_finetune_unsloth.py - Conservative fine-tuning with Unsloth
import os, json, random
from pathlib import Path
from typing import List, Dict

# Import Unsloth FIRST for maximum optimization
from unsloth import FastLanguageModel
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import Dataset
from tqdm.auto import tqdm

# Seed the module-level RNG; it is used below to shuffle the eval examples.
random.seed(42)

# Paths
BASE_DIR = Path.home() / "gemma-grpo-project"
DATA_DIR = BASE_DIR / "data" / "processed"
ELITE_DATASET = DATA_DIR / "elite_math_reasoning_2_5k.jsonl"  # training JSONL
EVAL_DIR = DATA_DIR / "eval_packs"                            # external eval JSONL files
OUTPUT_DIR = BASE_DIR / "Elite-Math-Section"                  # checkpoints are written here
# Import-time side effect: create the output directory (no-op if it exists).
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Model settings
BASE_MODEL = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
MAX_SEQ_LEN = 1024
# NOTE(review): passed to FastLanguageModel.from_pretrained as a string;
# confirm the loader accepts "bfloat16" rather than a torch dtype object.
DTYPE = "bfloat16"

# CONSERVATIVE training settings
LEARNING_RATE = 3e-5        # Much lower than previous 2e-4
EPOCHS = 1.0                # Full epoch to see all data
BATCH_SIZE = 4              # Per-device batch size
GRAD_ACCUM = 2              # Effective batch = BATCH_SIZE * GRAD_ACCUM = 8 per device
WARMUP_RATIO = 0.1
WEIGHT_DECAY = 0.01
SAVE_STEPS = 10             # Very frequent saves
EVAL_STEPS = 10             # Monitor closely

def load_model():
    """Load the 4-bit base model through Unsloth and attach LoRA adapters.

    Uses the module-level ``BASE_MODEL``, ``MAX_SEQ_LEN`` and ``DTYPE``
    settings. After wrapping the model with LoRA (rank 16, alpha 32, dropout
    0.05, on the attention and MLP projection modules) it tries to switch off
    ``config.use_cache`` for training.

    Returns:
        A ``(model, tokenizer)`` tuple: the LoRA-wrapped model and the
        tokenizer returned by ``FastLanguageModel.from_pretrained``.
    """
    print(f"üöÄ Loading base model with Unsloth: {BASE_MODEL}")
    
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=BASE_MODEL,
        max_seq_length=MAX_SEQ_LEN,
        dtype=DTYPE,
        load_in_4bit=True,
        device_map="auto",
        trust_remote_code=True,
    )
    
    # Apply LoRA with Unsloth optimizations
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
        use_gradient_checkpointing="unsloth",  # Unsloth's optimized checkpointing
    )
    
    # Disable cache for training. This stays best-effort, but only the
    # "model has no such attribute" case is tolerated, and it is reported
    # instead of being swallowed by a bare except.
    try:
        model.config.use_cache = False
    except AttributeError as exc:
        print(f"Warning: could not disable use_cache: {exc}")
    
    print(f"‚úÖ Model loaded with LoRA. GPU: {os.environ.get('CUDA_VISIBLE_DEVICES', 'auto')}")
    return model, tokenizer

def load_elite_dataset():
    """Read the training JSONL at ``ELITE_DATASET`` into a ``Dataset``.

    Blank lines are ignored. Lines that are not valid JSON are skipped, and
    the number skipped is reported so a corrupt file does not go unnoticed.

    Returns:
        ``Dataset.from_list`` over the parsed records.
    """
    print(f"üìö Loading elite training dataset: {ELITE_DATASET}")
    
    examples = []
    skipped = 0
    with open(ELITE_DATASET, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                examples.append(json.loads(line))
            except json.JSONDecodeError:
                # Only malformed JSON is tolerated; anything else propagates.
                skipped += 1
    
    if skipped:
        print(f"Warning: skipped {skipped} malformed line(s) in {ELITE_DATASET}")
    print(f"‚úÖ Loaded {len(examples)} elite training examples")
    return Dataset.from_list(examples)

def load_external_eval():
    """Load up to 200 shuffled evaluation examples from ``EVAL_DIR``.

    Reads whichever of the three expected JSONL files exist, ignores blank
    lines, skips lines that are not valid JSON (reporting how many), shuffles
    the combined list with the module-level RNG and keeps the first 200.

    Returns:
        A ``Dataset`` of the selected examples, or ``None`` when no example
        could be loaded.
    """
    print(f"üìä Loading external evaluation datasets from: {EVAL_DIR}")
    
    eval_examples = []
    skipped = 0
    
    # Load existing eval files from your previous fine-tuning
    eval_files = [
        "eval_math500_like.jsonl",
        "eval_aime_like.jsonl", 
        "eval_svamp_robust.jsonl"
    ]
    
    for filename in eval_files:
        filepath = EVAL_DIR / filename
        if not filepath.exists():
            continue
        print(f"   Loading {filename}...")
        with open(filepath, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    eval_examples.append(json.loads(line))
                except json.JSONDecodeError:
                    # Only malformed JSON is tolerated; anything else propagates.
                    skipped += 1
    
    if skipped:
        print(f"Warning: skipped {skipped} malformed evaluation line(s)")
    
    # Limit to 200 examples as requested
    random.shuffle(eval_examples)
    eval_examples = eval_examples[:200]
    
    print(f"‚úÖ Using {len(eval_examples)} external evaluation examples")
    return Dataset.from_list(eval_examples) if eval_examples else None

def format_example(example: Dict) -> Dict:
    """Turn one instruction/input/output record into a single training string.

    The instruction and input are joined into the user turn and the output
    becomes the assistant turn. The module-level ``tokenizer`` chat template
    is used when it can be applied; otherwise a plain tagged layout is used.

    Args:
        example: Record with optional ``"instruction"``, ``"input"`` and
            ``"output"`` keys. Missing keys and ``None`` values (JSON
            ``null``) are treated as empty strings.

    Returns:
        ``{"text": <formatted conversation>}``.
    """
    # `or ""` also covers keys that are present but null, which would
    # otherwise fail on None.strip().
    instruction = (example.get("instruction") or "").strip()
    input_text = (example.get("input") or "").strip()
    output_text = (example.get("output") or "").strip()
    
    # Combine instruction and input
    if input_text:
        user_content = f"{instruction}\n\n{input_text}" if instruction else input_text
    else:
        user_content = instruction
    
    # Use chat template
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_content},
        {"role": "assistant", "content": output_text}
    ]
    
    # Apply chat template
    try:
        formatted = tokenizer.apply_chat_template(
            messages, 
            tokenize=False, 
            add_generation_prompt=False
        )
    except Exception:
        # Fallback formatting. Catching Exception rather than using a bare
        # except lets KeyboardInterrupt/SystemExit propagate.
        # NOTE(review): this also hides a missing global `tokenizer`
        # (NameError), in which case every example silently gets this layout.
        formatted = f"<|system|>\nYou are a helpful assistant.\n\n<|user|>\n{user_content}\n\n<|assistant|>\n{output_text}"
    
    return {"text": formatted}

def find_latest_checkpoint():
    """Return the highest-numbered ``checkpoint-<step>`` directory, if any.

    Scans ``OUTPUT_DIR`` for entries named ``checkpoint-*`` and picks the one
    whose trailing ``-<step>`` number is largest.

    Returns:
        The checkpoint path as a string, or ``None`` when no usable
        checkpoint directory exists.
    """
    checkpoints = []
    for path in OUTPUT_DIR.glob("checkpoint-*"):
        # Only directories are candidates; a stray file that happens to match
        # the pattern must not be chosen as the resume target.
        if not path.is_dir():
            continue
        try:
            step_num = int(path.name.split("-")[-1])
        except ValueError:
            # Suffix is not a step number (e.g. "checkpoint-tmp"); ignore it.
            continue
        checkpoints.append((step_num, str(path)))

    if checkpoints:
        # Tuples compare by step number first, so max() is the latest step.
        latest_checkpoint = max(checkpoints)[1]
        print(f"üîÑ Found checkpoint to resume from: {latest_checkpoint}")
        return latest_checkpoint

    print("üÜï No checkpoints found, starting fresh")
    return None

def main():
    """Run the fine-tuning pipeline end to end.

    Loads the model and tokenizer, loads and formats the training and
    (optional) evaluation datasets, builds ``TrainingArguments`` and an
    ``SFTTrainer``, resumes from the latest checkpoint when one exists,
    trains, and finally saves the model and tokenizer to ``OUTPUT_DIR``.
    """
    print("üèÜ ELITE MATH FINE-TUNING WITH UNSLOTH")
    print("üéØ Conservative settings to preserve base capabilities")
    print("=" * 60)
    
    # Load model first. The tokenizer is published as a module global because
    # format_example() reads it by name.
    global tokenizer
    model, tokenizer = load_model()
    
    # Load datasets
    train_dataset = load_elite_dataset()
    eval_dataset = load_external_eval()
    
    # Format datasets. os.cpu_count() may return None when the count cannot
    # be determined, so fall back to 1 before subtracting.
    num_proc = max(1, (os.cpu_count() or 1) - 1)
    print(f"üîÑ Formatting datasets (using {num_proc} processes)...")
    
    train_dataset = train_dataset.map(
        format_example, 
        num_proc=num_proc,
        desc="Formatting training data"
    )
    
    if eval_dataset:
        eval_dataset = eval_dataset.map(
            format_example,
            num_proc=min(2, num_proc),
            desc="Formatting eval data"
        )
    
    # Training arguments with CONSERVATIVE settings
    training_args = TrainingArguments(
        output_dir=str(OUTPUT_DIR),
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRAD_ACCUM,
        learning_rate=LEARNING_RATE,
        num_train_epochs=EPOCHS,
        lr_scheduler_type="cosine",
        warmup_ratio=WARMUP_RATIO,
        weight_decay=WEIGHT_DECAY,
        
        # Frequent monitoring
        logging_steps=5,
        save_steps=SAVE_STEPS,
        eval_steps=EVAL_STEPS if eval_dataset else None,
        eval_strategy="steps" if eval_dataset else "no",
        
        # Best model tracking (only meaningful when an eval set is present)
        save_total_limit=10,
        load_best_model_at_end=bool(eval_dataset),
        metric_for_best_model="eval_loss" if eval_dataset else None,
        greater_is_better=False,
        
        # Optimization
        bf16=(DTYPE == "bfloat16"),
        fp16=(DTYPE == "float16"),
        optim="paged_adamw_8bit",
        gradient_checkpointing=True,
        dataloader_pin_memory=True,
        dataloader_num_workers=min(4, num_proc),
        group_by_length=True,
        remove_unused_columns=True,
        
        # Misc
        report_to=[],
        save_safetensors=True,
        seed=42,
    )
    
    # Create trainer with Unsloth optimizations
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        dataset_text_field="text",
        max_seq_length=MAX_SEQ_LEN,
        packing=True,  # Unsloth optimization
        args=training_args,
    )
    
    # Check for resume checkpoint
    resume_from = find_latest_checkpoint()
    
    # Start training
    print(f"\nüöÄ Starting conservative fine-tuning...")
    print(f"üìä Training examples: {len(train_dataset)}")
    print(f"üìä Eval examples: {len(eval_dataset) if eval_dataset else 0}")
    print(f"‚öôÔ∏è  Settings: LR={LEARNING_RATE}, Epochs={EPOCHS}, Batch={BATCH_SIZE}")
    print(f"üíæ Saving every {SAVE_STEPS} steps")
    print("-" * 60)
    
    trainer.train(resume_from_checkpoint=resume_from)
    
    # Save final model
    print("\nüíæ Saving final model...")
    trainer.save_model()
    tokenizer.save_pretrained(OUTPUT_DIR)
    
    print(f"\nüéâ ELITE MATH FINE-TUNING COMPLETE!")
    print(f"üìÅ Model saved to: {OUTPUT_DIR}")
    print(f"‚ú® Ready for evaluation against your previous results!")

# Start the fine-tuning run only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()


üèÜ ELITE MATH FINE-TUNING WITH UNSLOTH
üéØ Conservative settings to preserve base capabilities
üöÄ Loading base model with Unsloth: unsloth/Llama-3.2-3B-Instruct-bnb-4bit
Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.9.1: Fast Llama patching. Transformers: 4.56.1. vLLM: 0.10.1.1.
   \\   /|    NVIDIA GeForce RTX 3060. Num GPUs = 1. Max memory: 11.638 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
‚úÖ Model loaded with LoRA. GPU: auto
üìö Loading elite training dataset: /home/ai_pc_user/gemma-grpo-project/data/processed/elite_math_reasoning_2_5k.jsonl
‚úÖ Loaded 1981 elite training examples
üìä Loading external evaluation datasets from: /home/ai_pc_user/gemma-grpo-project/data/proc

Formatting training data (num_proc=15): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1981/1981 [00:00<00:00, 6827.71 examples/s]
Unsloth: Tokenizing ["text"] (num_proc=20): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1981/1981 [00:05<00:00, 389.82 examples/s]


üÜï No checkpoints found, starting fresh

üöÄ Starting conservative fine-tuning...
üìä Training examples: 1981
üìä Eval examples: 0
‚öôÔ∏è  Settings: LR=3e-05, Epochs=1.0, Batch=4
üíæ Saving every 10 steps
------------------------------------------------------------


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,981 | Num Epochs = 1 | Total steps = 248
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,2.4526
10,2.5424
15,2.6808
20,2.4908
25,2.2496
30,1.8566
35,1.5407
40,1.1926
45,0.9292
50,0.9171



üíæ Saving final model...

üéâ ELITE MATH FINE-TUNING COMPLETE!
üìÅ Model saved to: /home/ai_pc_user/gemma-grpo-project/Elite-Math-Section
‚ú® Ready for evaluation against your previous results!


In [None]:
# compare_elite_vs_base.py - Test your Elite Math model vs Base model
import torch
from pathlib import Path
from unsloth import FastLanguageModel
import time

# Paths and settings
# Project root under the current user's home directory.
BASE_DIR = Path.home() / "gemma-grpo-project"
# Directory the fine-tuned ("elite") model is loaded from.
ELITE_MODEL_DIR = BASE_DIR / "Elite-Math-Section"
# Model identifier the base model is loaded from for the comparison.
BASE_MODEL = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
# System message placed at the start of every prompt sent to either model.
SYSTEM_TEXT = "You are a helpful assistant."

# Model settings
# Passed as max_seq_length when loading; also bounds the prompt length
# (prompts are truncated to MAX_SEQ_LEN - MAX_NEW_TOKENS tokens).
MAX_SEQ_LEN = 1536
# Passed as the `dtype` argument when loading both models.
DTYPE = "bfloat16"
# Upper bound on tokens generated per response.
MAX_NEW_TOKENS = 1024
# Sampling temperature used for generation.
TEMPERATURE = 0.3

def load_both_models():
    """Load the base model and the elite fine-tuned model for inference.

    Both are loaded in 4-bit onto device 0 with identical settings and
    switched to inference mode, base first and elite second.

    Returns:
        ``((base_model, base_tokenizer), (elite_model, elite_tokenizer))``.
    """
    banner = "=" * 70
    print(banner)
    print("üî¨ LOADING MODELS FOR ELITE MATH COMPARISON")
    print(banner)

    # (announcement, model source, confirmation), in load order.
    load_plan = (
        ("\n[1/2] Loading BASE model...", BASE_MODEL, "‚úÖ Base model loaded"),
        (
            "\n[2/2] Loading ELITE MATH model...",
            str(ELITE_MODEL_DIR),
            "‚úÖ Elite Math model loaded",
        ),
    )

    loaded_pairs = []
    for announcement, source, confirmation in load_plan:
        print(announcement)
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=source,
            max_seq_length=MAX_SEQ_LEN,
            dtype=DTYPE,
            load_in_4bit=True,
            device_map={"": 0},
            trust_remote_code=True,
        )
        FastLanguageModel.for_inference(model)
        print(confirmation)
        loaded_pairs.append((model, tokenizer))

    print(f"\n‚úÖ Both models ready on: {torch.cuda.get_device_name(0)}")
    base_pair, elite_pair = loaded_pairs
    return base_pair, elite_pair

def generate_response(model, tokenizer, prompt_text):
    """Generate one sampled response to *prompt_text* and time it.

    Args:
        model: Model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer matching *model*.
        prompt_text: The user's prompt.

    Returns:
        ``(response, seconds)``: the decoded completion with the prompt
        tokens removed and whitespace stripped, and the time spent in
        ``model.generate``.
    """
    messages = [
        {"role": "system", "content": SYSTEM_TEXT},
        {"role": "user", "content": prompt_text}
    ]

    # The attribute can exist but be None, so test its value rather than its
    # presence before calling apply_chat_template.
    if getattr(tokenizer, "chat_template", None):
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        # The rendered template already carries its own special tokens;
        # adding them again at tokenization time would duplicate them.
        add_special_tokens = False
    else:
        prompt = f"<<SYS>>\n{SYSTEM_TEXT}\n<</SYS>>\n\n[USER]\n{prompt_text}\n\n[ASSISTANT]\n"
        add_special_tokens = True

    # Leave room for MAX_NEW_TOKENS inside the model's sequence budget.
    inputs = tokenizer(
        prompt, return_tensors="pt",
        truncation=True, max_length=MAX_SEQ_LEN-MAX_NEW_TOKENS,
        add_special_tokens=add_special_tokens,
    ).to(model.device)

    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start_time = time.perf_counter()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=TEMPERATURE,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    response_time = time.perf_counter() - start_time

    # Drop the prompt tokens so only the newly generated text is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True
    )

    return response.strip(), response_time

def compare_responses(prompt_text, base_pair, elite_pair):
    """Run *prompt_text* through both models and print a side-by-side report.

    Args:
        prompt_text: Prompt sent to both models.
        base_pair: ``(model, tokenizer)`` for the base model.
        elite_pair: ``(model, tokenizer)`` for the fine-tuned model.
    """
    rule = "=" * 70
    spaced_rule = "\n" + rule

    print(rule)
    print("üß™ TESTING ELITE MATH VS BASE MODEL")
    print(rule)
    print(f"üìù PROMPT: {prompt_text}")
    print(rule)

    # Base model first.
    print("\nüîµ BASE MODEL RESPONSE:")
    print("-" * 35)
    base_response, base_time = generate_response(*base_pair, prompt_text)
    print(base_response)
    print(f"\n‚è±Ô∏è  Time: {base_time:.2f}s")

    print(spaced_rule)

    # Then the fine-tuned model.
    print("\nüü¢ ELITE MATH MODEL RESPONSE:")
    print("-" * 40)
    elite_response, elite_time = generate_response(*elite_pair, prompt_text)
    print(elite_response)
    print(f"\n‚è±Ô∏è  Time: {elite_time:.2f}s")

    # Positive value means the elite model answered faster than the base.
    speed_delta_pct = (base_time - elite_time) / base_time * 100

    print(spaced_rule)
    print("üìä COMPARISON ANALYSIS:")
    print(f"  Base model time: {base_time:.2f}s")
    print(f"  Elite model time: {elite_time:.2f}s")
    print(f"  Speed difference: {speed_delta_pct:+.1f}%")
    print(rule)

def main():
    """Interactive loop that sends prompts to both models and compares them.

    Loads both models once, then repeatedly reads a prompt (or a preset
    name) from stdin and prints both models' responses. The loop ends on
    'quit'/'exit', on answering 'n'/'no' to the continue question, or when
    stdin is closed.
    """
    print("üî¨ ELITE MATH MODEL EVALUATION")
    print("Test if your Elite Math fine-tuning improved mathematical reasoning!")
    
    # Load both models
    base_pair, elite_pair = load_both_models()

    # Preset command -> (label shown to the user, prompt sent to the models).
    presets = {
        'preset1': (
            "multi-step problem",
            """A bakery sells cupcakes for $3 each and cookies for $2 each. Yesterday they sold 45 cupcakes and some cookies, earning $237 total. Today they increased cupcake prices by 20% but sold 25% fewer cupcakes, and sold twice as many cookies as yesterday at the same price. If today's revenue was $198, how many cookies did they sell yesterday?""",
        ),
        'preset2': (
            "pure math problem",
            """Find all critical points of f(x) = x³ - 6x² + 9x + 1, determine their nature (max/min), and find the intervals where f(x) is increasing and decreasing. Show your complete mathematical reasoning.""",
        ),
        'preset3': (
            "mixed arithmetic/calculus",
            """Calculate 347 × 28 using mental math strategies, then verify by finding the derivative of g(x) = 347x² + 28x - 5.""",
        ),
    }
    
    print("\n" + "="*70)
    print("READY FOR TESTING!")
    print("Commands:")
    print("  - Enter any prompt to test both models")
    print("  - Type 'preset1' for multi-step word problem")
    print("  - Type 'preset2' for pure mathematical reasoning")
    print("  - Type 'preset3' for basic arithmetic check")
    print("  - Type 'quit' to exit")
    print("="*70)
    
    while True:
        try:
            user_input = input("\nüí≠ Enter prompt (or 'preset1'/'preset2'/'preset3'/'quit'): ").strip()
            command = user_input.lower()
            
            if command in ('quit', 'exit'):
                print("\nüëã Evaluation complete!")
                break
            
            if command in presets:
                label, user_input = presets[command]
                print(f"üéØ Using {label}: {user_input[:80]}...")
            
            if not user_input:
                continue
            
            # Compare both models
            compare_responses(user_input, base_pair, elite_pair)

            # Ask if they want to continue. Kept inside the try so Ctrl-C or
            # a closed stdin at this prompt is handled like at the first one.
            cont = input("\nüîÑ Test another prompt? (y/n): ").strip().lower()
            if cont in ('n', 'no'):
                break
            
        except KeyboardInterrupt:
            print("\n\n‚ö†Ô∏è  Interrupted. Type 'quit' to exit properly.")
            continue
        except EOFError:
            # stdin is closed: every further input() would raise again, so
            # retrying would spin forever. Leave the loop instead.
            break
        except Exception as e:
            print(f"\n‚ùå Error: {e}")
            continue
    
    print("\nüî¨ Elite Math evaluation complete!")
    print("üéØ Look for: Better reasoning structure, step-by-step logic, mathematical accuracy")

# Start the interactive comparison only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 09-09 13:43:21 [__init__.py:241] Automatically detected platform cuda.
ü¶• Unsloth Zoo will now patch everything to make training faster!
üî¨ ELITE MATH MODEL EVALUATION
Test if your Elite Math fine-tuning improved mathematical reasoning!
üî¨ LOADING MODELS FOR ELITE MATH COMPARISON

[1/2] Loading BASE model...
Are you certain you want to do remote code execution?
==((====))==  Unsloth 2025.9.1: Fast Llama patching. Transformers: 4.56.1. vLLM: 0.10.1.1.
   \\   /|    NVIDIA GeForce RTX 3060. Num GPUs = 1. Max memory: 11.638 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.3.1
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.31. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
‚úÖ Base model loaded

[2/2] Loading ELITE MATH model...
Are you cert

Unsloth 2025.9.1 patched 28 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


‚úÖ Elite Math model loaded

‚úÖ Both models ready on: NVIDIA GeForce RTX 3060

READY FOR TESTING!
Commands:
  - Enter any prompt to test both models
  - Type 'preset1' for multi-step word problem
  - Type 'preset2' for pure mathematical reasoning
  - Type 'preset3' for basic arithmetic check
  - Type 'quit' to exit
üéØ Using mixed arithmetic/calculus: Calculate 347 √ó 28 using mental math strategies, then verify by finding the deri...
üß™ TESTING ELITE MATH VS BASE MODEL
üìù PROMPT: Calculate 347 √ó 28 using mental math strategies, then verify by finding the derivative of g(x) = 347x¬≤ + 28x - 5.

üîµ BASE MODEL RESPONSE:
-----------------------------------
To calculate 347 √ó 28 using mental math strategies, I'll break it down into simpler multiplication:

347 √ó 20 = 6940
347 √ó 8 = 2786
Now, add the partial products:
6940 + 2786 = 9726

Alternatively, you can use the distributive property to multiply 347 by 28:

347 √ó 10 = 3470
347 √ó 18 = 6276
Now, add the partial products:
3

In [None]:
# merge_elite_math_better_test.py
from unsloth import FastLanguageModel
from pathlib import Path
import torch

# Paths
# Identifier of the base model the adapter was trained on.
BASE_MODEL_NAME = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit"
# Directory the fine-tuned adapter is loaded from.
# NOTE(review): the training run logged above saved to ".../Elite-Math-Section";
# confirm that "Section-A" is the intended adapter location.
ADAPTER_PATH = "/home/ai_pc_user/gemma-grpo-project/Section-A"
# Directory the merged model and tokenizer are written to (inside ADAPTER_PATH).
MERGED_OUTPUT = "/home/ai_pc_user/gemma-grpo-project/Section-A/Elite-Math-Merged"

def merge_elite_math():
    """Merge the Elite Math LoRA adapter into its base model and save it.

    Loads the adapter checkpoint at ``ADAPTER_PATH``, folds the LoRA weights
    into the base weights, and writes the merged model plus tokenizer to
    ``MERGED_OUTPUT``.

    Returns:
        ``MERGED_OUTPUT``, the directory the merged model was saved to.
    """
    print("üîß MERGING ELITE MATH LORA WITH BASE MODEL")
    print("=" * 60)
    
    print("üìÇ Loading base model with Elite Math adapter...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=ADAPTER_PATH,
        max_seq_length=2048,
        dtype="bfloat16",
        load_in_4bit=True,
        device_map={"": 0},
        trust_remote_code=True,
    )
    
    print("‚úÖ Model and adapter loaded")
    
    print("üîÄ Merging LoRA weights into base model...")
    print("üíæ Saving merged Elite Math model...")
    # for_inference() only switches the model to inference mode; it does not
    # fold the adapter into the base weights, and a plain save_pretrained()
    # on the adapter-wrapped model does not produce a merged checkpoint.
    # save_pretrained_merged() performs the merge and writes the merged
    # weights together with the tokenizer.
    model.save_pretrained_merged(
        MERGED_OUTPUT,
        tokenizer,
        save_method="merged_16bit",
    )
    
    print(f"‚úÖ MERGE COMPLETE!")
    print(f"üìÅ Merged model saved to: {MERGED_OUTPUT}")
    
    return MERGED_OUTPUT

def verify_merge_with_reasoning():
    """Smoke-test the merged model by generating an answer to one prompt.

    Loads the model saved at ``MERGED_OUTPUT``, sends a single algebra
    question through the tokenizer's chat template, and prints the response.
    The output is only printed for manual inspection; nothing is asserted
    about its correctness.
    """
    print("\nüîç VERIFYING MERGED MODEL WITH REASONING SETTINGS...")
    
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MERGED_OUTPUT,
        max_seq_length=2048,
        dtype="bfloat16",
        load_in_4bit=True,
        device_map={"": 0},
        trust_remote_code=True,
    )
    # Switch to inference mode before generating, as the comparison script does.
    FastLanguageModel.for_inference(model)
    
    # Better test with reasoning-friendly settings
    test_prompt = "Solve step by step: If 3x - 7 = 14, what is the value of x? Show your reasoning."
    
    # An instruct model expects its chat format; raw text would be treated as
    # a document to continue rather than a question to answer.
    if getattr(tokenizer, "chat_template", None):
        prompt = tokenizer.apply_chat_template(
            [{"role": "user", "content": test_prompt}],
            tokenize=False,
            add_generation_prompt=True,
        )
        # The rendered template already contains its special tokens.
        inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
    else:
        inputs = tokenizer(test_prompt, return_tensors="pt")
    # Follow the model's device instead of assuming "cuda".
    inputs = inputs.to(model.device)
    
    print("üßÆ Testing with reasoning-appropriate settings...")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,        # Longer for full reasoning
            temperature=0.3,           # Balanced creativity/accuracy
            top_p=0.9,                # Good for reasoning
            do_sample=True,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,
        )
    
    # Decode only the newly generated tokens, not the prompt.
    response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
    print(f"\nüìù Test input: {test_prompt}")
    print(f"ü§ñ Response:\n{response}")
    print("\n‚úÖ Merged model retains full reasoning capability!")

def main():
    """Merge the adapter, run the verification generation, and report."""
    divider = "=" * 60
    print("üéØ MERGING ELITE MATH FOR THINKING TRAINING")
    print(divider)

    merged_path = merge_elite_math()
    verify_merge_with_reasoning()

    # Closing summary, printed one line at a time.
    summary_lines = (
        "\nüéâ MERGE SUCCESSFUL!",
        f"üìÅ Path: {merged_path}",
        "üß† Model has FULL reasoning capability (test settings were just for verification)",
        "üöÄ Ready for Section-B thinking training!",
    )
    for summary_line in summary_lines:
        print(summary_line)

# Run the merge-and-verify flow only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()
