In [1]:
import pandas as pd
import numpy as np

#
# LLM Generated Answers for "Why is it difficult to go to the moon?"
#
# Identifier and exact prompt text for this question; both are reused when the results are tabulated.
question_id = 'Q01'
question    = '''Why is it difficult to go to the moon? Provide an answer in five or fewer sentences.'''

# Raw answers, keyed by model label. Insertion order: gpt4o, gemini_15flash, claude, llama32.
# The texts are kept verbatim (including markdown bullets and trailing whitespace).
responses = {
    'gpt4o': '''Going to the Moon is difficult because it requires escaping Earth's gravity, which demands immense energy and advanced rocket technology. The journey involves navigating harsh space conditions, such as extreme temperatures, radiation, and microgravity, which can harm equipment and astronauts. Landing on the Moon is challenging due to its low gravity and lack of atmosphere, requiring precise control to avoid crashes. Returning to Earth safely adds complexity, as spacecraft must endure intense heat during reentry. These factors demand careful planning, engineering, and significant resources.''',
    'gemini_15flash': '''Going to the moon is incredibly challenging due to several factors:
* **Vast distance:** The moon is hundreds of thousands of miles away, requiring immense amounts of fuel and precise navigation.
* **Extreme environment:** Space is a harsh environment with radiation, micrometeoroids, and temperature extremes that spacecraft must withstand.
* **Technical complexity:** Building and launching rockets, spacecraft, and life support systems for human missions requires cutting-edge technology and meticulous planning. 
* **High risk:** Space travel is inherently dangerous, with potential for malfunctions, accidents, and unforeseen challenges. 
* **Costly endeavor:** Space exploration is incredibly expensive, requiring significant financial investment and resources.
''',
    'claude': '''Going to the moon is difficult due to several key challenges: the immense energy required to escape Earth's gravity, the precise engineering needed for spacecraft and life support systems, the extreme environmental conditions of space including radiation and temperature variations, the complexity of navigation and landing, and the enormous financial and technological investments required for such missions.''',
    'llama32': '''Going to the moon is a complex and challenging endeavor due to its proximity to Earth, lack of atmosphere, and extreme temperatures. The harsh conditions on the lunar surface, including radiation exposure and vacuum-like environment, pose significant risks to both human health and equipment. Additionally, the distance from Earth requires a massive amount of energy and resources to achieve escape velocity and navigate the long journey. Furthermore, the moon's gravity is only one-sixth of Earth's, making it difficult to maintain stable habitats and ensure safe landing. These challenges have made lunar exploration a significant technological and logistical hurdle.''',
}

In [2]:
#
# Manual Identification of Content Units
#
# Hand-identified content units per model, listed in the order they were extracted.
# The strings double as lookup keys when content units are grouped into summary
# content units, so they must be kept character-for-character (typos included).
content_units = {
    'gpt4o': [
        "difficulty: escaping earth's gravity",
        "escaping gravity requires immense energy",
        "escaping gravity requires advanced rocket technology",
        "difficulty: navigating harsh space conditions / harm to equipment and astronauts",
        "harsh conditions: extreme temperatures",
        "harsh conditions: radiation",
        "harsh conditions: microgravity",
        "difficulty: landing on the moon / precise control / avoid crashes",
        "moon landing: low gravity",
        "moon landing: lack of atmosphere",
        "difficulty: returning to earth safely / endure intense heat during reentry",
        "overall: careful planning",
        "overall: engineering",
        "overall: significant resources",
    ],
    'gemini_15flash': [
        "difficulty: vast distance",
        "moon distance: hundreds of thousands of miles",
        "vast distance requires immense amounts of fuel",
        "vast distance requires precise navigation",
        "difficulty: extreme environment / spacecraft must withstand",
        "extreme environment: radiation",
        "extreme environment: micrometeoroids",
        "extreme environment: temperature extremes",
        "difficulty: technical complexity",
        "technical complexity: building/launching rockets/spacecraft",
        "technical complexity: life support systems for human missions",
        "technical complexity: cutting-edge technology",
        "technical complexity: meticulous planning",
        "difficulty: high risk",
        "high risk: space travel/malfuntions/accidents/unforeseen challenges",
        "difficulty: costly endeavor (investments/resources)",
    ],
    'claude': [
        "difficulty: escaping earth's gravity",
        "escaping gravity requires immense energy",
        "difficulty: precise engineering",
        "precise engineering: spacecraft",
        "precise engineering: life support systems",
        "difficulty: extreme environmental conditions",
        "environmental conditions: radiation",
        "environmental conditions: temperature variations",
        "difficulty: complexity of navigation and landing",
        "difficulty: enormous financial and technological investments",
    ],
    'llama32': [
        "difficulty: proximity to Earth",
        "difficulty: lack of atmosphere / vaccum-like environment",
        "difficulty: extreme temperatures",
        "harsh conditions: radiation exposure (lunar surface)",
        "conditions pose significant risks to human health / equipment",
        "difficulty: distance from Earth",
        "distance requires massive amount of energy",
        "distance requires massive amount of resources",
        "difficulty: escape velocity (from earth)",
        "difficulty: navigate long journey",
        "difficulty: moon's gravity",
        "gravity is only one-sixth of earth's",
        "moon gravity makes it difficult to maintain stable habitats",
        "moon gravity makes it difficult to ensure safe landing",
        "difficulty: technological hurdles",
        "difficulty: logistical hurdles",
    ],
}

In [3]:
# Union of every model's content units; identical strings from different models collapse to one entry.
all_content_units = {unit for units in content_units.values() for unit in units}
# for cu in all_content_units: print(f'"{cu}"')

In [None]:
#
# Manual Grouping Of Content Units Into Summary Content Units
#
# Maps each summary content unit (SCU) label to the content units grouped under it.
# Key order is significant: a later lookup takes the first SCU whose list contains a content unit.
summary_content_unit_lu = {
    'harsh conditions: radiation': [
        "harsh conditions: radiation",
        "environmental conditions: radiation",
        "extreme environment: radiation",
    ],
    # Label was misspelled 'harsh conditiions'; it is emitted verbatim into the results table.
    'harsh conditions': [
        "difficulty: extreme environment / spacecraft must withstand",
        "difficulty: extreme environmental conditions",
        "conditions pose significant risks to human health / equipment",
    ],
    'harsh conditions: micrometeoroids': [
        "extreme environment: micrometeoroids",
    ],
    'harsh conditions: temperatures': [
        "environmental conditions: temperature variations",
        "difficulty: extreme temperatures",
        "extreme environment: temperature extremes",
        "harsh conditions: extreme temperatures",
    ],
    'technology': [
        "difficulty: technological hurdles",
        "difficulty: technical complexity",
        "difficulty: precise engineering",
        "overall: engineering",
        "technical complexity: cutting-edge technology",
    ],
    'technology/spacecraft': [
        "precise engineering: spacecraft",
        "technical complexity: building/launching rockets/spacecraft",
    ],
    'technology/life support': [
        "technical complexity: life support systems for human missions",
        "precise engineering: life support systems",
    ],
    'high risk': [
        "high risk: space travel/malfuntions/accidents/unforeseen challenges",
        "difficulty: high risk",
    ],
    'planning requirements': [
        "technical complexity: meticulous planning",
        "overall: careful planning",
    ],
    'costs/resources': [
        "difficulty: enormous financial and technological investments",
        "difficulty: costly endeavor (investments/resources)",
        "overall: significant resources",
        "difficulty: logistical hurdles",
    ],
    'resources/fuel/distance': [
        "distance requires massive amount of resources",
        "difficulty: distance from Earth",
        "distance requires massive amount of energy",
        "difficulty: vast distance",
        "vast distance requires precise navigation",
        "vast distance requires immense amounts of fuel",
        "difficulty: proximity to Earth",
        # NOTE(review): this unit reads like a 'harsh conditions' item rather than a
        # distance/fuel one -- confirm the grouping is intentional before relying on the weights.
        "difficulty: navigating harsh space conditions / harm to equipment and astronauts",
        "difficulty: navigate long journey",
    ],
    'lunar distance': [
        "moon distance: hundreds of thousands of miles",
    ],
    "escaping earth's gravity": [
        "difficulty: escaping earth's gravity",
        "difficulty: escape velocity (from earth)",
        "escaping gravity requires immense energy",
        "escaping gravity requires advanced rocket technology",
    ],
    'returning to earth/re-entry': [
        "difficulty: returning to earth safely / endure intense heat during reentry",
    ],
    'lunar landing challenges': [
        "difficulty: complexity of navigation and landing",
        "difficulty: landing on the moon / precise control / avoid crashes",
    ],
    'lunar landing/atmosphere': [
        "moon landing: lack of atmosphere",
        "difficulty: lack of atmosphere / vaccum-like environment",
    ],
    'lunar surface/radiation': [
        "harsh conditions: radiation exposure (lunar surface)",
    ],
    'lunar landing/low gravity': [
        "difficulty: moon's gravity",
        "gravity is only one-sixth of earth's",
        "moon landing: low gravity",
        "moon gravity makes it difficult to ensure safe landing",
    ],
    'harsh conditions/microgravity': [
        "harsh conditions: microgravity",
    ],
    'lunar habitats/low gravity': [
        "moon gravity makes it difficult to maintain stable habitats",
    ],
}

# Sanity check of the manual grouping: warn when a content unit is listed more than once,
# then show which content units are unassigned (first set) or unknown (second set).
_verify_set_ = set()
for scu, cus in summary_content_unit_lu.items():
    for cu in cus:
        if cu in _verify_set_: print(f'WARNING: {cu} already in verify set')
        _verify_set_.add(cu)

print('These resultant sets should both be empty')
all_content_units - _verify_set_, _verify_set_ - all_content_units

In [None]:
#
# Data Frame Encapsulation
#
# Invert the manual grouping once: content unit -> summary content unit.
# setdefault keeps the first summary content unit (in dict order) that lists a content unit,
# which is the same answer a first-match scan over summary_content_unit_lu would give.
_cu_to_scu_ = {}
for _scu_, _grouped_cus_ in summary_content_unit_lu.items():
    for _grouped_cu_ in _grouped_cus_:
        _cu_to_scu_.setdefault(_grouped_cu_, _scu_)

# One row per (model, content unit); content units with no grouping get None as their summary content unit.
_lu_ = {'question_id':[], 'question':[], 'model':[], 'summary_content_unit':[], 'content_unit':[]}
for _model_, _model_cus_ in content_units.items():
    for _content_unit_ in _model_cus_:
        _lu_['question_id'].append(question_id)
        _lu_['question'].append(question)
        _lu_['model'].append(_model_)
        _lu_['summary_content_unit'].append(_cu_to_scu_.get(_content_unit_))
        _lu_['content_unit'].append(_content_unit_)
df = pd.DataFrame(_lu_)

# Fixed seed so the three-row preview is the same on every run of the notebook.
df.sample(3, random_state=0)

In [None]:
import rtsvg
from scu_pyramid_method_diagram import SCUPyramidMethodDiagram
rt = rtsvg.RACETrack()
# The three strings name the df columns passed to the diagram: question id, summary content unit, model.
scu_pyramid = SCUPyramidMethodDiagram(rt, df, 'question_id', 'summary_content_unit', 'model')

# Use question_id rather than a repeated 'Q01' literal so this cell follows the question defined
# at the top of the notebook. First tile is requested without a model, then one tile per model
# in order of first appearance in df.
_tiles_ = [scu_pyramid.svgPyramid(question_id)]
_tiles_.extend([scu_pyramid.svgPyramid(question_id, _model_name_) for _model_name_ in df['model'].unique()])
rt.tile(_tiles_, spacer=10)

In [None]:
# Disabled code: everything below is a bare triple-quoted string literal, so none of it executes.
# Remove the enclosing triple quotes to run it.
#
# What the quoted code does when enabled:
#   - for every model count in 2..7 and every SCU count in 4..96 (step 4), generates that many
#     random 10-character SCU names;
#   - gives each synthetic model ('m0', 'm1', ...) a random, non-empty sample of those SCUs;
#   - builds a SCUPyramidMethodDiagram over the synthetic frame and tiles the overall pyramid
#     followed by one pyramid per model.
# It relies on `rt`, `pd` and `SCUPyramidMethodDiagram` already being defined by earlier cells.
# No random seed is set inside it, so the generated data differs from run to run.
'''
import random
import string
def randomString(): return ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
_tiles_ = []
for _number_of_models_ in range(2, 8): 
    for _number_of_scus_ in range(4, 100, 4):
        _qid_  = f'{_number_of_models_}_{_number_of_scus_}'
        _scus_ = []
        for i in range(_number_of_scus_): _scus_.append(randomString())
        _lu_ = {'qid':[], 'q':[], 'm':[], 'scu':[]}
        for _model_no_ in range(_number_of_models_):
            num_of_scus = random.randint(1, len(_scus_))
            _scus_choices_ = random.sample(_scus_, num_of_scus)
            for _scu_ in _scus_choices_:
                _lu_['qid'].append(_qid_)
                _lu_['q']  .append('???')
                _lu_['m']  .append(f'm{_model_no_}')
                _lu_['scu'].append(_scu_)
        df_test         = pd.DataFrame(_lu_)
        scu_pyramid_test = SCUPyramidMethodDiagram(rt, df_test, 'qid', 'scu', 'm')
        sub_tiles = []
        sub_tiles.append(scu_pyramid_test.svgPyramid(_qid_))
        for _model_no_ in range(_number_of_models_):
            sub_tiles.append(scu_pyramid_test.svgPyramid(_qid_, f'm{_model_no_}'))
        _tiles_.append(rt.tile(sub_tiles, spacer=10))
rt.tile(_tiles_, horz=False, spacer=10)
'''

In [None]:
# Disabled code: everything below is a bare triple-quoted string literal, so none of it executes.
# Remove the enclosing triple quotes to run it (requires the `ollama` package and the `df` /
# `responses` objects from earlier cells).
#
# When enabled, it asks a llama3.2 chat model, for every content unit row in df and every model,
# whether that content unit occurs in that model's response, and prints the model's True/False
# answer. Rows where the response belongs to the model the content unit came from are marked '*'.
#
# Fix applied inside the quoted code: the response checked is now responses[_model_other_].
# It previously read responses[_model_], so every pass of the inner loop sent the same prompt
# (the content unit's own source response) regardless of which model was being printed.
'''
from ollama import chat
from ollama import ChatResponse
def promptModel(prompt):
    response: ChatResponse = chat(model='llama3.2', messages=[{ 'role': 'user', 'content': prompt,},])
    return response['message']['content']

for i in range(len(df)):
    _model_    = df.iloc[i]['model']
    _cu_       = df.iloc[i]['content_unit']
    for _model_other_ in set(df['model']):
        _response_ = responses[_model_other_]
        _prompt_   = f'For the following content unit, "{_cu_}", does that occur in the following summary?  Respond only with "True" or "False" and do not provide an explanation.\n\n"{_response_}"'
        _correct_model_ = '*' if _model_ == _model_other_ else ' '
        print(f'{promptModel(_prompt_):6} {_correct_model_} | {_model_other_:16} | {_cu_}')
'''