In [1]:
!pip install -q -U transformers --no-index --find-links /kaggle/input/hf-libraries/transformers

In [None]:
import sys 
import torch
import random
import numpy as np
import pandas as pd
import gc
import time
import random
from tqdm import tqdm
import os

from IPython.display import display

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModel

# Warn (without aborting) when no CUDA device is visible.
if not torch.cuda.is_available():
    print("Sorry - GPU required!")

# Only let transformers report errors.
import logging
logging.getLogger("transformers").setLevel(logging.ERROR)

# Remove pandas display limits so long text columns are shown in full.
pd.set_option("display.width", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

# Competition inputs: the topics to write about and the sample submission file.
sub = pd.read_csv("/kaggle/input/llms-you-cant-please-them-all/sample_submission.csv")
test_df = pd.read_csv("/kaggle/input/llms-you-cant-please-them-all/test.csv")

In [3]:
model_name = "/kaggle/input/llama-3.2-3b-instruct/pytorch/default/1"

# Release whatever a previous run of this cell left behind.
# Order matters: the stale objects must be unreferenced (and collected) BEFORE
# the CUDA cache is emptied, because empty_cache() can only hand back memory
# that no live tensor still occupies.
for obj in ['model', 'pipe', 'tokenizer']:
    if obj in globals():
        del globals()[obj]
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()


# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model in bfloat16 and let device_map="auto" decide weight placement
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Generation settings
word_count_to_request = 100   # word count quoted to the model in the prompt text
max_new_tokens = 150          # upper bound on tokens generated per call

# Sampling settings (do_sample=True below makes generation stochastic)
top_p = 0.9                   # nucleus-sampling probability mass
temperature = 0.9             # higher -> more random/creative outputs

# Text-generation pipeline wrapping the model and tokenizer loaded earlier
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    do_sample=True,
    top_p=top_p,
    temperature=temperature,
    max_new_tokens=max_new_tokens,
    trust_remote_code=True,
)

def get_response(messages):
    """Run the chat pipeline on ``messages`` and return the trimmed reply text.

    The reply is read from the last message of the first pipeline result,
    stripped of surrounding whitespace, and cut right after the final
    '.', '?', '!' or ']' it contains, dropping whatever follows. When none of
    those characters occurs, the stripped text is returned as is.
    """
    reply = pipe(messages)[0]['generated_text'][-1]['content'].strip()

    # rfind gives -1 for an absent character, so the max is -1 only when the
    # reply contains no terminator at all.
    cut_index = max(reply.rfind(mark) for mark in ('.', '?', '!', ']'))

    if cut_index == -1:
        return reply
    return reply[:cut_index + 1]

In [5]:
# Pieces of the user prompt: prefix + <topic> + length request + format instructions.
word_count_to_request = 100  # number of words requested from the model
prompt_prefix = "Topic: "
prompt_suffix = f" (provide your response in {word_count_to_request} words)."

# Format instructions, split across adjacent literals for readability only;
# the concatenated text is what is sent to the model.
final_suffix = (
    " Please provide the response in the following format: "
    "List each related category item with the relevant values enclosed in brackets. "
    "Do not mention the number of items and "
    "Don't repeat the values of one category among various categories ."
)

# Preview the template with an empty topic.
final_prompt = prompt_prefix + prompt_suffix + final_suffix
print(final_prompt)


Topic:  (provide your response in 100 words). Please provide the response in the following format: List each related category item with the relevant values enclosed in brackets. Do not mention the number of items and Don't repeat the values of one category among various categories .


In [6]:
# Few-shot demonstrations as (topic, reference answer) pairs. get_essays() turns each
# pair into a user/assistant exchange that is placed ahead of the real topic.
# The answer strings are passed to the model verbatim, whitespace included, so
# edits to spacing or wording here change the prompt.
# NOTE(review): the first answer's continuation lines start with 8 spaces, unlike
# every other example — confirm whether that indentation is intentional.
example_messages = [
    ("Evaluate the relationship between user feedback and product development in mobile applications",
        """key participants: [developer,user]
        feedback channels: [surveys, reviews, analytics, support tickets]
        development cycle : [requirement gathering, user research, prototype design, initial testing, beta release, user feedback collection, feature prioritization, implementation planning, deployment strategy]
        Uncertainties : [vocal minority,biased data,insight noise,overgeneralized complaints,engaged subset]
        Adjectives : [Enigmatic,Synergistic,Reciprocal,Holistic,Methodical,Systematic]
        """
    ),
    (
        "Compare and contrast the importance of team diversity and technical expertise in project management",
        """skill types: [coding, design ,architecture]
mindset approaches: [analytical , creative]
team roles: [project manager, backend developer, frontend specialist, UX researcher, quality assurance expert, business analyst, systems architect, security specialist, documentation writer]
Hidden challenges : [diverse perspectives,skill depth,innovation boost,knowledge gaps,collaborative synergy]
Adjectives : [Enmeshed, Heterogeneous, Oscillatory, Malleable]
"""
    ),
    (
        "Analyze the impact of remote work policies on organizational culture and employee retention",
        """Five communication tools: [email, chat, video, voice, whiteboard]
workspace types: [home ,hybrid]
cultural elements: [virtual watercooler, online team building, digital mentorship, knowledge sharing platform, informal chat channels, collaborative workspaces, recognition systems, team rituals, remote social events]
Unclear factors: [Communication breakdowns,Social isolation,Technical issues,Social isolation,Managerial challenges]
Adjectives : [Autonomy , Eudaimonic , Thixotrophic, Kairotic, Ephemeral]
"""
    ),
    (
        "Examine the balance between short-term profits and environmental sustainability in manufacturing",
        """Basic resources: [water, electricity, raw materials]
Production steps: [design, assembly, testing, packaging]
Green initiatives: [solar power installation, water recycling system, waste reduction program, energy monitoring, materials recovery, carbon offsetting, supplier certification, eco-packaging design, environmental reporting]
Speculative concerns: [Greenwashing, Cost-benefit trade-offs, Stakeholder resistance]
Adjectives : [Dendrochronological, Teleological, Echopraxic, Heliotropic]
"""
    ),
    (
        "Investigate the role of standardized testing in educational assessment",
        """Key aspects of student evaluation:
assessment types: [written, practical]
subject areas: [math, science, language, history, arts]
evaluation methods: [multiple choice testing, essay writing, project assessment, oral examination, performance evaluation, portfolio review, peer assessment, practical demonstration, research presentation]
Unclear factors: [anxiety and stress causing factors, cheating and academic dishonesty, inadequate representation of skills]
Adjectives : [Kairocentric, Autopoietic, Velleitarian, Meraki]
"""
    ),
    (
        "Analyze the influence of social media metrics on content creation strategies",
        """Engagement types: [likes, shares, comments]
Content formats: [text, image, video, audio]
Performance indicators: [engagement rate, reach metrics, audience growth, content longevity, conversion tracking, follower retention, sentiment analysis, demographic insights, behavioral patterns]
Lingering doubts: [Inaccurate engagement metrics, Overemphasis on vanity metrics, Content diversity, Insufficient audience needs, Algorithm changes]
Adjectives: [Perspicacious, Recondite, Sagacious, Abstruse, Esoteric]
"""
    ),
    (
        "Evaluate the relationship between urban density and community wellbeing",
        """housing types: [apartments, houses]
community spaces: [parks, shops, schools, clinics, libraries]
neighborhood elements: [community center, public transportation hub, green space, local marketplace, pedestrian zones, recreational facilities, cultural venues, healthcare access points, social gathering spots]
Probable challenges: [Transportation challenges,Health disparities,Overcrowding,Decreased green spaces ,Gentrification]
Adjectives : [Convivial, Deleterious, Eudaimonic,Dysfunctional,Salubrious,Nexus]
"""
    ),
    (
        "Examine the impact of artificial intelligence on healthcare delivery",
        """diagnostic tools: [X-ray, MRI, CT scan, ultrasound]
healthcare providers: [doctor, nurse, technician]
AI applications: [diagnosis assistance, treatment planning, patient monitoring, drug interaction checking, medical imaging analysis, predictive health tracking, administrative automation, virtual nursing assistance, personalized medicine recommendation]
Critical ambiguities: [Over-reliance,Data privacy,Unintended bias,limited oversight and accountability ,Inadequate regulation and standards]
Adjectives : [Personalized , Interoperable  ,Efficacious]
"""
    ),
    (
        "Analyze the relationship between food sustainability and restaurant profitability",
        """supply chain types: [traditional ,local]
ingredient sources: [farms, markets, wholesalers, gardens, fisheries, ranches]
sustainability practices: [composting system, water conservation, energy-efficient appliances, waste tracking, local sourcing, seasonal menu planning, recyclable packaging, food donation program, staff training initiatives]
Overlooked nuances: [Government regulations,Economic fluctuations,Supply chain resilience ,Certification and labeling]
Adjectives : [Ephemeralization,Symbiotic,Catalytic, Agroecological,Biophilic,Regenerative ,Terroir-driven]
"""
    ),
    (
        "Investigate the balance between tourism growth and cultural preservation",
        """tourist types: [adventure seekers, culture enthusiasts,leisure travelers]
accommodation styles: [hotels, hostels, homestays, resorts]
Count these preservation measures: [visitor capacity limits, cultural education programs, local artisan support, traditional craft workshops, heritage site protection, community involvement initiatives, sustainable tourism guidelines, indigenous culture celebration, historical documentation efforts]
Probable conflicts:[Climate change impacts,Over-tourism consequences,Cultural appropriation risks,Community displacement possibilities,Economic instability threats]
Adjectives : [Geo-cultural, Indigenous ,Culturo-sensitive, Heritagistic, Authentic ,Community-based , Preservational , Eco-friendly]
"""
    ),
    (
        "Evaluate the effectiveness of digital marketing in small businesses",
        """platform types: [social ,search]
marketing channels: [email, social media, blog, video, podcasts]
Track these marketing elements: [content calendar, audience segmentation, conversion tracking, engagement metrics, campaign optimization, brand consistency, platform integration, performance analytics, ROI measurement]
Open questions:[Data quality issues,Ever-changing consumer behaviors,Adapting to new social media trends ,Rapidly evolving technology landscapes]
Adjectives: [Viral, Niche-specific, Trackable, pioneering, agile ,Data-savvy, omnichannel]
"""
    ),
    (
        "Analyze the relationship between office design and employee productivity",
        """lighting types: [natural, task, ambient]
noise levels: [silent, quiet, collaborative, energetic]
workspace features: [ergonomic furniture, natural light optimization, acoustic treatment, collaboration zones, focus pods, breakout spaces, wellness areas, technology integration,  climate control systems]
Unresolved issues:[Employee personalization preferences,Adaptive workspace needs,Technological advancements integration,Space for future growth and expansion]
Adjectives: [Invigorating, Empowering, uplifting , Harmonious]
"""
    ),
    (
        "Examine the impact of gamification on student engagement",
        """reward types: [points , badges]
game elements: [leaderboards, challenges, levels, quests, achievements]
engagement tools: [progress tracking, skill trees, peer competition, collaborative missions, achievement unlocks, personalized feedback, adaptive difficulty, social interaction features, reward ceremonies]
Unresolved challenges:[Technical issues,motivation variability,training requirements,Parental concerns ,Equity and accessibility issues]
Adjectives: [Edutaining,Interactivist,Pedagogical , Ludic ,Engagitative]
"""
    ),
    (
        "Investigate the relationship between public transportation and urban air quality",
        """vehicle types: [buses, trains, trams, and bikes]
fuel sources: [electric, hybrid, and renewable]
air quality initiatives: [electric fleet conversion, smart traffic management, emission monitoring stations, green corridor development, public transport expansion, bicycle infrastructure, zero-emission zones, renewable energy integration, air quality reporting systems]
Hidden challenges are : [Policy and regulatory changes,Technological advancements,Public acceptance and adoption,Funding and investment uncertainties,Climate and weather variability]
Adjectives: [Transformative , Multimodal , Interconnected , Ecomobility, Aerius, Pedestrianization, Transitology]
"""
    )
]

In [None]:
def get_essays(example_conversations, prompt_final_suffix = None, verbose=True):
    """Generate one essay per test topic using few-shot chat prompting.

    Args:
        example_conversations: Either one example set (a sequence of
            ``(topic, response)`` pairs) or a list of such sets. An empty
            sequence is treated as a single empty set, i.e. the topic is
            prompted with no demonstrations.
        prompt_final_suffix: Optional text appended after the module-level
            ``prompt_suffix`` in every user message (demonstrations and the
            real topic alike).
        verbose: When true, print each topic, the prompt message for it and
            the resulting essay.

    Returns:
        A DataFrame with the ``id`` column of ``test_df`` and an ``essay``
        column holding, per topic, the responses obtained for every example
        set joined by single spaces.

    Reads the module-level ``test_df``, ``prompt_prefix`` and ``prompt_suffix``
    and calls the module-level ``get_response``.
    """
    prompt_complete_suffix = prompt_suffix
    if prompt_final_suffix is not None:
        prompt_complete_suffix = prompt_complete_suffix + prompt_final_suffix

    # Accept both a single example set and a list of example sets. The length
    # check comes first so an empty input no longer fails on the [0] lookup.
    if len(example_conversations) == 0 or not isinstance(example_conversations[0], list):
        examples_list = [example_conversations]
    else:
        examples_list = example_conversations

    # The demonstration messages do not depend on the topic, so build them
    # once per example set instead of once per topic.
    few_shot_sets = []
    for examples in examples_list:
        few_shot_messages = []
        for prompt, response in examples:
            few_shot_messages.extend([
                {"role": "user", "content": f"{prompt_prefix}{prompt}{prompt_complete_suffix}"},
                {"role": "assistant", "content": response}
            ])
        few_shot_sets.append(few_shot_messages)

    # Result frame shares test_df's index so .loc[i, ...] below hits the matching row.
    submission = pd.DataFrame()
    submission['id'] = test_df['id']
    submission['essay'] = ''

    for i, row in test_df.iterrows():

        if verbose:
            print(f"\n{'*'*5}{row['topic']}{'*'*5}\n")

        topic_message = [
            {"role": "user", "content": f"{prompt_prefix}{row['topic']}{prompt_complete_suffix}"}
        ]

        responses = []
        for few_shot_messages in few_shot_sets:
            if verbose: print(topic_message, "\n")
            # Concatenation builds a fresh list, leaving the shared demonstrations untouched.
            responses.append(get_response(few_shot_messages + topic_message))

        essay = ' '.join(responses)
        submission.loc[i, 'essay'] = essay
        if verbose:
            print(f"{essay}\n")

    return submission

In [None]:
%%time

verbose = not bool(os.getenv("KAGGLE_IS_COMPETITION_RERUN"))
submission = get_essays([example_messages], prompt_final_suffix = final_suffix, verbose = verbose)    
submission


*****Compare and contrast the importance of self-reliance and adaptability in healthcare.*****

[{'role': 'user', 'content': "Topic: Compare and contrast the importance of self-reliance and adaptability in healthcare. (provide your response in 100 words). Please provide the response in the following format: List each related category item with the relevant values enclosed in brackets. Do not mention the number of items and Don't repeat the values of one category among various categories ."}] 

self-reliance models: [patient-centered, autonomy, empowerment, patient-led]
adaptive frameworks: [flexible, responsive, resilient, dynamic, adaptable]
Healthcare delivery types: [primary care, specialized care, preventive care, rehabilitative care, telemedicine]
Uncertainties: [Patient trust, provider-patient relationships, information literacy, access to resources, cultural competence]


*****Evaluate the effectiveness of management consulting in addressing conflicts within marketing.*****

[{

Unnamed: 0,id,essay
0,1097671,"self-reliance models: [patient-centered, autonomy, empowerment, patient-led]\nadaptive frameworks: [flexible, responsive, resilient, dynamic, adaptable]\nHealthcare delivery types: [primary care, specialized care, preventive care, rehabilitative care, telemedicine]\nUncertainties: [Patient trust, provider-patient relationships, information literacy, access to resources, cultural competence]"
1,1726150,"Consulting services: [strategy, operations, organizational design, change management]\nmarketing channels: [digital, social media, content marketing, event marketing]\nconflict resolution methods: [facilitated workshops, stakeholder analysis, communication audits, power mapping, cultural assessment, role-playing exercises, trust-building initiatives, conflict mediation, post-conflict planning]\nLingering concerns: [Resistance to change,Cultural differences,Power imbalances,Communication barriers,Resource constraints]\nAdjectives: [Strategic, Proactive, Collaborative, Transformative, Solution-focused, Adaptive]"
2,3211968,"problem-solving approaches: [algorithmic, iterative, incremental]\nlearning strategies: [self-study, mentorship, online courses, hands-on projects, coding challenges]\ntechnical skills: [programming languages, data structures, software development methodologies, testing and debugging, architecture and design]\nPersonal qualities: [resilience, adaptability, critical thinking, creativity, time management, self-discipline, accountability]\nUnaddressed factors: [Teamwork and collaboration limitations,Bias in problem-solving,Dependence on tools and technology]\nAdjectives: [Autonomous,Proactive,Analytical,Innovative,Self-directed, Competent,Resilient]"
