In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/llms-you-cant-please-them-all/sample_submission.csv
/kaggle/input/llms-you-cant-please-them-all/test.csv
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/model.safetensors.index.json
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/CODE_OF_CONDUCT.md
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/config.json
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/model-00001-of-00002.safetensors
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/LICENSE
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/model-00002-of-00002.safetensors
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/modeling_phi3.py
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/README.md
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/SECURITY.md
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/tokenizer.json
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/tokenizer_config.json
/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1/gitattributes
/kaggle/i

In [2]:
import sys 
import torch
import random
import numpy as np
import pandas as pd
import gc
import time
import random
from tqdm import tqdm

from IPython.display import display

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModel

# Warn (without stopping) when no CUDA device is visible.
if not torch.cuda.is_available():
    print("Sorry - GPU required!")

# Only surface ERROR-level (and above) messages from the 'transformers' logger.
import logging
logging.getLogger('transformers').setLevel(logging.ERROR)

# Lift pandas' display limits on column text width, row count and total width.
for display_option in ('display.max_colwidth', 'display.max_rows', 'display.width'):
    pd.set_option(display_option, None)

# Read the competition topics and show them as the cell output.
test_df = pd.read_csv("/kaggle/input/llms-you-cant-please-them-all/test.csv")
test_df

Unnamed: 0,id,topic
0,1097671,Compare and contrast the importance of self-reliance and adaptability in healthcare.
1,1726150,Evaluate the effectiveness of management consulting in addressing conflicts within marketing.
2,3211968,Discuss the role of self-reliance in achieving success in software engineering.


In [3]:
# Drop objects left over from a previous run of this cell *before* asking CUDA
# to release memory: empty_cache() can only return blocks that no live tensor
# is still using, so the references have to go first.
for obj in ['model', 'pipe', 'tokenizer']:
    if obj in globals():
        del globals()[obj]
gc.collect()  # collect reference cycles that may still be holding the old objects
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Model configuration: local Kaggle copy of the phi-3.5-mini-instruct checkpoint
model_name = '/kaggle/input/phi-3.5-mini-instruct/pytorch/default/1'


# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model.
# trust_remote_code=True permits running Python code shipped with the
# checkpoint (the input listing includes modeling_phi3.py), so only use it
# with a checkpoint directory you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
SEED = 42  # Fixed seed so a fresh "Restart & Run All" draws the same samples (on the same hardware/software stack)
torch.manual_seed(SEED)

max_new_tokens = 180  # Default cap on newly generated tokens per call (get_response can override it)

word_count_to_request = 60   # Word count intended to be requested in a prompt prefix; not referenced by the cells shown in this notebook

temperature = 0.7    # Higher temperature = more random/creative outputs
top_p = 0.7         # Nucleus sampling: keep only the most likely tokens whose cumulative probability reaches top_p (lower = less diverse; 1.0 disables filtering)

# Create the text-generation pipeline around the already-loaded model and
# tokenizer; the sampling settings above become its per-call defaults.
pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
    trust_remote_code=True,
    max_new_tokens=max_new_tokens,
    temperature=temperature,
    top_p=top_p,
    do_sample=True
)


In [5]:
def get_response(messages, trim_numbered_lists=True, max_tokens=None):
    """Generate a chat reply with the global `pipe` and tidy up the text.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts; it is passed to `pipe` unchanged.
        trim_numbered_lists: When true, cut the reply just before the first
            numbered-list marker (a stand-alone "1."), keeping only the
            lead-in text. A reply that consists of nothing but the list is
            left untrimmed instead of being reduced to an empty string.
        max_tokens: Optional per-call override for ``max_new_tokens``. Any
            falsy value (None, 0) leaves the pipeline's default in place.

    Returns:
        The reply, stripped of surrounding whitespace, optionally trimmed
        before a numbered list, and then cut after its last '.', '?', '!' or
        ']' so that it does not stop mid-sentence. If none of those
        characters occurs, the text is returned unchanged.
    """
    import re  # local import keeps this cell independent of the import cell

    # Only forward max_new_tokens when the caller asked for an override.
    generation_params = {}
    if max_tokens:
        generation_params['max_new_tokens'] = max_tokens

    # 'generated_text' is indexed as a list of messages here; the last entry
    # is taken as the newly generated reply.
    response = pipe(messages, **generation_params)[0]['generated_text'][-1]['content']
    response = response.strip()

    if trim_numbered_lists:
        # Treat "1." as a list marker only when it stands alone: not glued to
        # a preceding word character or dot (so "2021." and "11.5" are left
        # alone) and followed by whitespace or the end of the text (so "1.5"
        # is left alone).
        marker = re.search(r'(?<![\w.])1\.(?=\s|$)', response)
        if marker:
            lead_in = response[:marker.start()].strip()
            # No lead-in means the reply starts with the list itself; keep
            # the full text rather than returning an empty essay.
            if lead_in:
                response = lead_in

    # Cut after the last sentence-ending character, if there is one.
    last_punct = max(response.rfind(ch) for ch in '.?!]')

    return response[:last_punct + 1] if last_punct != -1 else response

In [6]:
# Try the helper on a single hand-written user prompt.
prompt = "write me a short essay on the joys of spending money"
messages = [{"role": "user", "content": prompt}]
get_response(messages)

"Title: The Joyful Expenditure: A Reflection on the Pleasures of Spending Money\n\nIn the tapestry of human experience, money is often seen as a necessary evil—a tool to acquire the essentials of life. Yet, beneath the surface of this utilitarian view lies a more nuanced and joyful aspect of money: the pleasure derived from spending it. This essay explores the multifaceted joys of spending money, examining how it can enhance our lives, create moments of happiness, and serve as a catalyst for personal growth.\n\nAt its core, spending money is an act of self-expression. Each purchase, whether it's a cup of coffee, a new book, or a luxurious vacation, is a statement about our values, interests, and desires."

In [7]:
# Generate one essay per topic, in row order; each topic is sent as the only
# user message of its own conversation.
essays = [
    get_response([{"role": "user", "content": topic}])
    for topic in test_df['topic']
]

# Attach the essays as a new column and show the combined table.
test_df['essay'] = essays

test_df

Unnamed: 0,id,topic,essay
0,1097671,Compare and contrast the importance of self-reliance and adaptability in healthcare.,"Self-reliance and adaptability are both crucial attributes in the healthcare sector, yet they serve different purposes and are manifested in various ways.\n\nSelf-reliance in healthcare refers to the ability of healthcare professionals, patients, and institutions to manage their resources, knowledge, and skills independently."
1,1726150,Evaluate the effectiveness of management consulting in addressing conflicts within marketing.,"Management consulting can be highly effective in addressing conflicts within marketing by providing an objective, expert perspective that can help resolve issues and improve strategies."
2,3211968,Discuss the role of self-reliance in achieving success in software engineering.,Self-reliance plays a crucial role in achieving success in software engineering for several reasons:


In [8]:
# Select the 'id' and 'essay' columns for the submission frame.
submission = test_df.loc[:, ['id', 'essay']]

In [9]:
# Write only the 'id' and 'essay' columns: with the default index=True,
# to_csv would add the DataFrame index as an extra unnamed leading column.
submission.to_csv("submission.csv", index=False)