In [None]:
%%capture
%pip install -q -U google-genai
%pip install google-cloud-aiplatform


In [None]:
import pandas as pd
import google.generativeai as genai
import time
import os

# Read the API key from the environment instead of hard-coding it, so the secret
# is never saved inside the notebook. Fail fast when it is missing: otherwise
# every request would fail individually, one slow row at a time.
api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "No Gemini API key found: set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before running this cell."
    )
genai.configure(api_key=api_key)

generation_config = {
    "temperature": 0.7,  # Sampling temperature; lower it for more deterministic titles
    "max_output_tokens": 32,  # Limit output to keep titles concise
}

# Safety settings (adjust as needed): every listed category is set to BLOCK_NONE.
safety_settings = [
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
]

# Initialize the Gemini model shared by the title-generation cells.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-001",
    generation_config=generation_config,
    safety_settings=safety_settings
)

# Input CSV with the abstracts and the destination for the generated titles.
input_csv = "/kaggle/input/springerjournal-450tk-0-5cosine/test.csv"
output_csv = "/kaggle/working/generated_titles.csv"

In [None]:
def generate_title(abstract):
    """Generate a title from an abstract using Gemini.

    Args:
        abstract: Abstract text. Anything that is not a non-blank string
            (for example the NaN pandas produces for an empty CSV cell) is
            rejected without calling the model.

    Returns:
        The stripped title text returned by the model, "Title generation failed"
        when the model returns only whitespace, or
        "Error: Unable to generate title" when the abstract is unusable or the
        request raises.
    """
    # Guard before building the prompt: concatenating a non-string would raise
    # TypeError outside the try block and abort the caller's whole run.
    if not isinstance(abstract, str) or not abstract.strip():
        print("Error generating title: abstract is missing or not text")
        return "Error: Unable to generate title"

    instruction = "Write a consise, formal and structured title for this scientific research work, return ONLY the title:\n"
    prompt = instruction + abstract + "\n\nTitle:"
    try:
        response = model.generate_content(prompt)
        title = response.text.strip()
        return title if title else "Title generation failed"
    except Exception as e:
        # Best-effort: one failed request must not stop the remaining rows.
        print(f"Error generating title: {e}")
        return "Error: Unable to generate title"

def process_csv(input_csv, output_csv, batch_size=10, delay=1.0):
    """Process a CSV file in batches and generate a title for every abstract.

    Args:
        input_csv: Path of the CSV to read; it must contain an 'abstract' column.
        output_csv: Path the DataFrame is written to after every batch.
        batch_size: Number of rows processed between two progress saves.
        delay: Seconds to sleep after each request to respect API rate limits;
            values <= 0 disable the pause.

    Returns:
        The DataFrame read from ``input_csv`` with an added 'generated_title'
        column.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or the CSV has no
            'abstract' column.
    """
    # A zero step makes range() raise an opaque error and a negative one
    # silently processes nothing, so reject both up front.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Read CSV
    df = pd.read_csv(input_csv)

    # Ensure 'abstract' column exists
    if 'abstract' not in df.columns:
        raise ValueError("CSV must contain an 'abstract' column")

    # Initialize column for generated titles
    df['generated_title'] = ""

    total = len(df)

    # Process in batches
    for start in range(0, total, batch_size):
        # Clamp the end so the final, possibly partial, batch is reported correctly.
        stop = min(start + batch_size, total)
        for position in range(start, stop):
            label = df.index[position]
            print(f"Processing abstract {position + 1}/{total}")
            df.at[label, 'generated_title'] = generate_title(df.at[label, 'abstract'])
            if delay > 0:
                time.sleep(delay)  # Delay to respect API rate limits

        # Save progress periodically
        df.to_csv(output_csv, index=False)
        print(f"Progress saved to {output_csv} after processing {stop} abstracts")

    # With no rows the loop never runs; still write the (header-only) output so
    # the file exists for whoever reads it next.
    if total == 0:
        df.to_csv(output_csv, index=False)

    return df

In [None]:
# Run the title-generation pipeline over the configured input file.
# Any failure is reported as a single message instead of a traceback.
try:
    result_df = process_csv(input_csv, output_csv)
except Exception as exc:
    print(f"Error processing CSV: {exc}")
else:
    print(f"Title generation completed. Results saved to {output_csv}")