In [None]:
# High-level helper: build a ready-to-call text-generation pipeline
from transformers import pipeline

pipe = pipeline(
    task="text-generation",
    model="noelmathewisaac/inspirational-quotes-distilgpt2",
)

In [None]:
# Low-level alternative: load the tokenizer and the causal-LM weights explicitly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="noelmathewisaac/inspirational-quotes-distilgpt2"
)
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="noelmathewisaac/inspirational-quotes-distilgpt2"
)

file_path = '/content/drive/My Drive/prepro/generated_quotes.csv'

# Reset the output file to zero bytes. Opening in 'w' mode already truncates
# (or creates) the file, so an explicit truncate(0) call is unnecessary.
# NOTE(review): this is a Colab/Drive path while the cells below read and write
# under /kaggle/ — confirm this is still the file you intend to clear.
with open(file_path, 'w'):
    pass


In [None]:
import random
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import os

# Prefer CUDA when PyTorch reports an available GPU; otherwise run on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the tokenizer and model weights, then move the model onto the chosen device
tokenizer = AutoTokenizer.from_pretrained("noelmathewisaac/inspirational-quotes-distilgpt2")
model = AutoModelForCausalLM.from_pretrained("noelmathewisaac/inspirational-quotes-distilgpt2")
model = model.to(device)

# Location of the preprocessed dataset among the Kaggle inputs
dataset_path = '/kaggle/input/sssaaammmm/preprocessed_quotes_no_quote_author_category (1).csv'

# Read the preprocessed dataset into a DataFrame
preprocessed_data = pd.read_csv(dataset_path)

# Output file for the generated quotes, inside Kaggle's working directory
generated_file_path = '/kaggle/working/generated_quotes1.csv'

# Helpers and entry point for resumable, row-by-row quote generation
def _count_saved_quotes(generated_file_path):
    """Return the number of quotes already stored in the headerless TSV output file."""
    # A missing or zero-byte file means nothing has been generated yet.
    if not os.path.exists(generated_file_path) or os.path.getsize(generated_file_path) == 0:
        return 0
    try:
        # The file is always written with header=False, so it must be read with
        # header=None; otherwise the first quote is consumed as column names and
        # the resume position ends up one row short (duplicating a row per resume).
        saved = pd.read_csv(generated_file_path, delimiter='\t', header=None)
    except pd.errors.EmptyDataError:
        # File has bytes but no parseable rows (e.g. only blank lines).
        return 0
    return len(saved)


def _append_quotes(batch, generated_file_path):
    """Append a list of quote records to the output file as headerless TSV rows."""
    pd.DataFrame(batch).to_csv(generated_file_path, mode='a', header=False, index=False, sep='\t')


def generate_quotes_for_each_row(dataframe, max_rows, generated_file_path, max_length=100, save_every=100):
    """Generate one quote per dataset row and append the results to a TSV file.

    Generation resumes after the rows already present in ``generated_file_path``,
    so re-running after an interruption neither overwrites nor repeats earlier work.
    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        dataframe: DataFrame with a ``"first_3_words"`` column used as the prompt.
        max_rows: Exclusive upper bound on the row index to process; values larger
            than ``len(dataframe)`` are clamped to the dataset size.
        generated_file_path: Tab-separated, headerless output file (appended to).
        max_length: ``max_length`` passed to ``model.generate``.
        save_every: Number of new quotes to buffer before each append to disk.

    Returns:
        None. Output is written to ``generated_file_path`` and progress is printed.
    """
    first_row = _count_saved_quotes(generated_file_path)
    # Never index past the end of the dataset, even if the caller over-asks.
    last_row = min(max_rows, len(dataframe))

    pending = []
    for index in range(first_row, last_row):
        seed_text = dataframe.iloc[index]["first_3_words"]
        input_ids = tokenizer.encode(seed_text, return_tensors="pt").to(device)
        # All-ones integer mask on the same device as the prompt (no padding in a single prompt).
        attention_mask = torch.ones_like(input_ids)
        # NOTE(review): top_k is a sampling option; do_sample is not set here, so it
        # may have no effect depending on the model's generation config — confirm.
        output = model.generate(input_ids, max_length=max_length, no_repeat_ngram_size=20, top_k=50, pad_token_id=model.config.eos_token_id, attention_mask=attention_mask)
        quote = tokenizer.decode(output[0], skip_special_tokens=True)

        pending.append({"Generated_Quote": quote, "Source_Row": f"Row {index + 1}"})

        # Flush periodically so an interrupted run loses at most `save_every` quotes.
        # `>=` matches the original modulo check for positive values and avoids a
        # ZeroDivisionError when save_every is 0.
        if len(pending) >= save_every:
            _append_quotes(pending, generated_file_path)
            pending = []  # Start a fresh buffer so nothing is written twice
            print(f"Yay {index + 1} saved")

    # Write whatever is left over after the last full batch
    if pending:
        _append_quotes(pending, generated_file_path)
        print(f"Yay {last_row} saved")

# Cover the whole dataset: at most one generated quote for every row
max_rows_to_generate = preprocessed_data.shape[0]
generate_quotes_for_each_row(
    dataframe=preprocessed_data,
    max_rows=max_rows_to_generate,
    generated_file_path=generated_file_path,
)


In [None]:
import shutil

# Where the file is copied from (Kaggle input dataset)
source_file = '/kaggle/input/sssaaammmm/generated_quotes.csv'

# Where the copy is placed (Kaggle working directory)
destination_file = '/kaggle/working/generated_quotes1.csv'

# NOTE(review): the destination is the same path the generation cell appends to,
# so running this cell after generation replaces that file — confirm the run order.
shutil.copy(src=source_file, dst=destination_file)
