In [None]:
import pandas as pd
import spacy

# Read the topics CSV into a DataFrame
file_path = 'optimal_elbow_15_topics.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# spaCy English pipeline; its part-of-speech tags are used further down
nlp = spacy.load(name="en_core_web_sm")

# Function to generate topic names based on keywords
def generate_topic_name(keywords, num_words=2):
    """Build a short topic name from a comma-separated keyword string.

    The first ``num_words`` keywords are joined with single spaces.

    Args:
        keywords: Comma-separated keywords, e.g. ``"message, status, send"``.
            Whitespace around each keyword is ignored, so ``"a,b"`` and
            ``"a,  b"`` are treated like ``"a, b"``.
        num_words: How many leading keywords to keep in the name.

    Returns:
        The topic name, or an empty string when ``keywords`` is not a string
        (for example a NaN from an empty CSV cell) or contains no keywords.
    """
    # A missing cell is not a str; there is nothing to build a name from.
    if not isinstance(keywords, str):
        return ""

    # Split on the comma alone and strip each piece, so the result does not
    # depend on the exact spacing after the separator.
    words = [piece.strip() for piece in keywords.split(",")]
    # Drop blanks left by stray or trailing commas.
    words = [word for word in words if word]

    return " ".join(words[:num_words])

# Derive a topic name for every row from its 'Keywords' cell
keyword_column = df['Keywords']
df['Generated Topic Name'] = keyword_column.map(generate_topic_name)

# Function to reorder topic names to place verbs first if both verb and noun are present
def reorder_verb_noun(topic_name):
    """Return "<verb> <noun>" when the name contains both, else the name unchanged.

    The name is tagged with the module-level spaCy pipeline ``nlp``. The first
    token tagged VERB and the first token tagged NOUN or PROPN are picked; if
    either kind is missing, ``topic_name`` is returned as given.
    """
    tagged = nlp(topic_name)

    first_verb = next((tok.text for tok in tagged if tok.pos_ == 'VERB'), None)
    first_noun = next(
        (tok.text for tok in tagged if tok.pos_ in ('NOUN', 'PROPN')),
        None,
    )

    # Without both a verb and a noun there is nothing to reorder.
    if first_verb is None or first_noun is None:
        return topic_name

    return f"{first_verb} {first_noun}"

# Rewrite each generated name so a verb, when present, comes before the noun
df['Generated Topic Name'] = df['Generated Topic Name'].map(reorder_verb_noun)

# Show each topic number next to its final name
print(df.loc[:, ['Topic Number', 'Generated Topic Name']])

# Write the full table (without the index column) to a new CSV file
output_file_path = 'keywords_with_generated_topic_names.csv'
df.to_csv(path_or_buf=output_file_path, index=False)
print(f"New file with generated topic names saved to: {output_file_path}")

    Topic Number Generated Topic Name
0              1       message status
1              2              use way
2              3          item applet
3              4    opinion direction
4              5  share automatically
5              6          create post
6              7      publish account
7              8        tweet twitter
8              9            photo url
9             10       wordpress save
10            11        evernote note
11            12        facebook page
12            13        update change
13            14              add rss
14            15              com pic
New file with generated topic names saved to: keywords_with_generated_topic_names.csv
