In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from collections import Counter

# Load ticket summary data (assuming CSV)
# NOTE(review): the backslash makes this path Windows-specific; on POSIX it
# names a single file called ".data\tickets.csv" — confirm the intended location.
df = pd.read_csv(".data\\tickets.csv")

# Keep one text per ticket so that n-grams are never formed across the
# boundary between two unrelated tickets. astype(str) guards against
# non-string cells (e.g. purely numeric summaries), which would otherwise
# make str.join raise TypeError.
summary_texts = df["Summary"].dropna().astype(str).tolist()
summaries = " ".join(summary_texts)  # combined text, kept for any later use

# Preprocessing
nltk.download("stopwords")
nltk.download("punkt")
# Recent NLTK releases load word_tokenize's models from "punkt_tab" instead of
# "punkt"; on releases that do not know this id the call only reports an error
# and returns False.
nltk.download("punkt_tab")
stop_words = set(stopwords.words("english"))


def _clean_tokens(text):
    """Return the lowercase, alphabetic, non-stopword tokens of *text*.

    Each token has every run of non-word characters stripped; tokens that
    end up empty, non-alphabetic, or listed in ``stop_words`` are dropped.
    """
    stripped = (re.sub(r'\W+', '', token) for token in word_tokenize(text.lower()))
    return [token for token in stripped if token not in stop_words and token.isalpha()]


# Tokenization and cleaning (per ticket)
tokens_per_ticket = [_clean_tokens(text) for text in summary_texts]
words = [token for tokens in tokens_per_ticket for token in tokens]  # flat list of all cleaned tokens

# Generate Bigrams and Trigrams within each ticket only; a ticket with fewer
# than n cleaned tokens simply contributes no n-grams.
bigrams = [gram for tokens in tokens_per_ticket for gram in ngrams(tokens, 2)]
trigrams = [gram for tokens in tokens_per_ticket for gram in ngrams(tokens, 3)]

# Convert to space-separated phrase strings
bigram_phrases = [" ".join(bigram) for bigram in bigrams]
trigram_phrases = [" ".join(trigram) for trigram in trigrams]

# Count frequency of each phrase
bigram_freq = Counter(bigram_phrases)
trigram_freq = Counter(trigram_phrases)

# Select the 50 most common phrases of each kind, joined into one string
top_bigrams = " ".join([phrase for phrase, _ in bigram_freq.most_common(50)])
top_trigrams = " ".join([phrase for phrase, _ in trigram_freq.most_common(50)])

def _phrase_cloud(phrase_freq, top_n=50):
    """Build a word cloud whose entries are whole phrases sized by their counts.

    ``phrase_freq`` is a Counter mapping phrase -> count. The ``top_n`` most
    common phrases are passed to ``generate_from_frequencies`` so each phrase
    stays intact and is scaled by its real count. Passing the phrases to
    ``generate()`` as plain text would re-tokenize them into individual
    words and discard the counts.

    Returns None when there are no phrases, because a cloud cannot be built
    from an empty mapping.
    """
    top_phrases = dict(phrase_freq.most_common(top_n))
    if not top_phrases:
        return None
    cloud = WordCloud(width=800, height=400, background_color="white")
    return cloud.generate_from_frequencies(top_phrases)


def _show_cloud(cloud, title):
    """Display *cloud* in its own figure under *title*; report and skip if it is None."""
    if cloud is None:
        print(f"{title}: no phrases to display.")
        return
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.show()


# Generate Word Cloud for Bigrams
wordcloud_bigrams = _phrase_cloud(bigram_freq)
_show_cloud(wordcloud_bigrams, "Bigram Word Cloud")

# Generate Word Cloud for Trigrams
wordcloud_trigrams = _phrase_cloud(trigram_freq)
_show_cloud(wordcloud_trigrams, "Trigram Word Cloud")
