# 🧠 NLP Player Feedback Analysis (iGaming)
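This notebook processes player feedback from `data/feedback.csv` using NLP techniques: tokenization, lemmatization, named entity recognition, and sentiment analysis. It saves the processed rows to `outputs/processed_feedback.csv` and plots the sentiment distribution, the most common tokens, and the named entity label frequencies.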

In [None]:
# 1️⃣ Imports and setup
import pandas as pd
import spacy
from textblob import TextBlob
import matplotlib.pyplot as plt
from collections import Counter
import os

# Shared setup for the rest of the notebook: the spaCy pipeline used to
# analyse each feedback text, and the folder that later cells write their
# CSV and chart files into (created if it does not exist yet).
SPACY_MODEL_NAME = "en_core_web_sm"
OUTPUT_DIR = "outputs"

nlp = spacy.load(SPACY_MODEL_NAME)
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# 2️⃣ Load the dataset
# The processing cell reads the 'id' and 'feedback' columns of every row, so
# check for them right after loading: a wrong or renamed CSV then fails here
# with a clear message instead of a bare KeyError in the middle of the loop.
input_path = "data/feedback.csv"
df = pd.read_csv(input_path)

required_columns = ("id", "feedback")
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise ValueError(
        f"{input_path} is missing required column(s): {', '.join(missing_columns)}"
    )

# Keep this as the last expression so the notebook displays a preview.
df.head()

In [None]:
# 3️⃣ Process feedback: tokens, NER, sentiment
# For every feedback row this cell derives:
#   - the lemmas of its alphabetic, non-stop-word tokens (spaCy)
#   - its named entities as (text, label) pairs (spaCy)
#   - a TextBlob polarity score rounded to 3 decimals, bucketed into
#     "positive" (> 0.1), "negative" (< -0.1) or "neutral" (otherwise)
# all_tokens and all_ents accumulate lemmas and entity labels across all rows
# for the frequency charts; output_df holds one processed record per row.
processed_data = []
all_tokens = []
all_ents = []

# Empty CSV cells are loaded by pandas as NaN (a float), which neither spaCy
# nor TextBlob accepts; treat missing feedback as an empty string so a single
# blank row cannot abort the whole run.
feedback_ids = df['id'].tolist()
feedback_texts = ["" if pd.isna(value) else str(value) for value in df['feedback']]

# nlp.pipe processes the texts in batches and yields the docs in input order,
# which is considerably faster than calling nlp() once per row.
for row_id, text, doc in zip(feedback_ids, feedback_texts, nlp.pipe(feedback_texts)):
    tokens = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
    all_tokens.extend(tokens)

    entities = [(ent.text, ent.label_) for ent in doc.ents]
    all_ents.extend(label for _, label in entities)

    polarity = round(TextBlob(text).sentiment.polarity, 3)
    if polarity > 0.1:
        sentiment = "positive"
    elif polarity < -0.1:
        sentiment = "negative"
    else:
        sentiment = "neutral"

    processed_data.append({
        "id": row_id,
        "original_feedback": text,
        "cleaned_tokens": " ".join(tokens),
        "named_entities": "; ".join(f"{ent_text} ({label})" for ent_text, label in entities),
        "sentiment_score": polarity,
        "sentiment_label": sentiment,
    })

output_df = pd.DataFrame(processed_data)
# Keep this as the last expression so the notebook displays a preview.
output_df.head()

In [None]:
# 4️⃣ Save output to CSV
# Persist the processed records; index=False keeps the DataFrame's row index
# out of the file so it contains only the processed columns.
output_path = "outputs/processed_feedback.csv"
output_df.to_csv(path_or_buf=output_path, index=False)
print("✅ Processed data saved.")

In [None]:
# 5️⃣ Plot sentiment distribution
# value_counts() orders labels by frequency, so a fixed colour list would be
# paired with whichever label happens to be most common (e.g. "negative"
# drawn in green). Reindexing to a fixed label order keeps every colour on
# its own label; a label that never occurred is shown as a zero-height bar,
# which also keeps the plot from failing when there is no data at all.
sentiment_colors = {"positive": "green", "neutral": "gray", "negative": "red"}
sentiment_counts = (
    output_df['sentiment_label']
    .value_counts()
    .reindex(list(sentiment_colors), fill_value=0)
)
sentiment_counts.plot(kind='bar', color=list(sentiment_colors.values()))
plt.title("Sentiment Distribution")
plt.xlabel("Sentiment")
plt.ylabel("Count")
plt.tight_layout()
# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig("outputs/sentiment_distribution.png")
plt.show()

In [None]:
# 6️⃣ Top 10 most common tokens
# Bar chart of the ten most frequent lemmas collected in all_tokens.
token_counts = Counter(all_tokens)
common_tokens = token_counts.most_common(10)

# With no tokens at all, zip(*common_tokens) yields nothing and the tuple
# unpacking below would raise ValueError, so the chart is skipped instead.
if common_tokens:
    tokens, counts = zip(*common_tokens)
    plt.bar(tokens, counts)
    plt.title("Top 10 Most Common Tokens")
    plt.xticks(rotation=45)
    plt.tight_layout()
    # Save before show(): the figure may be cleared once it has been displayed.
    plt.savefig("outputs/common_tokens.png")
    plt.show()

In [None]:
# 7️⃣ Named entity label frequency
# Bar chart of how often each entity label was collected in all_ents.
# The whole cell is a no-op when no entities were found, so no empty chart
# or image file is produced.
if all_ents:
    entity_counts = Counter(all_ents)
    # Split the label -> count mapping into two parallel tuples for plt.bar.
    labels = tuple(entity_counts.keys())
    counts = tuple(entity_counts.values())
    plt.bar(labels, counts, color="purple")
    plt.xticks(rotation=45)
    plt.title("Named Entity Label Frequency")
    plt.tight_layout()
    plt.savefig("outputs/named_entity_labels.png")
    plt.show()