In [None]:
import pandas as pd
import random
import uuid
import datetime


# --- Synthetic chat-log generation -------------------------------------------
# Tunable knobs for the generated dataset.
SEED = 42                 # fixes intents, messages, gaps and user ids across runs
N_USERS = 100             # number of simulated users
MESSAGES_PER_USER = 10    # consecutive messages generated per user

# Example utterances for every supported intent.
intent_examples = {
    "greet": ["Hello", "Hi", "Hey there", "Good morning", "Yo!"],
    "set_reminder": ["Remind me to drink water", "Set a reminder for meeting", "Remind me at 6pm"],
    "check_reminder": ["What are my reminders?", "Do I have any reminders?", "Reminders for today?"],
    "ask_weather": ["What's the weather?", "Will it rain today?", "Weather update please"],
    "play_music": ["Play some music", "Play relaxing songs", "Start my playlist"],
    "turn_on_lights": ["Turn on the lights", "Lights on", "Switch on bedroom lights"],
    "turn_off_lights": ["Turn off the lights", "Lights off", "Switch off all lights"],
    "get_news": ["Tell me the news", "What's in the news today?", "News update"],
    "set_alarm": ["Set an alarm for 7am", "Wake me up at 6", "Alarm at 5:30am"],
    "bye": ["Bye", "See you", "Thanks, goodbye"]
}

# Derived from the dict keys (insertion order) so the intent list and the
# example table cannot drift apart.
intents = list(intent_examples)


def generate_synthetic_chats(n_users=N_USERS, messages_per_user=MESSAGES_PER_USER,
                             seed=SEED, now=None):
    """Generate random chat records for simulated users.

    Args:
        n_users: Number of users to simulate.
        messages_per_user: Number of consecutive messages per user.
        seed: Seed for the private random generator; the same seed and the
            same ``now`` give identical records.
        now: Reference ``datetime`` from which each user's start time is
            offset by 0-30 days. Defaults to the current time.

    Returns:
        A list of dicts with keys ``user_id``, ``timestamp``, ``message``,
        ``intent`` and ``prev_intent``. ``prev_intent`` is ``"none"`` for a
        user's first message.
    """
    # A private generator keeps the global `random` state untouched.
    rng = random.Random(seed)
    if now is None:
        now = datetime.datetime.now()

    records = []
    for _ in range(n_users):
        # Build the UUID from seeded bits so user ids are reproducible too
        # (uuid.uuid4() draws from the OS and ignores any seed).
        user_id = str(uuid.UUID(int=rng.getrandbits(128), version=4))
        prev_intent = "none"
        timestamp = now - datetime.timedelta(days=rng.randint(0, 30))

        for _ in range(messages_per_user):
            intent = rng.choice(intents)
            message = rng.choice(intent_examples[intent])

            records.append({
                "user_id": user_id,
                "timestamp": timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                "message": message,
                "intent": intent,
                "prev_intent": prev_intent
            })

            prev_intent = intent
            # Next message arrives 1-120 minutes later, so timestamps are
            # strictly increasing within a user.
            timestamp += datetime.timedelta(minutes=rng.randint(1, 120))

    return records


data = generate_synthetic_chats()

df = pd.DataFrame(data)
df.to_csv("synthetic_user_chats.csv", index=False)
df.head(10)

In [None]:
# Peek at the last five generated rows.
df.tail(n=5)

In [None]:
# Parse the timestamp strings into datetimes, then order every user's
# messages chronologically.
df = df.assign(timestamp=pd.to_datetime(df["timestamp"])).sort_values(
    by=["user_id", "timestamp"]
)

# Frequency of each intent, then of each preceding intent.
for column in ("intent", "prev_intent"):
    print(df[column].value_counts())

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt


# Each row already stores (prev_intent, intent), so the transition pairs can
# be read straight off the two columns; no per-user regrouping is needed.
# A user's first message contributes a transition from "none".
transitions = pd.Series(list(zip(df["prev_intent"], df["intent"])))

# Unpack the tuples in one vectorised constructor call instead of
# Series.apply(pd.Series), which builds one Series object per row.
transition_df = pd.DataFrame(transitions.tolist(), columns=["from", "to"])

# Count occurrences of every (from, to) pair, most frequent first.
transition_df = transition_df.value_counts().reset_index(name="count")

# Wide matrix: rows = previous intent, columns = next intent; pairs that
# never occurred become 0.
pivot = transition_df.pivot(index="from", columns="to", values="count").fillna(0)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(pivot, annot=True, fmt=".0f", cmap="YlGnBu", ax=ax)
ax.set_title("🌀 Intent Transition Matrix")
ax.set_xlabel("Next Intent")
ax.set_ylabel("Previous Intent")
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the intent vocabulary, then store each row's integer token.
le = LabelEncoder().fit(df["intent"])
df["intent_token"] = le.transform(df["intent"])

In [None]:
# Show the fitted label vocabulary. Per scikit-learn's LabelEncoder, the
# position of a label in this array is the integer token it is encoded as.
print(le.classes_)

In [None]:
# Turn every user's token history into (prefix, next_token) training pairs:
# for a history t0..tk, emit ([t0], t1), ([t0, t1], t2), ... up to the last token.
sequences = [
    (history[:cut], history[cut])
    for _, user_rows in df.groupby("user_id")
    for history in [user_rows["intent_token"].tolist()]
    for cut in range(1, len(history))
]

In [None]:
# Preview the first five (prefix, target) pairs.
for example in sequences[:5]:
    print(example)

In [None]:
from torch.nn.utils.rnn import pad_sequence
import torch

# One 1-D LongTensor per prefix, plus a single tensor of next-token targets.
X_seq = [torch.tensor(x, dtype=torch.long) for x, y in sequences]
y_seq = torch.tensor([y for x, y in sequences], dtype=torch.long)

# Left-pad to a common length. The model reads its prediction from the LAST
# position (x[:, -1, :]); with pad_sequence's default right-padding that slot
# holds padding for every prefix shorter than the longest one. Reversing each
# sequence, padding, then flipping the time axis back puts the padding at the
# front, so the final column is always the most recent real token.
# NOTE(review): padding_value=0 is also a valid intent token, because
# LabelEncoder ids start at 0. A dedicated pad id would need matching changes
# in the model's vocabulary size, so it is left as-is here.
X_padded = pad_sequence(
    [seq.flip(0) for seq in X_seq], batch_first=True, padding_value=0
).flip(1)

print(X_padded.shape)
print(y_seq.shape)

In [None]:
import torch.nn as nn

class IntentPredictor(nn.Module):
    """Transformer encoder that predicts the next intent token.

    Args:
        vocab_size: Number of distinct intent tokens; also the output width.
        d_model: Embedding / hidden size.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        max_len: Longest sequence supported by the learned positional
            embedding.
    """

    def __init__(self, vocab_size, d_model=64, nhead=4, num_layers=2, max_len=100):
        super().__init__()
        self.max_len = max_len
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = nn.Embedding(max_len, d_model)
        # batch_first=True is required: forward() feeds (batch, seq, d_model).
        # With the default (batch_first=False) the encoder would treat the
        # batch axis as time and attend across unrelated samples.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        """Return next-token logits.

        Args:
            x: LongTensor of token ids with shape (batch, seq_len).

        Returns:
            Tensor of shape (batch, vocab_size) computed from the encoder
            output at the last sequence position.

        Raises:
            ValueError: If seq_len exceeds ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len={self.max_len}"
            )
        # Absolute position ids 0..seq_len-1, broadcast over the batch.
        pos = torch.arange(seq_len, device=x.device).unsqueeze(0)
        x = self.embedding(x) + self.pos_encoder(pos)
        x = self.transformer(x)
        x = x[:, -1, :]  # Last time step
        return self.fc(x)

In [None]:
# Seed torch before the model is built so weight initialisation and dropout
# are the same on every Restart & Run All.
torch.manual_seed(42)

model = IntentPredictor(vocab_size=len(le.classes_))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

epochs = 10
model.train()  # enable dropout for training; set once, nothing below toggles it
for epoch in range(epochs):
    # Full-batch step: clear old gradients, forward, backward, update.
    optimizer.zero_grad()
    out = model(X_padded)
    loss = criterion(out, y_seq)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

In [None]:
# Intent history to predict from, oldest first.
test_sequence = ["greet", "ask_weather"]

# Encode to token ids and add a batch dimension -> shape (1, seq_len).
# as_tensor on the ndarray avoids the slow list-of-ndarrays tensor constructor.
test_tokens = torch.as_tensor(
    le.transform(test_sequence), dtype=torch.long
).unsqueeze(0)

# Switch to eval mode: the training loop leaves the model in train mode, where
# the encoder's dropout would make predictions random from call to call.
model.eval()
with torch.no_grad():
    output = model(test_tokens)
    pred = torch.argmax(output, dim=1)
    print("Predicted next intent:", le.inverse_transform(pred.cpu().numpy()))