In [29]:
import pandas as pd
import numpy as np
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import accuracy_score, classification_report
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer


In [2]:
# Step 1: Load the First JSON file

data_list = []
with open('intents.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        data_list.append(data)
    
df1 = pd.DataFrame(data_list)    
df1.to_csv('output_intents.csv', index=False)

In [3]:
# Load the Second JSON file

data_list = []
with open('raw.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        data_list.append(data)
    
df2 = pd.DataFrame(data_list)
df2.to_csv('output_raw.csv', index=False)

In [4]:
df1.head()

Unnamed: 0,id,user_id,tags,intents,answer
0,7544,4450,"[pause, timescale]","[How can I pause my game?, What is the best wa...","In the Editor, you can just click the pause bu..."
1,7561,3146,"[iphone, upload, appstore]",[How do I upload my iPhone app to the app stor...,Log into Apple's developer portal and click th...
2,7572,142,"[debug, debugger]",[Is there a way to attach a debugger to the co...,You can attach the debugger to the editor proc...
3,7599,1213,"[editor, editor-launch-fail, editor-crash]","[Why project indefinitely loads when opened?, ...","To attempt to solve the editor crash, create a..."
4,7626,2218,"[animation, import, maya]",[How can I make animations snap to a frame in ...,Set the time of AnimationState of the relevant...


In [5]:
df2.head()

Unnamed: 0,id,user_id,tags,post_date,score,title,question,answer
0,11458,44417,[timer],2019-03-23,22.0,How to make a timer in the game,How to make a timer in the game? What I have s...,This should get you started: `private var star...
1,11858,2597,"[editor, instantiate, editor-scripting, persist]",2019-04-13,13.0,Programatically instantiating GameObjects to t...,"Hello, I am working on a tile based tactics ga...",The syntax to create gameobjects is the same a...
2,13289,4333,"[rigidbody, cube, rotatearound, roll]",2021-07-10,0.0,Rolling Cube with Rigidbody?,"Hi All, New to Unity3D, a few hours in, so bea...",Here's a short Youtube video that covers exact...
3,27760,123,"[input, unityremote]",2012-01-17,1.0,Unity Remote not sending input from iPhone or ...,"If I Build and Run to my iPhone4, all the inpu...","Stupid question, but is your mac/phone both co..."
4,48510,5822,"[instantiate, prefab]",2011-07-13,0.0,"Prefab, Instantiate, Transfer Speed","prefab, Instantiate, transfer speed??? How do ...",Use Prefab in your Game Here is the tutorial f...


In [6]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12270 entries, 0 to 12269
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id       12270 non-null  int64 
 1   user_id  12270 non-null  int64 
 2   tags     12270 non-null  object
 3   intents  12270 non-null  object
 4   answer   12270 non-null  object
dtypes: int64(2), object(3)
memory usage: 479.4+ KB


In [7]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54509 entries, 0 to 54508
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   id         54509 non-null  int64  
 1   user_id    54509 non-null  int64  
 2   tags       54509 non-null  object 
 3   post_date  54509 non-null  object 
 4   score      54505 non-null  float64
 5   title      54509 non-null  object 
 6   question   54509 non-null  object 
 7   answer     54505 non-null  object 
dtypes: float64(1), int64(2), object(5)
memory usage: 3.3+ MB


In [8]:
df2.isnull().sum()

id           0
user_id      0
tags         0
post_date    0
score        4
title        0
question     0
answer       4
dtype: int64

In [9]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12270 entries, 0 to 12269
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id       12270 non-null  int64 
 1   user_id  12270 non-null  int64 
 2   tags     12270 non-null  object
 3   intents  12270 non-null  object
 4   answer   12270 non-null  object
dtypes: int64(2), object(3)
memory usage: 479.4+ KB


In [10]:
df1['intents'][0]

['How can I pause my game?',
 'What is the best way to pause my game?',
 'When I play a game, how do I pause it?',
 'Could you please tell me how I can pause my game?',
 'In order to pause my game, what should I do?',
 'My game is paused, and I would like to know how to stop it?',
 'Can I pause my game in a certain way?',
 'When I am playing a video game, how can I pause it?',
 'What is the best way to pause a game once I have started it?',
 'I am unable to pause my game for some reason. What should I do?',
 'Would you mind telling me how I can pause my game?']

In [11]:
df_explodes = df1.explode('intents',ignore_index=True)

In [12]:
df_explodes = shuffle(df_explodes)

In [13]:
df_explodes

Unnamed: 0,id,user_id,tags,intents,answer
21224,229605,4621,"[lighting, shadow, line]",Why is the shadow projector causing a funny in...,The shadow projector is stretching the image h...
75059,933746,80,"[editor, variable, editorwindow, scope]",Is there a way to access EditorWindow variable...,#if UNITY\_EDITOR
96176,1295798,508,"[character movement, root motion]","In an animation where there is root motion, ho...",Set the character's position after an animatio...
93123,1235959,388,"[getcomponent, loading, reference, cache, game...",The gameobject.Find/GetComponent method does n...,Because the component doesn't exist yet.
9734,138940,2486,"[scene, large, z-fighting]","If I am creating a large scene in Unity, and I...",Don't use z-writing and z-reading on the sea. ...
...,...,...,...,...,...
127888,1743174,4517,"[scripting problem, movement, scripting beginn...","If the sound effect does not continue to play,...",Use the PlaySound() method.
55247,660973,0,[crafting],In order to make my program so that the value ...,Increase furnace by 1 if stone >= 15
74054,921841,419,"[audio, filter, pass, low, cutoff]",I would like to change the cutoff frequency of...,filter.cutoffFrequency = 100;
32544,369235,0,[trail],If I wanted to create a trail like in Angry Bi...,Inflate a Mesh Renderer.


In [24]:
# At first we will train using intent and predict answer
X = df_explodes['intents']
y = df_explodes['tags']

In [None]:
y[0]

In [31]:
# Create and fit the MultiLabelBinarizer
mlb = MultiLabelBinarizer()
binary_labels_y = mlb.fit_transform(y)

In [38]:
binary_labels_y[0]

array([0, 0, 0, ..., 0, 0, 0])

In [33]:
# Split datasets into train and test
X_train,X_test,y_train,y_test = train_test_split(X,binary_labels_y,test_size=0.2,random_state=42)

In [37]:
y_train.

AttributeError: 'numpy.ndarray' object has no attribute 'head'

In [35]:
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

In [36]:
# Train the Naive Bayes classifier
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)

ValueError: y should be a 1d array, got an array of shape (107346, 3738) instead.

In [None]:
y_pred

In [None]:
# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
def preprocess_text(text_list):
    # Convert the list of strings to a single string
    text = ' '.join(text_list)
    return text


In [None]:
df1['intents'] = df1['intents'].apply(preprocess_text)

In [None]:
# Combine user intents and all intents for TF-IDF vectorization
corpus = df1['intents']


In [None]:
corpus

In [None]:
# Feature Extraction using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)
X

In [None]:
# Calculate similarity scores for all intents
similarity_scores = cosine_similarity(X, X)
similarity_scores

In [None]:
# Assuming you have a new user intent to find the most similar intent
new_user_intent = input("Enter you Question : ")
new_user_intent_vector = vectorizer.transform([new_user_intent])
new_user_intent_vector

In [None]:
# Calculate similarity between the new user intent and all intents
new_user_similarity_scores = cosine_similarity(new_user_intent_vector, X)
new_user_similarity_scores

In [None]:
# Find the index of the most similar intent
most_similar_index = new_user_similarity_scores.argmax()
most_similar_index

In [None]:
# Retrieve the answer for the most similar intent
most_similar_answer = df1.iloc[most_similar_index]['answer']

In [None]:
print("Most Similar Intent:", df1.iloc[most_similar_index]['intents'])
print("Most Similar Tag:", df1.iloc[most_similar_index]['tags'])
print("Most Similar Answer:", most_similar_answer)