In [2]:
!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [3]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import fuzz



In [5]:
# Read the already-resolved reference queries and the new, still-unresolved
# queries from their CSV files (in that order).
resolved_queries, unresolved_queries = (
    pd.read_csv(csv_path)
    for csv_path in ('/content/resolved_queries.csv', '/content/new_queries.csv')
)

In [8]:
# Preview the first five resolved queries.
resolved_queries.head(n=5)

Unnamed: 0,Query_ID,Pre_Resolved_Query
0,1,Unable to connect to the internet
1,2,Payment failed during checkout
2,3,App crashes when opening settings
3,4,Forgot password and unable to reset
4,5,Unable to upload files to the server


In [9]:
# Preview the first five unresolved queries.
unresolved_queries.head(n=5)

Unnamed: 0,Variation_Query,Matches_With_Query_ID
0,Unabel to conect to the internet,1
1,Can’t connect to internet,1
2,Intenet not working,1
3,Payment failed while chekout,2
4,Payment did not go through during chckout,2


In [6]:
def preprocess_text(text):
    """Normalize a query string for similarity matching.

    Lower-cases the text, removes every character that is neither a word
    character nor whitespace, then collapses each whitespace run to a single
    space and trims the ends.

    Args:
        text: The raw query. ``None`` and NaN (what pandas yields for an
            empty CSV cell) are treated as missing; any other non-string
            value is converted with ``str()`` first.

    Returns:
        The cleaned string, or ``''`` when the value is missing.
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    text = str(text).lower()
    text = re.sub(r'[^\w\s]', '', text)
    # Dropping punctuation can leave double spaces ("a - b" -> "a  b"), which
    # would otherwise lower character-level similarity scores.
    return ' '.join(text.split())

In [11]:
# Store a normalized copy of each query text next to its raw column; the
# matching steps below compare these cleaned strings.
resolved_queries['cleaned_query'] = resolved_queries['Pre_Resolved_Query'].map(preprocess_text)
unresolved_queries['cleaned_query'] = unresolved_queries['Variation_Query'].map(preprocess_text)

In [12]:
def fuzzy_match(unresolved_query, resolved_queries, threshold=80, scorer=None):
    """Find the resolved query most similar to ``unresolved_query``.

    Args:
        unresolved_query: The query string to look up.
        resolved_queries: Iterable of candidate query strings.
        threshold: Minimum score a candidate must reach to count as a match.
        scorer: Optional callable ``scorer(a, b)`` returning a similarity
            score. Defaults to ``fuzz.ratio``. Passing another scorer (for
            example ``fuzz.token_sort_ratio``) changes how similarity is
            measured without changing this function.

    Returns:
        A ``(best_match, score)`` tuple. When no candidate reaches
        ``threshold`` the result is ``(None, 0)``. On ties the candidate
        seen first is kept.
    """
    if scorer is None:
        # Resolved at call time so the default stays fuzz.ratio.
        scorer = fuzz.ratio

    best_match = None
    highest_score = 0

    for resolved_query in resolved_queries:
        similarity_score = scorer(unresolved_query, resolved_query)

        # Strict ">" keeps the earliest candidate when scores are equal.
        if similarity_score > highest_score and similarity_score >= threshold:
            best_match = resolved_query
            highest_score = similarity_score

    return best_match, highest_score

In [13]:
# Empty accumulator for (query, best match, score) tuples from fuzzy matching.
fuzzy_matches = list()

In [14]:
# Start from an empty list inside this cell: appending to a list created in
# another cell would add a second copy of every row each time this cell is
# re-run, and the resulting frame would no longer line up row-for-row with
# the unresolved queries.
fuzzy_matches = []

# Materialize the candidates once instead of looking the column up per query.
resolved_candidates = resolved_queries['cleaned_query'].tolist()

for query in unresolved_queries['cleaned_query']:
    match, score = fuzzy_match(query, resolved_candidates, threshold=80)
    fuzzy_matches.append((query, match, score))

fuzzy_results = pd.DataFrame(fuzzy_matches, columns=['Unresolved Query', 'Best Resolved Query (Fuzzy)', 'Score'])

In [15]:
# TF-IDF vectorizer created with scikit-learn's default settings.
# NOTE(review): the defaults tokenize on whole words, so a misspelled word
# shares no feature with its correct spelling — confirm this is intended for
# typo-heavy queries.
tfidf_vectorizer = TfidfVectorizer()


In [16]:
# One corpus for the vectorizer: resolved queries first, unresolved after.
all_queries = pd.concat(
    [frame['cleaned_query'] for frame in (resolved_queries, unresolved_queries)]
)


In [17]:
# Fit the vectorizer on the combined corpus and vectorize it in the same
# call, so resolved and unresolved queries share one feature space.
tfidf_matrix = tfidf_vectorizer.fit_transform(all_queries)


In [18]:
# The matrix rows follow the corpus order (resolved first), so the number of
# resolved queries is the split point between the two groups.
n_resolved = len(resolved_queries)
resolved_tfidf, unresolved_tfidf = tfidf_matrix[:n_resolved], tfidf_matrix[n_resolved:]

# cosine_sim[i, j] compares unresolved query i with resolved query j.
cosine_sim = cosine_similarity(unresolved_tfidf, resolved_tfidf)

In [19]:
# Empty accumulator for (query, best match, similarity) tuples from TF-IDF.
cosine_matches = list()

In [20]:
# Start from an empty list inside this cell so that re-running it cannot
# append a second copy of every row.
cosine_matches = []

for idx, query in enumerate(unresolved_queries['cleaned_query']):
    best_match_idx = cosine_sim[idx].argmax()
    similarity_score = cosine_sim[idx, best_match_idx]
    # argmax returns 0 for an all-zero row, which would report the first
    # resolved query as the "best match" of a query that shares no term with
    # any resolved query. Report no match in that case instead.
    if similarity_score > 0:
        best_match = resolved_queries['cleaned_query'].iloc[best_match_idx]
    else:
        best_match = None
    cosine_matches.append((query, best_match, similarity_score))

cosine_results = pd.DataFrame(cosine_matches, columns=['Unresolved Query', 'Best Resolved Query (Cosine)', 'Similarity'])

In [21]:
# Put the cosine match and its similarity next to the fuzzy results,
# aligned on the row index.
cosine_columns = cosine_results[['Best Resolved Query (Cosine)', 'Similarity']]
combined_results = pd.concat([fuzzy_results, cosine_columns], axis=1)

In [22]:
# End the cell with the frame itself so the notebook renders it as a table
# instead of a line-wrapped plain-text dump.
combined_results

                             Unresolved Query  \
0            unabel to conect to the internet   
1                    cant connect to internet   
2                         intenet not working   
3                payment failed while chekout   
4   payment did not go through during chckout   
5                  payment issue at check out   
6    application crashes when opening setings   
7            app crash when going to settings   
8            settings cause the app to chrash   
9               forgot passwrd and cant reset   
10         forgotten password unable to reset   
11                   i cant reset my password   
12             unable to uplod file to server   
13         cant upload files on to the server   
14       file uploading to server not working   
15         no internet connection please help   
16          checkout page says payment failed   
17      settings page crashes app immediately   
18            password reset link not working   
19  server upload fa