In [None]:
pip install sentence-transformers pandas numpy

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Read the Jira export into a DataFrame and preview its first five rows as text.
tickets = pd.read_csv(filepath_or_buffer="jira_data.csv")
print(tickets.head(n=5))


                                             Summary Issue key  Issue id  \
0                  JRS Notification Template Changes  CITI-458   1465459   
1               Load test for ticket creation in JRS  CITI-457   1463000   
2  Ticket creation fails for description along wi...  CITI-456   1462797   
3  Explore about the ticket recommendation AI mod...  CITI-455   1460127   
4          Configuration support for Ticket creation  CITI-454   1456613   

   Parent id Issue Type       Status Project key Project name Project type  \
0  1452030.0    Subtask        To Do        CITI         CITI     software   
1  1452030.0    Subtask        To Do        CITI         CITI     software   
2  1452030.0    Subtask  In Progress        CITI         CITI     software   
3        NaN       Task        To Do        CITI         CITI     software   
4        NaN       Task         Done        CITI         CITI     software   

  Project lead  ... Comment.6 Comment.7 Comment.8 Comment.9 Comment.10  \


In [6]:
# Combine summary and description into the single text column that gets embedded.
# Missing values are filled BEFORE concatenating: str + NaN evaluates to NaN, so
# filling afterwards would blank the entire text (summary included) for every
# ticket that has no description.
tickets['text'] = (
    tickets['Summary'].fillna('') + ' ' + tickets['Description'].fillna('')
).str.strip()  # strip the joining space left over when one side is empty



In [7]:
# Load the 'all-mpnet-base-v2' SentenceTransformer once and keep it in the
# notebook-level `model`; both the embedding cell and get_recommendations()
# call model.encode() on this object.
model = SentenceTransformer('all-mpnet-base-v2')


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [8]:
# Generate one embedding per ticket, in DataFrame row order.
# encode() is documented to take a str or a list of str; handing it the Series
# only works while the index happens to be 0..n-1, so pass a plain list to keep
# embeddings aligned with rows even after filtering or re-indexing.
ticket_embeddings = model.encode(tickets['text'].tolist(), convert_to_tensor=True)


In [17]:
def get_recommendations(input_text, tickets, ticket_embeddings, top_n=5):
    """Return the tickets most similar to ``input_text``.

    Args:
        input_text: Free-text query to match against the tickets.
        tickets: DataFrame with 'Issue key' and 'Summary' columns whose rows are
            in the same order as ``ticket_embeddings``.
        ticket_embeddings: One embedding per ticket row, as a torch tensor (on
            any device) or an array-like.
        top_n: Maximum number of recommendations to return. Defaults to 5.

    Returns:
        A tuple ``(issue_keys, summaries)`` of pandas Series, ordered from the
        most to the least similar ticket.

    Note:
        Encodes the query with the notebook-level ``model``.
    """
    def _to_numpy_2d(embeddings):
        # scikit-learn needs host-memory arrays: a tensor living on a GPU cannot
        # be converted implicitly, so move it to the CPU first.
        if hasattr(embeddings, 'detach'):
            embeddings = embeddings.detach().cpu().numpy()
        # A single embedding vector becomes a 1-row matrix.
        return np.atleast_2d(np.asarray(embeddings))

    # Encode the query and bring both sides into 2-D NumPy form.
    query_matrix = _to_numpy_2d(model.encode(input_text, convert_to_tensor=True))
    ticket_matrix = _to_numpy_2d(ticket_embeddings)

    # Cosine similarity of the query against every ticket (row 0 = the query).
    sim_scores = cosine_similarity(query_matrix, ticket_matrix)[0]

    # Positions of the tickets, best match first, limited to top_n.
    top_positions = np.argsort(sim_scores)[::-1][:top_n]

    # Look the rows up once and split out the two columns callers expect.
    top_tickets = tickets.iloc[top_positions]
    return top_tickets['Issue key'], top_tickets['Summary']


In [25]:


# Example query: fetch the closest tickets and print them as "KEY: summary" lines.
input_text = "Im looking for citi server"
recommended_tickets, recommended_tickets_summary = get_recommendations(
    input_text, tickets, ticket_embeddings
)
formatted_recommendations = "\n".join(
    f"{issue_key}: {issue_summary}"
    for issue_key, issue_summary in zip(recommended_tickets, recommended_tickets_summary)
)
print(formatted_recommendations)

CITI-48: Sprint-7 CITI Support
CITI-85: Sprint 8 2023 CITI support
CITI-279: Analyse about OSS SCAN TOOL to get license information | CITI SERVER
CITI-408: CITI for UFS
CITI-131: Sprint 9 2023 CITI support
