In [None]:
!pip install -U sentence-transformers==2.2.2
!pip install pandas
!pip install numpy

In [6]:
import transformers
import sentence_transformers

# Report the library versions this notebook was run with (one per line).
print(
    f"Transformers version: {transformers.__version__}",
    f"Sentence-Transformers version: {sentence_transformers.__version__}",
    sep="\n",
)

Transformers version: 4.31.0
Sentence-Transformers version: 2.2.2


In [7]:
import pandas as pd
import numpy as np

In [8]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model used for every encode() call below.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [9]:
def process_questions(file_info, base_dir='../docs/'):
    """Load one pipe-delimited question file into a DataFrame.

    The file is read with ``sep='|'`` and no header row; its two fields are
    named ``question`` and ``difficulty``. Surrounding whitespace is removed
    from the text columns and a constant ``technology`` column is appended.

    Parameters
    ----------
    file_info : dict
        Must contain ``'file_path'`` (file name relative to ``base_dir``) and
        ``'technology'`` (value written to the ``technology`` column).
    base_dir : str, optional
        Directory holding the question files. Defaults to ``'../docs/'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``question``, ``difficulty`` and ``technology``.

    Raises
    ------
    KeyError
        If ``file_info`` lacks ``'file_path'`` or ``'technology'``.
    FileNotFoundError
        If the resolved file does not exist.
    """
    import os

    df = pd.read_csv(
        os.path.join(base_dir, file_info['file_path']),
        sep='|',
        header=None,
        names=['question', 'difficulty'],
    )

    # Fields written as "text | level" keep the padding around the separator,
    # so both columns need trimming, not just the question.
    df['question'] = df['question'].str.strip()
    # Only trim difficulty when it holds text; a purely numeric column has no
    # .str accessor and needs no trimming.
    if pd.api.types.is_string_dtype(df['difficulty']):
        df['difficulty'] = df['difficulty'].str.strip()

    df['technology'] = file_info['technology']

    return df

# Read every question file, in a fixed order, tagging each with its technology
(
    java_questions,
    microservice_questions,
    springboot_questions,
    mysql_questions,
    docker_k8s_questions,
) = (
    process_questions({'file_path': path, 'technology': tech})
    for path, tech in [
        ('java.txt', 'java'),
        ('microservice', 'microservice'),
        ('springboot', 'springboot'),
        ('database', 'database'),
        ('devops', 'devops'),
    ]
)

# Stack the per-technology frames into one frame with a fresh 0..n-1 index
questions_df = pd.concat(
    [
        java_questions,
        microservice_questions,
        springboot_questions,
        mysql_questions,
        docker_k8s_questions,
    ],
    ignore_index=True,
)

In [10]:
# Pull the question text out of the frame as a plain Python list
sentences1 = questions_df['question'].to_list()

In [11]:
# Display the list of loaded questions as this cell's output
sentences1

['What is a class in Java?',
 'How do you define a method in Java?',
 'What is the purpose of the main method in Java?',
 'What is a variable in Java?',
 'How do you declare an array in Java?',
 'What is the difference between int and float in Java?',
 'What is the syntax for a for loop in Java?',
 'How do you create an object in Java?',
 'What is a constructor in Java?',
 'What is the purpose of comments in Java?',
 'What is a String in Java?',
 'What is the use of the if statement in Java?',
 'How do you declare a constant in Java?',
 'What is the boolean type used for in Java?',
 'What does the break statement do in Java?',
 'How do you check the length of an array in Java?',
 'What is the difference between == and = in Java?',
 'What is a while loop in Java?',
 'How do you create a simple Java program that prints "Hello, World"?',
 'What does the return statement do in Java?',
 'What is the difference between == and equals() in Java?',
 'How do you handle exceptions in Java?',
 'Wh

In [12]:
# Query questions; each one is matched against the loaded questions in sentences1
sentences2 = [
    "what is dependency injection?",
    "What is the difference between @Component and @Bean?"
]

In [13]:
# Compute embeddings for both lists.
# normalize_embeddings=True asks encode() for unit-length vectors, so the dot
# product taken afterwards is a true cosine similarity whichever model is loaded.
embeddings1 = model.encode(sentences1, normalize_embeddings=True)
embeddings2 = model.encode(sentences2, normalize_embeddings=True)

In [14]:
# Pairwise dot products: rows follow sentences2, columns follow sentences1.
# A dot product equals cosine similarity only when the embeddings are unit-length.
similarities = embeddings2 @ embeddings1.T

In [15]:
def get_top_n_similar(sentences1, sentences2, similarities, top_n=2):
    """Print the ``top_n`` best-scoring entries of ``sentences1`` for each query.

    ``similarities[i][j]`` is read as the score between ``sentences2[i]`` and
    ``sentences1[j]``. Results are written to stdout; nothing is returned.
    """
    for query_pos, query in enumerate(sentences2):
        scores = similarities[query_pos]
        # argsort is ascending, so reverse it and keep the first top_n positions
        best_positions = np.argsort(scores)[::-1][:top_n]

        print(f"\nQuestion: '{query}'")
        print(f"Top {top_n} matching questions:")
        for match_pos in best_positions:
            candidate = sentences1[match_pos]
            print(f" - {candidate} (Score: {scores[match_pos]:.4f})")

# Print the two closest stored questions for every query in sentences2
get_top_n_similar(
    sentences1=sentences1,
    sentences2=sentences2,
    similarities=similarities,
    top_n=2,
)


Question: 'what is dependency injection?'
Top 2 matching questions:
 - What is the importance of dependency injection in microservices? (Score: 0.6943)
 - How can you implement dependency injection in microservices? (Score: 0.6846)

Question: 'What is the difference between @Component and @Bean?'
Top 2 matching questions:
 - What is the difference between @Component, @Service, and @Repository? (Score: 0.7711)
 - How do you use @Bean to define custom beans in Spring Boot? (Score: 0.6554)


In [16]:
import pickle

# Write the sentence-transformer model to a local directory
model.save('sentence_similarity_detector_model')

# Serialize the combined questions DataFrame with pickle
with open('questions_df2.pkl', 'wb') as pickle_file:
    pickle.dump(questions_df, pickle_file)