In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install transformers
!pip install tensorflow
!pip install sentence_transformers
!pip install rouge

In [3]:
# Loading all necessary libraries

from transformers import AutoTokenizer, BertForQuestionAnswering, pipeline, TFAutoModelForQuestionAnswering
import tensorflow as tf
import re
from nltk.tokenize import sent_tokenize, word_tokenize
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import string
import glob
from os import truncate
from pathlib import Path
import textwrap
import time

In [4]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [5]:
import warnings
warnings.filterwarnings('ignore')

In [6]:
#Creating user_ratings_df dataframe to store the user feedback
user_ratings_df = pd.DataFrame(columns=['User_Name','User_ID','Question','Answer', 'Rating'])
user_ratings_df

Unnamed: 0,User_Name,User_ID,Question,Answer,Rating


In [None]:
#Loading the models
tokenizer_pipeline = AutoTokenizer.from_pretrained('deepset/bert-base-cased-squad2')
model_pipeline = BertForQuestionAnswering.from_pretrained('deepset/bert-base-cased-squad2')
nlp = pipeline('question-answering', model=model_pipeline, tokenizer=tokenizer_pipeline)
wrapper = textwrap.TextWrapper(width=120) 

In [8]:
greetings_list = ['hi','hello','hey','morning','afternoon','evening']
account_list = ['login','account','locked','password','forgot','reset','unlock']

In [9]:
#Loading the reference text
working_dir = '/content/drive/MyDrive/NLP_data'

txt_files  =  glob.glob(working_dir+'//*.txt')
print(txt_files)
mult_text_l = []

# append the different files content to a list
for file in txt_files:
    with open (file, 'r') as f:
        s_text_list = f.read()
        mult_text_l.append(s_text_list)

# Pre-processing the text files and appending into a single reference text   
text = ' '.join(mult_text_l)
text = text.replace('\n', '')
text = re.sub("[!\"#$%&'‘’()*+,\-/:;<=>?@[\]^_`{|}~]", " ", text)
text = re.sub(' +', ' ', text)
print(text)


['/content/drive/MyDrive/NLP_data/Regression_Manual.txt', '/content/drive/MyDrive/NLP_data/KNN & Regression specific queries (1).txt', '/content/drive/MyDrive/NLP_data/Binary logit queries.txt', '/content/drive/MyDrive/NLP_data/Decision_Tree.txt', '/content/drive/MyDrive/NLP_data/Cluster Analysis.txt', '/content/drive/MyDrive/NLP_data/General_queries.txt']
Hi can I help you. Hi Im good. Thank you. Can I help you with any query. Regression analysis is a way of mathematically sorting out which of those variables does indeed have an impact. A dependent variable is the main factor that you re trying to understand or predict. The independent variables are the factors you suspect have an impact on your dependent variable. The left side of the screen left panel has an option of Input data Click on the Browse option and upload dataset in CSV format here. The overview tab provides you with relevant study resources tutorials sample datasets and a short overview to start with which helps you unde

In [10]:
full_stops = []

# As response to user query, BERT extracts relevant phrase only. For better readability, We are trying to return the complete sentence.
# FOr this, we need to identify sentence start and end, based on the location of the extracted phrase 
# We will identify sentence start and end based on the location of the full-stops.
# This code is to create a list of all full stops. 

for i in range(len(text)):
  if text[i] == '.':
    full_stops.append(i)
print(full_stops)

[17, 29, 40, 71, 183, 268, 365, 498, 682, 864, 1059, 1265, 1572, 1774, 2042, 2201, 2287, 2402, 2582, 2751, 3032, 3188, 3354, 3623, 3767, 3854, 4141, 4326, 4544, 4619, 4773, 5051, 5393, 5490, 5683, 5898, 6055, 6222, 6388, 6519, 6630, 6715, 6812, 7013, 7280, 7439, 7525, 7640, 7820, 7989, 8270, 8426, 8592, 8864, 9011, 9098, 9388, 9573, 9791, 9866, 10020, 10298, 10640, 10737, 10930, 11083, 11164, 11309, 11382, 11487, 11585, 11705, 11793, 11814, 11889, 12185, 12328, 12421, 12490, 12551, 12653, 12747, 12922, 12943, 12982, 13219, 13309, 13405, 13749, 13949, 14034, 14166, 14324, 14391, 14498, 14565, 14632, 14739, 14986, 15206, 15307, 15413, 15549, 15588, 15814, 16146, 16465, 16898, 17135, 17439, 17704, 17948, 18150, 18154, 18218, 18520, 18703, 18885, 19080, 19286, 19585, 19852, 19972, 20058, 20173, 20521, 20802, 20958]


In [32]:

def user_ratings(question, answer, rating):
    user_ratings_df.loc[len(user_ratings_df.index)] = ['User', len(user_ratings_df)+1, question, answer, rating]


In [12]:
def question_answer_pipeline(question):
    question_list = question.lower().split()
    if any(word in question_list for word in greetings_list):
        return 'Hi! How may I help you?'
    elif any(word in question_list for word in account_list):
        return 'Please reset your password and if the issue still persists, please email info@zerocodelearnging.com'
    else:
        #tokenize question and text as a pair
        output = nlp({'question': question, 'context':text})
        answer = output['answer']
        probability = output['score']
        #print(answer)
        #print(probability)

        #Returning Answer
        if answer.startswith("[CLS]") or answer.startswith("[SEP]") or answer.startswith(" "):
            answer = "Unable to find the answer to your question. Please reachout to info@zerocodelearnging.com"
            #print(f"Answer: {answer}")
            return answer
        else:
            try:
                # Extracting complete sentence based on start and end positions of QA extract
                for j in range(len(full_stops)):
                    if full_stops[j] < output['start']:
                    #print(stops[j],ans['start'])
                        ans_start = full_stops[j]+1
                        ans_end = full_stops[j+1]+1
                answer = wrapper.fill(text[ans_start:ans_end])
                #print(answer)
                return answer
            except IndexError:
                #print(f"Answer: {answer}")
                return answer

Baseline model : Using BERT & Pattern Matching

In [None]:
#Loading the models
tokenizer_baseline = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model_baseline = TFAutoModelForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')

In [14]:
#similarity_score(ques, answers_list) function calculates the similarity score of all question and answer pair.
# The function returns the answer with highest similarity score

def similarity_score(ques, answers_list):
    scores_df = pd.DataFrame(columns=['question','answer','score'])

    embeddings1 = similarity_model.encode(question, convert_to_tensor=True)

    for sent in answers_list:
        embeddings2 = similarity_model.encode(sent, convert_to_tensor=True)
        cosine_score = util.cos_sim(embeddings1, embeddings2)
        score = '{:.4f}'.format(cosine_score[0][0])
        scores_df.loc[len(scores_df.index)] = [question, sent, score]

    scores_df = scores_df.sort_values(by=['score'], ascending=False)
    #print(scores_df)
    return scores_df['answer'].iloc[0]

In [15]:
def question_answer_baseline(question):
    question_list = question.lower().split()
    if any(word in question_list for word in greetings_list):
        return 'Hi! How may I help you?'

    elif any(word in question_list for word in account_list):
        return 'Please reset your password and if the issue still persists, please email info@zerocodelearnging.com'
    else:
        #tokenize question and text as a pair
        inputs = tokenizer_baseline(question, text, add_special_tokens=True, return_tensors="tf", max_length=512, truncation=True)
        input_ids = inputs["input_ids"].numpy()[0]

        #string version of tokenized ids
        text_tokens = tokenizer_baseline.convert_ids_to_tokens(input_ids)

        #model output using input
        output = model_baseline(inputs)
        #reconstructing the answer
        answer_start = tf.argmax(
            output.start_logits, axis=1
        ).numpy()[0]  # Get the most likely beginning of answer with the argmax of the score
        answer_end = (
            tf.argmax(output.end_logits, axis=1) + 1
        ).numpy()[0]  # Get the most likely end of answer with the argmax of the score
        answer = tokenizer_baseline.convert_tokens_to_string(tokenizer_baseline.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
        #print(answer)
    
        #Returning Answer
        if answer.startswith("[CLS]") or answer.startswith("[SEP]") or answer.startswith(" "):
            answer = "Unable to find the answer to your question. Please reachout to info@zerocodelearnging.com"
            #print(f"Answer: {answer}")
            return answer
        else:
            try:
                temp_list = []
                answers_list = []
                #pattern matching the sentence
                temp_list = re.findall(r"([^.]*?%s[^.]*\.)" % answer, text.lower())
                #print(temp_list)
                if len(temp_list)>1:
                    for line in temp_list:
                        answers_list.append(line)
                    answer = similarity_score(question, answers_list)
                    #print(f"Answer: {answer}")
                    return answer
                else:
                    #print(temp_list)
                    answer = temp_list[0]
                    #print(f"Answer: {answer}")
                    return answer
            except IndexError:
                #print(f"Answer: {answer}")
                return answer

**Dynamic model selection**

In [16]:
#function that calculates the similarity score of all question and response pair and returns the response with highest similarity.

def best_response(question):
    #starting time counter
    #start_time = time.perf_counter()

    question = re.sub("[!\"#$%&'‘’()*+,\-/:;<=>?@[\]^_`{|}~]", "", question)
    print(f"Question: {question}")
    
    response_baseline = question_answer_baseline(question) # Capturing baseline model response to the question
    response_pipeline = question_answer_pipeline(question) # Capturing pipeline model response to the question
    answers_list = [response_baseline,response_pipeline] 
    scores_df = pd.DataFrame(columns=['question','answer','score']) 

    embeddings1 = similarity_model.encode(question, convert_to_tensor=True)

    for sent in answers_list:
        embeddings2 = similarity_model.encode(sent, convert_to_tensor=True)
        cosine_score = util.cos_sim(embeddings1, embeddings2)
        score = '{:.4f}'.format(cosine_score[0][0])
        scores_df.loc[len(scores_df.index)] = [question, sent, score]

    scores_df = scores_df.sort_values(by=['score'], ascending=False)
    #print(scores_df)
    #end_time = time.perf_counter()
    return scores_df['answer'].iloc[0]

In [28]:
# Feedback loop for every question

question= input("\nHi! How may I help you? \n")
while True:
    answer = best_response(question)
    print(f"Answer: {answer}") # Model response being shared to the user

    # Prompting the user to give feedback based on the response to his / her query
    if(answer != 'Hi! How may I help you?'):
        rating = int(input("\nOn a scale of 1-10, how was your conversation experience with us? "))
        print(f"User rating is : {rating}")
        user_ratings(question, answer, rating)

    flag = True
    flag_N = False
    
    while flag:
        response = input("\nDo you want to ask another question(Y/N)? ")
        if response[0] == "Y":
            question = input("\nPlease enter your question: \n")
            flag = False
        elif response[0] == "N":
            print("\nThankyou!")
            flag = False
            flag_N = True
            
    if flag_N == True:
        break


Hi! How may I help you? 
where can i see PCA visualization
Question: where can i see PCA visualization
Answer:  data visualization is the representation of data through use of common graphics such as charts plots infographics and even animations to communicate complex data relationships and data driven insights.

On a scale of 1-10, how was your conversation experience with us? 6
User rating is : 6

Do you want to ask another question(Y/N)? N

Thankyou!


In [33]:
# Table that captures all Questions posed to the bot, bot responses and user ratings.
# This table needs to be periodically reviewed for low user ratings. Accordingly, correction measures, such as rewording context file etc could be taken

user_ratings_df

Unnamed: 0,User_Name,User_ID,Question,Answer,Rating
0,User,1,What is VIF,variance inflation factor or vif is a measure of the amount of multicollinearity in a set of multiple regression variables and for a regression model variable is equal to the ratio of the overall model variance to the variance of a model that includes only that single independent variable.,9
1,User,2,Where to upload data,the left side of the screen left panel has an option of input data click on the browse option and upload dataset in csv format here.,10
2,User,3,what is pair plot,a pair plot gives pairwise relationships in a dataset to understand the best set of features to explain a relationship between two variables or to form the most separated clusters the pair plot function creates a grid of axes such that each variable in data will be shared in the y axis across a single row and on the x axis across a single column.,8
3,User,4,My login expired,"Please reset your password and if the issue still persists, please email info@zerocodelearnging.com",4
4,User,5,How to reset password?,"Please reset your password and if the issue still persists, please email info@zerocodelearnging.com",10
5,User,6,how to determine correct k means,Data Summary tab enables you to get a comprehensive evaluation through statistical measures that help us form the basis\nof our analysis It will display all the descriptive analytics measures including minimum value maximum value range\nbetween data values mean median standard deviation variance etc.,2
6,User,7,what is RMSE,Root Mean Square Error or RMSE is the standard deviation of the residuals or it tells you how concentrated the data is\naround the line of best fit.,10
7,User,8,where can i see PCA visualization,data visualization is the representation of data through use of common graphics such as charts plots infographics and even animations to communicate complex data relationships and data driven insights.,6


### Testing code
No need to re-run this if the Context file and Questions are unchanged

In [19]:
# File with Test Questions and their correct answers being read. Models will be tested on these questions.
df = pd.read_excel("/content/drive/MyDrive/NLP_data/Q&A_for_testing.xlsx")

In [None]:
# All test questions are being passed to the model to retrieve responses. This will take about 25-30 mins to run.

Question_baseline = []
Model_Answer_baseline = []
response_baseline = []
Response_time_baseline = []

for q in range(df.shape[0]-1):
  Question_baseline.append(df[['Question']].loc[q][0])
  question = df[['Question']].loc[q][0]
  response_baseline = question_answer_baseline(df[['Question']].loc[q][0])
  Model_Answer_baseline.append(response_baseline[0])
  Response_time_baseline.append(response_baseline[1])

performance_baseline = pd.DataFrame(zip(Question_baseline,Model_Answer_baseline,Response_time_baseline),
                           columns = ["Question","Model_Answer","Response_time"])

performance_baseline = pd.merge(left = performance_baseline, right = df, on = "Question")
performance_baseline.drop_duplicates('Question')

Question: Iam unable to login
Question: how to reset password
Question: how to upload data
Question: where to upload data
Question: what does overview tab contain
Question: what does overview tab say
Question: where to select Y variable
Question: How to remove missinig values
Question: How to impute missing values
Question: How to select only some part of the data 
Question: how to select sub sample
Question: What does Data Summary tab say
Question: What does Data Summary tab contain
Question: what is a Histogram
Question: what is a pairplot
Question: what is a correlation table
Question: what is a correlation matrix
Question: what is KNN
Question: what is knearest neighbors
Question: how to set test sample percentage
Question: how to set train test sample percentage
Question: How to select maximum nearest neighbours
Question: How to set maximum nearest neighbours
Question: How to set CV folds
Question: what does KNN results tab contain
Question: what is regression
Question: What is Da

Unnamed: 0,Question,Model_Answer,Response_time,Correct_Answer
0,Iam unable to login,"Please reset your password and if the issue still persists, please email our support team.",0.001377,"If your account is locked or if you are unable to login, please reset your password by clicing on reset password option in login page and if the issue still persists, please email our support team"
1,how to reset password,"Please reset your password and if the issue still persists, please email our support team.",0.000842,"If your account is locked or if you are unable to login, please reset your password by clicing on reset password option in login page and if the issue still persists, please email our support team"
2,how to upload data,the left side of the screen left panel has an option of input data click on the browse option and upload dataset in csv format here.,9.632273,The left panel has an option of Input data - Click on the Browse option and upload dataset in CSV format here.
3,where to upload data,the left side of the screen left panel has an option of input data click on the browse option and upload dataset in csv format here.,8.10552,The left panel has an option of Input data - Click on the Browse option and upload dataset in CSV format here.
4,what does overview tab contain,the overview tab provides you with relevant study resources tutorials sample datasets and a short overview to start with which helps you understand and comprehend your data correctly.,8.282703,"The Overview tab provides you with relevant study resources, tutorials, sample datasets and a short overview to start with, which helps you understand and comprehend your data correctly."
5,what does overview tab say,the overview tab provides you with relevant study resources tutorials sample datasets and a short overview to start with which helps you understand and comprehend your data correctly.,8.101657,"The Overview tab provides you with relevant study resources, tutorials, sample datasets and a short overview to start with, which helps you understand and comprehend your data correctly."
6,where to select Y variable,on the left panel there is an option called data selection where you can select your favourable number of y variables required to base the analysis select and click on apply changes.,9.839885,"On the left panel, there is an option called Data selection where you can select your favourable number of Y variables required to base the analysis - Select and click on Apply Changes"
7,How to remove missinig values,click on the drop down to select the option of dropping or imputing the missing values. the data summary option enables you to get a comprehensive evaluation through statistical measures that help us form the basis of our analysis it will display all the descriptive analytics measures including minimum value maximum value range between data values mean median standard deviation variance etc. data visualization is the representation of data through use of common graphics such as charts plots infographics and even animations to communicate complex data relationships and data driven insights. a histogram is a graph that shows the frequency of numerical data using rectangles to give a rough sense of the density of the underlying distribution of the data and often for density estimation estimating the probability density function of the underlying variable. in a histogram the height of a rectangle the vertical axis represents the distribution frequency of a variable the amount or how often that variable appears. the total area of a histogram used for probability density is always normalized to 1. if the length of the intervals on the x axis are all 1 then a histogram is identical to a relative frequency plot. a pair plot gives pairwise relationships in a dataset to understand the best set of features to explain a relationship between two variables or to form the most separated clusters. the pair plot function creates a grid of axes such that each variable in data will be shared in the y [SEP],8.261695,The left panel has Advanced Options section where there is option of Impute missing values or drop missing value rows - Click on the drop down to select the option of dropping or imputing the missing values.
8,How to impute missing values,the left panel has advanced options section where there is option of impute missing values or drop missing value rows click on the drop down to select the option of dropping or imputing the missing values.,8.103327,The left panel has Advanced Options section where there is option of Impute missing values or drop missing value rows - Click on the drop down to select the option of dropping or imputing the missing values.
9,How to select only some part of the data,on the left panel there is an option called data selection where you can select your favourable number of y variables required to base the analysis select and click on apply changes.,8.12345,The left panel has Advanced Options section where there is option of Select sub sample - Click on the drop down option to select random number of observations for testing or the whole data itself


In [None]:
performance_baseline.to_excel("performance_baseline.xlsx")

In [None]:
# get the scores
baseline_scores = pd.DataFrame(rouge.get_scores(performance_baseline['Model_Answer'], performance_baseline['Correct_Answer'],avg = True))
baseline_scores

Unnamed: 0,rouge-1,rouge-2,rouge-l
r,0.377482,0.269464,0.362161
p,0.475986,0.313447,0.463369
f,0.386457,0.274793,0.375345


In [None]:
performance_baseline['Response_time'].describe()

count    57.000000
mean      8.707267
std       2.481541
min       0.000842
25%       8.047797
50%       8.123450
75%       8.847082
max      16.845257
Name: Response_time, dtype: float64

In [None]:
# All test questions are being passed to the model to retrieve responses. This will take about 25-30 mins to run.
# A table will be created with the Questions, their correct answers (context), model responses, probability of model response & response time

Question_pipeline = []
Model_Answer_pipeline = []
Probability_pipeline = []
Response_time_pipeline = []

# All questione being passed to pipeline model. 
for q in range(df.shape[0]-1):
  Question_pipeline.append(df[['Question']].loc[q][0])
  response_pipeline = question_answer_pipeline(df[['Question']].loc[q][0])
  Model_Answer_pipeline.append(response_pipeline[0])
  Probability_pipeline.append(response_pipeline[1])
  Response_time_pipeline.append(response_pipeline[2])

# Creating a dataframe with pipeline model responses, response time and probability being captured.
performance_pipeline = pd.DataFrame(zip(Question_pipeline,Model_Answer_pipeline,Probability_pipeline,Response_time_pipeline),
                           columns = ["Question","Model_Answer","Probability","Response_time"])

performance_pipeline = pd.merge(left = performance_pipeline, right = df, on = "Question")
performance_pipeline.drop_duplicates('Question')

Question: Iam unable to login
Question: how to reset password
Question: how to upload data
dataset in CSV format
Question: where to upload data
Voronoi cells
Question: what does overview tab contain
a short overview to start with which helps you understand and comprehend your data correctly
Question: what does overview tab say
a short overview to start with which helps you understand and comprehend your data correctly
Question: where to select Y variable
Data selection
Question: How to remove missinig values
Impute missing values or drop missing value rows
Question: How to impute missing values
drop missing value rows
Question: How to select only some part of the data 
you can select your favourable number of Y variables required to base the analysis
Question: how to select sub sample
Click on the drop down option
Question: What does Data Summary tab say
evaluates the correlation coefficients between variables
Question: What does Data Summary tab contain
The Summary OLS tab evaluates t

In [None]:
performance_pipeline.to_excel("performance_pipeline.xlsx")

In [None]:
from rouge import Rouge

# initialize the rouge object
rouge = Rouge()

# get the scores
pipeline_scores = pd.DataFrame(rouge.get_scores(performance_pipeline['Model_Answer'], performance_pipeline['Correct_Answer'],avg = True))
pipeline_scores

Unnamed: 0,rouge-1,rouge-2,rouge-l
r,0.610948,0.500234,0.587781
p,0.722025,0.585902,0.694135
f,0.646332,0.524394,0.621985


In [None]:
performance_pipeline['Response_time'].describe()

count    57.000000
mean     23.155675
std       4.582789
min       0.000115
25%      22.971122
50%      23.707143
75%      24.792775
max      26.823984
Name: Response_time, dtype: float64

In [None]:
performance_pipeline['Probability'].describe()

count    57.000000
mean      0.307228
std       0.224729
min       0.001066
25%       0.147455
50%       0.256546
75%       0.400912
max       1.000000
Name: Probability, dtype: float64

In [21]:
#function that calculates the similarity score of all question and response pair and returns the response with highest similarity.
def best_response_hybrid(question, response_list):
    
    response_scores = pd.DataFrame(columns=['question','answer','score'])

    embeddings1 = similarity_model.encode(question, convert_to_tensor=True)

    for sent in response_list:
        embeddings2 = similarity_model.encode(sent, convert_to_tensor=True)
        cosine_score = util.cos_sim(embeddings1, embeddings2)
        score = '{:.4f}'.format(cosine_score[0][0])
        response_scores.loc[len(response_scores.index)] = [question, sent, score]

    response_scores = response_scores.sort_values(by=['score'], ascending=False)
    #print(response_scores)
    return response_scores['answer'].iloc[0]

In [None]:
    #starting time counter
    #start_time = time.perf_counter()
    #end_time = time.perf_counter()


In [None]:
# Both baseline & Pipeline models are run and best response is selected. This will take ~40-45 mins to run.

# Creating new lists to capture model responses, response time and probability
hybrid = []
response_time_best = []
probability_best = []

# Loop to pass all test questions to the model

for i in range(df['Question'].shape[0]-1):
  question = df['Question'][i]
  
  #starting time counter
  start_time = time.perf_counter()

  # Question passed to baseline model
  ans_baseline = question_answer_baseline(df['Question'][i])
  ans_baseline[0] # Extracting baseline model response to the question

  # Question passed to the pipeline model
  ans_pipeline = question_answer_pipeline(df['Question'][i])
  ans_pipeline[0] # Extracting pipeline model response to the question

  # Both the models responses compiled to a single list
  response_list = []
  response_list.append(ans_baseline[0]) 
  response_list.append(ans_pipeline[0]) 
  response_list

  # Responses of both models passed to "best_response" function to select the final response for the user
  best_answer = best_response_hybrid(df['Question'][i],response_list)
  
  end_time = time.perf_counter()
  hybrid.append(best_answer)

  response_time = end_time - start_time

  #if best_answer == ans_baseline[0]:
  #  response_time = ans_baseline[1]
  #else:
  #  response_time = ans_pipeline[2]
    #probability = ans_pipeline[1]
  
  response_time_best.append(response_time)
  #probability_best.append(probability)

performance_hybrid = pd.DataFrame(zip(df['Question'],hybrid,response_time_best),
                           columns = ["Question","Hybrid_model_Answer","Response_time"])

performance_hybrid = pd.merge(left = performance_hybrid, right = df, on = "Question")
performance_hybrid.drop_duplicates('Question')

performance_hybrid





In [None]:
from rouge import Rouge

# initialize the rouge object
rouge = Rouge()

# Capture hybrid model performance details in a dataframe
hybrid_scores = rouge.get_scores(performance_hybrid['Hybrid_model_Answer'], performance_hybrid['Correct_Answer'],avg = True)
hybrid_scores = pd.DataFrame.from_dict(hybrid_scores)
hybrid_scores

Unnamed: 0,rouge-1,rouge-2,rouge-l
r,0.649128,0.540885,0.635016
p,0.774185,0.636314,0.758886
f,0.689433,0.567833,0.675289


In [27]:
performance_hybrid.columns

Index(['Question', 'Hybrid_model_Answer', 'Response_time', 'Correct_Answer'], dtype='object')

In [28]:
performance_hybrid['Response_time'].describe()

count    57.000000
mean     32.774993
std       6.496115
min       0.069677
25%      32.669934
50%      33.253658
75%      34.821488
max      37.969647
Name: Response_time, dtype: float64

In [29]:
#performance_hybrid.to_excel("performance_hybrid.xlsx")