In [1]:
import pandas as pd
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline, BertTokenizer, BertForQuestionAnswering
problem_solv = pd.read_csv("Dataset1.csv")
problem_solv.head()

Unnamed: 0,id,problem,solution
0,1,The construction industry is indubitably one o...,"Herein, we propose an innovative approach to m..."
1,2,"I'm sure you, like me, are feeling the heat - ...","Imagine standing on a green hill, not a single..."
2,3,The massive shift in student learning towards ...,"Implement a """"Book Swap"""" program within educa..."
3,4,The fashion industry is one of the top contrib...,The proposed solution is a garment rental serv...
4,5,The majority of the materials used in producin...,An innovative concept would be a modular elect...


In [7]:
#DistilBERT
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
import torch

def answer_question(context, question):
    # Load pre-trained DistilBERT model and tokenizer
    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased-distilled-squad')
    model = DistilBertForQuestionAnswering.from_pretrained('distilbert-base-cased-distilled-squad')

    # Tokenize input text
    inputs = tokenizer(context, question, return_tensors="pt", truncation=True)

    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Extract start and end logits from the model's output
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits

    # Get the most probable start and end indices
    start_index = torch.argmax(start_logits, dim=1).item()
    end_index = torch.argmax(end_logits, dim=1).item()

    # Decode the answer from the original text
    answer = tokenizer.decode(inputs['input_ids'][0, start_index:end_index+1])

    return answer

# Example usage
context_text = "Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a take, make and dispose model to a more sustainable reduce, reuse, and recycle model, driving the industry towards a more circular and sustainable future. The feasibility of this concept is already being proven in markets around the globe, indicating its potential for scalability and real-world application."
question_text = "The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage."

answer = answer_question(context_text, question_text)
print("Answer:", answer)

Answer: [CLS]


In [9]:
#RoBERTa
from transformers import RobertaTokenizer, RobertaForQuestionAnswering
import torch

def answer_question(context, question):
    # Load pre-trained DistilBERT model and tokenizer
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    model = RobertaForQuestionAnswering.from_pretrained('roberta-base')

    # Tokenize input text
    inputs = tokenizer(context, question, return_tensors="pt", truncation=True)

    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Extract start and end logits from the model's output
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits

    # Get the most probable start and end indices
    start_index = torch.argmax(start_logits, dim=1).item()
    end_index = torch.argmax(end_logits, dim=1).item()

    # Decode the answer from the original text
    answer = tokenizer.decode(inputs['input_ids'][0, start_index:end_index+1])

    return answer

# Example usage
context_text = "Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a take, make and dispose model to a more sustainable reduce, reuse, and recycle model, driving the industry towards a more circular and sustainable future. The feasibility of this concept is already being proven in markets around the globe, indicating its potential for scalability and real-world application."
question_text = "The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage."

answer = answer_question(context_text, question_text)
print("Answer:", answer)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForQuestionAnswering: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForQuestionAnswering were not initialized from the model checkpoint at roberta-base and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inf

Answer:  economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a take,


In [20]:
# T5
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

def answer_question(context, question):
    # Load pre-trained DistilBERT model and tokenizer
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    model = T5ForConditionalGeneration.from_pretrained('t5-base')
    
    #input_text = f"answer: {context} context: {question}"
    
    # Tokenize input text
    input_ids = tokenizer.encode(context, question, return_tensors='pt', truncation=True)

    # Perform inference
    with torch.no_grad():
        outputs = model.generate(input_ids)

    # Decode the generated answer from the output IDs
    generated_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return generated_answer

# Example usage
context_text = "Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a take, make and dispose model to a more sustainable reduce, reuse, and recycle model, driving the industry towards a more circular and sustainable future. The feasibility of this concept is already being proven in markets around the globe, indicating its potential for scalability and real-world application."
question_text = "The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage."

answer = answer_question(context_text, question_text)
print("Answer:", answer)

Answer: The construction industry contributes approximately 1.3 billion tons of waste annually. The construction industry contribute


In [29]:
# XLNet
from transformers import XLNetTokenizer 
from transformers import XLNetForQuestionAnsweringSimple
from torch.nn import functional as F
import torch

def answer_question(context, question):
    # Load pre-trained XLNet model and tokenizer
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
    model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')

    # Tokenize input text
    inputs = tokenizer.encode_plus(question, context, return_tensors='pt', truncation=True)

    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the answer span from the start and end logits
    start_max = torch.argmax(F.softmax(outputs.start_logits, dim = -1))
    end_max = torch.argmax(F.softmax(outputs.end_logits, dim=-1)) + 1 

    # Decode the answer from the original text
    answer = tokenizer.decode(inputs["input_ids"][0][start_max : end_max])

    return answer

# Example usage
#context_text = "Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a take, make and dispose model to a more sustainable reduce, reuse, and recycle model, driving the industry towards a more circular and sustainable future. The feasibility of this concept is already being proven in markets around the globe, indicating its potential for scalability and real-world application."
#question_text = "The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage."

answer = answer_question(context_text, question_text)
print("Answer:", answer)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForQuestionAnsweringSimple: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForQuestionAnsweringSimple from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForQuestionAnsweringSimple from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum leng

Answer: -50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time. We believe, by adopting modular construction, the industry can transit from a take, make and dispose model to a more sustainable reduce, reuse, and recycle model, driving the industry towards a more circular and


In [30]:
# XLNet
from transformers import XLNetTokenizer 
from transformers import XLNetForQuestionAnsweringSimple
from torch.nn import functional as F
import torch

def answer_question(row):
    # Load pre-trained XLNet model and tokenizer
    tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
    model = XLNetForQuestionAnsweringSimple.from_pretrained('xlnet-base-cased')

    # Tokenize input text
    inputs = tokenizer.encode_plus(row['problem'], row['solution'], return_tensors='pt', truncation=True)

    # Perform inference
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the answer span from the start and end logits
    start_max = torch.argmax(F.softmax(outputs.start_logits, dim = -1))
    end_max = torch.argmax(F.softmax(outputs.end_logits, dim=-1)) + 1 

    # Decode the answer from the original text
    answer = tokenizer.decode(inputs["input_ids"][0][start_max : end_max])

    return answer

# Example usage
#context_text = "Herein, we propose an innovative approach to mitigate this problem: Modular Construction. This method embraces recycling and reuse, taking a significant stride towards a circular economy.   Modular construction involves utilizing engineered components in a manufacturing facility that are later assembled on-site. These components are designed for easy disassembling, enabling them to be reused in diverse projects, thus significantly reducing waste and conserving resources.  Not only does this method decrease construction waste by up to 90%, but it also decreases construction time by 30-50%, optimizing both environmental and financial efficiency. This reduction in time corresponds to substantial financial savings for businesses. Moreover, the modular approach allows greater flexibility, adapting to changing needs over time.  We believe, by adopting modular construction, the industry can transit from a take, make and dispose model to a more sustainable reduce, reuse, and recycle model, driving the industry towards a more circular and sustainable future. The feasibility of this concept is already being proven in markets around the globe, indicating its potential for scalability and real-world application."
#question_text = "The construction industry is indubitably one of the significant contributors to global waste, contributing approximately 1.3 billion tons of waste annually, exerting significant pressure on our landfills and natural resources. Traditional construction methods entail single-use designs that require frequent demolitions, leading to resource depletion and wastage."

problem_solv['soln_extract'] = problem_solv.apply(answer_question, axis=1)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForQuestionAnsweringSimple: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForQuestionAnsweringSimple from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForQuestionAnsweringSimple from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum leng

Some weights of XLNetForQuestionAnsweringSimple were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForQuestionAnsweringSimple: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForQuestionAnsweringSimple from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForQuestionAnsweringSimple from the checkpoint of a model that you expect to be exactly identical (initial

In [31]:
problem_solv

Unnamed: 0,id,problem,solution,soln_extract
0,1,The construction industry is indubitably one o...,"Herein, we propose an innovative approach to m...",
1,2,"I'm sure you, like me, are feeling the heat - ...","Imagine standing on a green hill, not a single...",the heat - literally! With World Health Organi...
2,3,The massive shift in student learning towards ...,"Implement a """"Book Swap"""" program within educa...","-waste from obsolete devices. Simultaneously, ..."
3,4,The fashion industry is one of the top contrib...,The proposed solution is a garment rental serv...,", leading to the release of greenhouse gases f..."
4,5,The majority of the materials used in producin...,An innovative concept would be a modular elect...,
5,6,Businesses worldwide expend substantial financ...,The proposed solution involves developing a se...,
6,7,more than 130 Billon plastic bottles waste ann...,Bariq factory to recyle plastic bottels,130 Billon plastic bottles waste annualy in Eg...
7,8,"In congested cities like Berlin, one of the si...",Let's revolutionize the carsharing experience...,"""""Ride-sharing Radar"""" - Your carpooling maest..."
8,9,One major global issue we face today is the su...,"My solution is an innovative Reloop - System, ...",
9,10,The usage of plastic bottles,"Creating a service that sells bottles, and re-...",


In [None]:
# Manual Cleaning and Pre processing of the Solutions using NLP methods
import re
import punctuation from string
string.punctuation
from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize, sent_tokenize

for i in problem_solv.index:
    sampl_sents = sent_tokenize(sampl_punct2)
    sampl_words = [word_tokenize(sent) for sent in sampl_sents]
    text_nonpunct = [char for char in sampl_words if char not in string.punctuation]
    customStopWords = set(stopwords.words('english')+list(punctuation))
    sampl_nostopwords = [word for word in word_tokenize(text_nonpunct) if word not in customStopWords]
