In [54]:
from transformers import pipeline
import pandas as pd
import numpy as np
import string
import scipy
import sklearn
import spacy
import nltk
import re
import os

In [62]:
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util

In [63]:
def preprocess_text(text):
    """Normalize a piece of text before it is scored.

    The text is lowercased, ASCII punctuation is removed, the remainder is
    tokenized with NLTK's ``word_tokenize``, English stopwords are dropped
    and every surviving token is lemmatized with ``WordNetLemmatizer``.

    Args:
        text: The string to normalize. Must support ``.lower()``.

    Returns:
        The processed tokens joined by single spaces.
    """
    # Convert to lowercase
    text = text.lower()

    # Remove every character listed in string.punctuation. Digits are not
    # punctuation, so a term such as "co2" passes through intact and needs no
    # special-casing in the pattern.
    text = text.translate(str.maketrans('', '', string.punctuation))

    # Tokenize the text
    tokens = word_tokenize(text)

    # Remove stopwords and lemmatize. The comparison happens on the
    # un-lemmatized token, so only exact stopword forms are filtered out.
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]

    # Join the tokens back into a single string
    return ' '.join(tokens)

In [15]:
# Build the text-classification pipeline backed by the roberta-large-mnli
# checkpoint; the pipeline object bundles the model with its tokenizer.
contradiction_pipeline = pipeline(
    task="text-classification",
    model="roberta-large-mnli",
)

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [74]:
def get_contradiction_score(corpus1, corpus2):
    """Classify a pair of texts with the notebook-level ``contradiction_pipeline``.

    The two texts are joined into one string with `` </s></s> `` between them
    and that string is handed to the pipeline as a single input.

    Args:
        corpus1: First text of the pair.
        corpus2: Second text of the pair.

    Returns:
        Whatever the pipeline returns for the joined string, unmodified. In
        this notebook's recorded output that is a one-element list holding a
        dict with ``'label'`` and ``'score'`` keys.
    """
    return contradiction_pipeline(f"{corpus1} </s></s> {corpus2}")


In [84]:
# Read the source CSV and keep only the columns at positions 2 and 3.
df = pd.read_csv(r"C:\Users\RedHat\Downloads\Tiiqu\Contra.csv").iloc[:, 2:4]

In [85]:
# Show the two columns kept from the CSV for a quick visual check.
df

Unnamed: 0,Statement,Contradictory statement
0,According to Krueger and Gibbs in their 2007 b...,"Meadows, Raiders, and Meadows in their 2004 bo..."
1,"According to Step, Slava, and Meredith, the im...",The Environmental Subnets curve suggests that ...
2,Caporale et al. used a fractional integration ...,The Environmental Subnets curve suggests that ...
3,Rite et al. examined the consistency in estima...,The Environmental Subnets curve suggests that ...
4,Shabbat et al. conducted a time-varying analys...,The Environmental Subnets curve suggests that ...
...,...,...
65,Solar panels typically last around 25 to 30 ye...,Experts suggest billions of solar panels will ...
66,"Solar panels degrade over time, producing less...",Solar panels are expected to last a long time ...
67,"There is a concern about a potential ""waste mo...",Recycling options for end-of-life solar panels...
68,Current recycling infrastructure for solar pan...,Efforts are underway to develop and scale up s...


In [106]:
# Flag rows whose second column holds the "no contradiction" placeholder text.
is_placeholder = df['Contradictory statement '] == "There is no direct contradictory statement in the provided list."
invalid_rows = df[is_placeholder]

# Keep everything else and renumber the rows from zero.
df_cleaned = df[~is_placeholder].reset_index(drop=True)
df_cleaned

Unnamed: 0,Statement,Contradictory statement
0,According to Krueger and Gibbs in their 2007 b...,"Meadows, Raiders, and Meadows in their 2004 bo..."
1,"According to Step, Slava, and Meredith, the im...",The Environmental Subnets curve suggests that ...
2,Caporale et al. used a fractional integration ...,The Environmental Subnets curve suggests that ...
3,Rite et al. examined the consistency in estima...,The Environmental Subnets curve suggests that ...
4,Shabbat et al. conducted a time-varying analys...,The Environmental Subnets curve suggests that ...
5,The Environmental Subnets curve suggests that ...,"According to Step, Slava, and Meredith, the im..."
6,Women are more sensitive to the effects of cli...,The participation of women in parliament is cr...
7,High disruptive potential can generally be ach...,Pull measures are strategies that attract indi...
8,If policies lead to a phase-out of fossil fuel...,It is essential to time regulatory measures or...
9,The declining rural population in Greece has a...,The fragmentation of agricultural holdings and...


In [78]:
# Add a normalized copy of each text column; values are cast to str first so
# that preprocess_text always receives a string.
df_cleaned = df_cleaned.assign(
    corpus1_prepped=df_cleaned['Statement'].astype(str).apply(preprocess_text),
    corpus2_prepped=df_cleaned['Contradictory statement '].astype(str).apply(preprocess_text),
)

In [79]:
# Score every raw (statement, counter-statement) pair, one row at a time.
# Each cell of the new column stores the scorer's return value as-is.
df_cleaned['Contradictory Score 1'] = pd.Series(
    [
        get_contradiction_score(statement, counter_statement)
        for statement, counter_statement in zip(
            df_cleaned['Statement'], df_cleaned['Contradictory statement ']
        )
    ],
    index=df_cleaned.index,
    dtype=object,
)


In [81]:
# Score every preprocessed text pair, one row at a time.
# Each cell of the new column stores the scorer's return value as-is.
df_cleaned['Contradictory Score (prep)'] = pd.Series(
    [
        get_contradiction_score(prepped_first, prepped_second)
        for prepped_first, prepped_second in zip(
            df_cleaned['corpus1_prepped'], df_cleaned['corpus2_prepped']
        )
    ],
    index=df_cleaned.index,
    dtype=object,
)

In [82]:
# Show the frame with the prepped text and both score columns side by side.
df_cleaned

Unnamed: 0,Statement,Contradictory statement,corpus1_prepped,corpus2_prepped,Contradictory Score 1,Contradictory Score (prep)
0,According to Krueger and Gibbs in their 2007 b...,"Meadows, Raiders, and Meadows in their 2004 bo...",according krueger gibbs 2007 book sustainable ...,meadow raider meadow 2004 book limit growth 30...,"[{'label': 'CONTRADICTION', 'score': 0.8791942...","[{'label': 'CONTRADICTION', 'score': 0.8664186..."
1,"According to Step, Slava, and Meredith, the im...",The Environmental Subnets curve suggests that ...,according step slava meredith impact economic ...,environmental subnets curve suggests environme...,"[{'label': 'NEUTRAL', 'score': 0.9692577719688...","[{'label': 'NEUTRAL', 'score': 0.4416784644126..."
2,Caporale et al. used a fractional integration ...,The Environmental Subnets curve suggests that ...,caporale et al used fractional integration coi...,environmental subnets curve suggests environme...,"[{'label': 'NEUTRAL', 'score': 0.7406325936317...","[{'label': 'CONTRADICTION', 'score': 0.4790753..."
3,Rite et al. examined the consistency in estima...,The Environmental Subnets curve suggests that ...,rite et al examined consistency estimation res...,environmental subnets curve suggests environme...,"[{'label': 'NEUTRAL', 'score': 0.9445784091949...","[{'label': 'CONTRADICTION', 'score': 0.4012946..."
4,Shabbat et al. conducted a time-varying analys...,The Environmental Subnets curve suggests that ...,shabbat et al conducted timevarying analysis r...,environmental subnets curve suggests environme...,"[{'label': 'NEUTRAL', 'score': 0.9749519228935...","[{'label': 'CONTRADICTION', 'score': 0.4408214..."
5,The Environmental Subnets curve suggests that ...,"According to Step, Slava, and Meredith, the im...",environmental subnets curve suggests environme...,according step slava meredith impact economic ...,"[{'label': 'NEUTRAL', 'score': 0.8662146329879...","[{'label': 'ENTAILMENT', 'score': 0.4339882731..."
6,Women are more sensitive to the effects of cli...,The participation of women in parliament is cr...,woman sensitive effect climate change tend fee...,participation woman parliament crucial reducin...,"[{'label': 'NEUTRAL', 'score': 0.9756079912185...","[{'label': 'NEUTRAL', 'score': 0.7121676206588..."
7,High disruptive potential can generally be ach...,Pull measures are strategies that attract indi...,high disruptive potential generally achieved p...,pull measure strategy attract individual towar...,"[{'label': 'CONTRADICTION', 'score': 0.6128178...","[{'label': 'CONTRADICTION', 'score': 0.9083954..."
8,If policies lead to a phase-out of fossil fuel...,It is essential to time regulatory measures or...,policy lead phaseout fossil fuel car endoflife...,essential time regulatory measure economic tax...,"[{'label': 'NEUTRAL', 'score': 0.9545585513114...","[{'label': 'NEUTRAL', 'score': 0.5586739778518..."
9,The declining rural population in Greece has a...,The fragmentation of agricultural holdings and...,declining rural population greece significant ...,fragmentation agricultural holding ageing huma...,"[{'label': 'NEUTRAL', 'score': 0.8899252414703...","[{'label': 'NEUTRAL', 'score': 0.6017358899116..."


In [83]:
# Persist the scored frame. The row index is written as well (index=True is
# the to_csv default), so the file gains a leading unnamed column.
df_cleaned.to_csv(
    'Random QA Contra_List_scoring 7-7-24.csv',
    index=True,
)

In [107]:
#df_cleaned.to_csv('Dataset after removing unwanted rows 7-7-24.csv')