In [17]:
# Goal: Detect Contradictions Between Government Claims & Ground Reality Reports
# -----------------------------------------------------------------------------
# We'll use a pretrained Natural Language Inference (NLI) model from HuggingFace 
# (facebook/bart-large-mnli) to classify each pair as:
#
# Entailment → Fulfilled     - Ground reality confirms the government claim
# Contradiction → Unfulfilled - Ground reality contradicts the government claim
# Neutral → Unclear          - Insufficient evidence to determine fulfillment

In [1]:
import pandas as pd
from transformers import pipeline

In [None]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

# Load NLI pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")



Device set to use cpu


In [3]:
claims_df = pd.read_csv("govt_claims.csv")
reports_df = pd.read_csv("ground_reality.csv")

# Optional: Reduce data for testing
# claims_df = claims_df.head(5)
# reports_df = reports_df.head(5)


In [4]:
merged = pd.merge(claims_df, reports_df, on="scheme", suffixes=('_claim', '_report'))
print("Merged shape:", merged.shape)
merged.head()


Merged shape: (152, 14)


Unnamed: 0,id_claim,scheme,claim_text,state_claim,district_claim,source_claim,date_claim,id_report,report_text,state_report,district_report,source_report,date_report,type
0,1,Har Ghar Jal,5.38 Crore rural households have been provided...,All India,All Districts,PIB,2021-12-05,1,"In Bundelkhand, women still walk 2 km to fetch...",Uttar Pradesh,Banda,WaterAid,2024-03-15,field_report
1,1,Har Ghar Jal,5.38 Crore rural households have been provided...,All India,All Districts,PIB,2021-12-05,2,63.4 million people in rural areas live withou...,All India,All Districts,WaterAid,2024-06-01,survey
2,1,Har Ghar Jal,5.38 Crore rural households have been provided...,All India,All Districts,PIB,2021-12-05,3,Water levels are below their 10-year averages ...,Uttar Pradesh,All Districts,Citizen Matters,2024-05-11,news
3,1,Har Ghar Jal,5.38 Crore rural households have been provided...,All India,All Districts,PIB,2021-12-05,4,Uttar Pradesh is grappling with unabated use o...,Uttar Pradesh,All Districts,Down to Earth,2023-06-12,news
4,1,Har Ghar Jal,5.38 Crore rural households have been provided...,All India,All Districts,PIB,2021-12-05,5,Bihar and Uttar Pradesh had the lowest Water P...,Bihar,All Districts,Economic Political Weekly,2024-08-22,research


In [5]:
def detect_contradiction(premise, hypothesis):
    labels = ["entailment", "contradiction", "neutral"]
    try:
        result = classifier(
            sequences=hypothesis,
            candidate_labels=labels,
            hypothesis_template=premise + " {}"
        )
        return result['labels'][0], result['scores'][0]
    except:
        return "error", 0.0



In [6]:
tqdm.pandas()
merged[['nli_label', 'confidence']] = merged.progress_apply(
    lambda row: pd.Series(detect_contradiction(row['claim_text'], row['report_text'])),
    axis=1
)

100%|██████████| 152/152 [04:10<00:00,  1.65s/it]


In [7]:
merged.to_csv("classified_policy_gap_output.csv", index=False)
merged[['scheme', 'claim_text', 'report_text', 'nli_label', 'confidence']].head()


Unnamed: 0,scheme,claim_text,report_text,nli_label,confidence
0,Har Ghar Jal,5.38 Crore rural households have been provided...,"In Bundelkhand, women still walk 2 km to fetch...",contradiction,0.4583
1,Har Ghar Jal,5.38 Crore rural households have been provided...,63.4 million people in rural areas live withou...,contradiction,0.432932
2,Har Ghar Jal,5.38 Crore rural households have been provided...,Water levels are below their 10-year averages ...,contradiction,0.443941
3,Har Ghar Jal,5.38 Crore rural households have been provided...,Uttar Pradesh is grappling with unabated use o...,contradiction,0.509318
4,Har Ghar Jal,5.38 Crore rural households have been provided...,Bihar and Uttar Pradesh had the lowest Water P...,contradiction,0.447237
