<a href="https://colab.research.google.com/github/nehaMhr/Business-Contract-Validation/blob/main/Business_Contract_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load spaCy model for NLP
nlp = spacy.load('en_core_web_sm')

# URL of the CSV file on Google Drive
url = 'https://drive.google.com/uc?id=1YhKfJWfnWQ67rSXfraB9cbJEUsVH8qwF'

# Load the CSV file into a DataFrame
supplier_contracts = pd.read_csv(url)

# Display the first few rows of the DataFrame to understand its structure
print(supplier_contracts.head())

# Function to preprocess text
def preprocess_text(text):
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct]
    return ' '.join(tokens)

# Preprocess the text data
supplier_contracts['processed_contract'] = supplier_contracts['contract'].apply(preprocess_text)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    supplier_contracts['processed_contract'], supplier_contracts['label'], test_size=0.3, random_state=42
)

# Vectorize the text data
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train_vec, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_vec)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

# Function to highlight deviations in the contract
def highlight_deviations(contract, model, vectorizer):
    processed_contract = preprocess_text(contract)
    X_contract = vectorizer.transform([processed_contract])
    prediction = model.predict(X_contract)

    if prediction == 1:
        print("Deviation detected:")
        doc = nlp(contract)
        for sentence in doc.sents:
            processed_sentence = preprocess_text(sentence.text)
            X_sentence = vectorizer.transform([processed_sentence])
            if model.predict(X_sentence) == 1:
                print(f"-> {sentence.text.strip()}")
    else:
        print("No deviations detected.")

# Test highlighting deviations in a sample contract
sample_contract = """
This Contract is made on [date] between [Party A] and [Party B]. The parties agree as follows:
1. Scope of Work: [Description of work].
2. Payment Terms: [Payment terms].
3. Confidentiality: [Confidentiality terms].
4. Term and Termination: [Term and termination terms].
5. Governing Law: [Governing law].
"""

highlight_deviations(sample_contract, model, vectorizer)


   Contract Number                  Contract Title  \
0     3.371000e+03              DNA Kits, reagents   
1     7.996000e+03  CHF-GA-Mission YMCA Summer and   
2     3.701900e+04                             NaN   
3     4.068000e+04        OCAREDSecuritySVC1515SVN   
4     1.000000e+09  FEES FOR ENERGY TRACKING - CLP   

                                     Contract Type  \
0                              Purchasing Contract   
1                                           Grants   
2       Professional Services and P-Form Contracts   
3  Purchasing Contract - Term Contract Commodities   
4                              Purchasing Contract   

                                Purchasing Authority Term Start Date  \
0                                    NO OTHER SOURCE      2017/07/17   
1     AUTHORIZED BY GRANT - NOT PURCHASING AUTHORITY      2018/07/01   
2  TERM CONTRACT PROFSERV-BID, ENVIRONMENTAL CONS...      2018/11/01   
3                           COMPETITIVE SOLICITATION      2019

KeyError: 'contract'