# In [None]:
import numpy as np
from transformers import AutoTokenizer
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.utils import np_utils

# Load the eligibility criteria text file
filename = "concatenated_text.txt"

import chardet

# Detect the encoding of the file from its raw bytes
with open(filename, 'rb') as f:
    result = chardet.detect(f.read())

# Load the file with the detected encoding. A context manager is used so the
# handle is closed deterministically instead of being left to the garbage
# collector. If no encoding was detected (None), open() falls back to the
# platform default.
with open(filename, encoding=result['encoding']) as f:
    raw_text = f.read()

# Create a mapping of unique characters to integers
chars = sorted(set(raw_text))
char_to_int = {c: i for i, c in enumerate(chars)}

# Prepare the data for training: every sample is a window of `seq_length`
# consecutive characters and its target is the character that follows it.
seq_length = 50
n_patterns = len(raw_text) - seq_length
if n_patterns <= 0:
    raise ValueError(
        f"{filename!r} holds {len(raw_text)} characters; more than "
        f"{seq_length} are needed to build a training window"
    )

# Encode the text once, then slice windows out of the encoded list rather
# than looking every character up again for each window it appears in.
encoded_text = [char_to_int[c] for c in raw_text]
dataX = [encoded_text[i:i + seq_length] for i in range(n_patterns)]
dataY = encoded_text[seq_length:]

# Reshape the input data to (samples, time steps, features)
X = np.reshape(dataX, (n_patterns, seq_length, 1))

# Normalize the input data
X = X / float(len(chars))

# One-hot encode the output data. num_classes is pinned to the vocabulary
# size so the output layer has one unit per entry of `chars` even when the
# highest-indexed character never occurs as a target.
y = np_utils.to_categorical(dataY, num_classes=len(chars))

# Define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Train the model
model.fit(X, y, epochs=5, batch_size=128)

# Load the pretrained BERT tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

def predict_eligibility_criteria(model, tender_text):
    """Predict the character most likely to follow ``tender_text``.

    The text is encoded with the module-level ``char_to_int`` mapping and
    scaled by ``len(chars)``, the same encoding the training windows use.
    Feeding BERT word-piece ids instead would hand the character-level
    network values far outside the range it was trained on.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` for an input of
            shape ``(1, seq_length, 1)``.
        tender_text: Seed text. Characters missing from ``char_to_int`` are
            dropped; only the last ``seq_length`` remaining ones are used.

    Returns:
        The entry of ``chars`` with the highest predicted score.
    """
    # Encode with the training vocabulary, skipping unseen characters.
    known = [char_to_int[ch] for ch in tender_text if ch in char_to_int]
    # The network predicts what comes after a window, so keep the tail.
    window = known[-seq_length:]
    # Left-pad short input so the real characters sit next to the prediction.
    # NOTE: index 0 is also the code of chars[0]; there is no dedicated
    # padding symbol in the training vocabulary.
    window = [0] * (seq_length - len(window)) + window
    x_input = np.reshape(window, (1, seq_length, 1))
    x_input = x_input / float(len(chars))
    # Generate the predicted output
    y_output = model.predict(x_input, verbose=0)
    # Convert the predicted output to text
    index = int(np.argmax(y_output))
    return chars[index]

# Write the trained model to eligibility_criteria_model.h5 in the working
# directory.
model.save("eligibility_criteria_model.h5")
from keras.models import load_model
# Rebind `model` to the copy read back from that same file.
model = load_model("eligibility_criteria_model.h5")

def predict_eligibility_criteria(model, tender_text):
    """Predict the character most likely to follow ``tender_text``.

    The text is encoded with the module-level ``char_to_int`` mapping and
    scaled by ``len(chars)``, the same encoding the training windows use.
    Feeding BERT word-piece ids instead would hand the character-level
    network values far outside the range it was trained on.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` for an input of
            shape ``(1, seq_length, 1)``.
        tender_text: Seed text. Characters missing from ``char_to_int`` are
            dropped; only the last ``seq_length`` remaining ones are used.

    Returns:
        The entry of ``chars`` with the highest predicted score.
    """
    # Encode with the training vocabulary, skipping unseen characters.
    known = [char_to_int[ch] for ch in tender_text if ch in char_to_int]
    # The network predicts what comes after a window, so keep the tail.
    window = known[-seq_length:]
    # Left-pad short input so the real characters sit next to the prediction.
    # NOTE: index 0 is also the code of chars[0]; there is no dedicated
    # padding symbol in the training vocabulary.
    window = [0] * (seq_length - len(window)) + window
    x_input = np.reshape(window, (1, seq_length, 1))
    x_input = x_input / float(len(chars))
    # Generate the predicted output
    y_output = model.predict(x_input, verbose=0)
    # Convert the predicted output to text
    index = int(np.argmax(y_output))
    return chars[index]
