In [None]:
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras
import tensorflow as tf
import numpy as np
from keras import layers
import pandas as pd
from dotenv import load_dotenv
from google.cloud import bigquery
from google.cloud import storage
import fire
import hypertune
import time
import string
import re

<hr>

# Define Variables

In [None]:
# Load variables from a local .env file into the process environment
# (PROJECT_ID is read from os.environ in the next cell).
load_dotenv()

In [None]:
# Project id; raises KeyError if PROJECT_ID is not set in the environment.
PROJECT_ID = os.environ["PROJECT_ID"]

In [None]:
# Create the bucket gs://<PROJECT_ID> that holds the data and job artifacts.
!gsutil mb "gs://{PROJECT_ID}"

In [None]:
# Region and GCS layout used by the rest of the notebook.
REGION = "us-central1"
ARTIFACT_STORE = f"gs://{PROJECT_ID}/detect-llm"
DATA_ROOT = f"{ARTIFACT_STORE}/data"  # exported CSV splits live here
JOB_DIR_ROOT = f"{ARTIFACT_STORE}/jobs"  # one sub-directory per job
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"

In [None]:
# Timestamped job name and its output directory under JOB_DIR_ROOT.
TIMESTAMP = time.strftime("%Y%m%d_%H%M%S")
JOB_NAME = f"detect_llm_{TIMESTAMP}"
JOB_DIR = f"{JOB_DIR_ROOT}/{JOB_NAME}"

In [None]:
# GCS locations of the three CSV splits exported from BigQuery below.
# API_ENDPOINT is already defined in the region/artifact-store cell above, so it
# is not re-assigned here.
TRAINING_FILE_PATH = f"{DATA_ROOT}/training/train_df.csv"
VALIDATION_FILE_PATH = f"{DATA_ROOT}/validation/validation_df.csv"
TEST_FILE_PATH = f"{DATA_ROOT}/test/test_df.csv"

In [None]:
# Export the notebook configuration as environment variables so that
# `%%bash` cells can read it.
os.environ.update(
    {
        "JOB_DIR_ROOT": JOB_DIR_ROOT,
        "TRAINING_FILE_PATH": TRAINING_FILE_PATH,
        "VALIDATION_FILE_PATH": VALIDATION_FILE_PATH,
        "TEST_FILE_PATH": TEST_FILE_PATH,
        "REGION": REGION,
        "JOB_NAME": JOB_NAME,
        "JOB_DIR": JOB_DIR,
    }
)

<hr>

# Create Dataset

In [None]:
# BigQuery client bound to the project; reused by every query/extract cell below.
bigquery_client = bigquery.Client(project=PROJECT_ID)

In [None]:
# Preview the first 100 rows of the raw table as a DataFrame.
preview_sql = f"SELECT * FROM `{PROJECT_ID}.detect_llm_ds_bq.raw_data` LIMIT 100"
preview_rows = bigquery_client.query(preview_sql).result()
preview_rows.to_dataframe()

In [None]:
# Materialise raw_data, ordered by RAND(), into the shuffle_raw table
# (the destination is overwritten on every run).
job_config = bigquery.QueryJobConfig(
    destination=f"{PROJECT_ID}.detect_llm_ds_bq.shuffle_raw",
    write_disposition="WRITE_TRUNCATE",
)
sql = (
    "SELECT * "
    f"FROM `{PROJECT_ID}.detect_llm_ds_bq.raw_data` ORDER BY RAND()"
)
query_job = bigquery_client.query(sql, job_config=job_config)
query_job.result()  # block until the query job has finished
print("Done")

In [None]:
# Provisional training table: every hash bucket except 1 (bucket 1 is the test split).
# Rows are bucketed by a fingerprint of their JSON form, so the split is deterministic.
# ABS(MOD(h, 12)) selects the same buckets as MOD(ABS(h), 12) but cannot overflow
# when the fingerprint is the minimum INT64 value.
job_config = bigquery.QueryJobConfig(
    destination=f"{PROJECT_ID}.detect_llm_ds_bq.training",
    write_disposition="WRITE_TRUNCATE",
)
sql = (
    "SELECT * "
    f"FROM `{PROJECT_ID}.detect_llm_ds_bq.shuffle_raw` AS train "
    "WHERE "
    "ABS(MOD(FARM_FINGERPRINT(TO_JSON_STRING(train)), 12)) "
    "IN (0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)"
)
query_job = bigquery_client.query(sql, job_config=job_config)
query_job.result()  # block until the query job has finished
print("Done")

In [None]:
# Test table: hash bucket 1 of shuffle_raw.
# A modulo-12 bucket is always in 0..11, so the former "12" entry never matched.
# ABS(MOD(h, 12)) selects the same buckets as MOD(ABS(h), 12) but cannot overflow
# when the fingerprint is the minimum INT64 value.
job_config = bigquery.QueryJobConfig(
    destination=f"{PROJECT_ID}.detect_llm_ds_bq.test",
    write_disposition="WRITE_TRUNCATE",
)
sql = (
    "SELECT * "
    f"FROM `{PROJECT_ID}.detect_llm_ds_bq.shuffle_raw` AS train "
    "WHERE "
    "ABS(MOD(FARM_FINGERPRINT(TO_JSON_STRING(train)), 12)) = 1"
)
query_job = bigquery_client.query(sql, job_config=job_config)
query_job.result()  # block until the query job has finished
print("Done")

In [None]:
# Validation table: hash bucket 11.
# Read from shuffle_raw rather than from `training`: a later cell rewrites
# `training` without bucket 11, so re-running this cell against `training`
# would produce an empty table. The selected rows are the same either way.
# A modulo-12 bucket is always in 0..11, so the former "12" entry never matched.
job_config = bigquery.QueryJobConfig(
    destination=f"{PROJECT_ID}.detect_llm_ds_bq.validation",
    write_disposition="WRITE_TRUNCATE",
)
sql = (
    "SELECT * "
    f"FROM `{PROJECT_ID}.detect_llm_ds_bq.shuffle_raw` AS train "
    "WHERE "
    "ABS(MOD(FARM_FINGERPRINT(TO_JSON_STRING(train)), 12)) = 11"
)
query_job = bigquery_client.query(sql, job_config=job_config)
query_job.result()  # block until the query job has finished
print("Done")

In [None]:
# Final training table: hash buckets 0 and 2..10 (1 is test, 11 is validation).
# Read from shuffle_raw instead of overwriting `training` with a query on itself,
# so the cell gives the same result no matter how often or in which order it runs.
job_config = bigquery.QueryJobConfig(
    destination=f"{PROJECT_ID}.detect_llm_ds_bq.training",
    write_disposition="WRITE_TRUNCATE",
)
sql = (
    "SELECT * "
    f"FROM `{PROJECT_ID}.detect_llm_ds_bq.shuffle_raw` AS train "
    "WHERE "
    "ABS(MOD(FARM_FINGERPRINT(TO_JSON_STRING(train)), 12)) "
    "IN (0, 2, 3, 4, 5, 6, 7, 8, 9, 10)"
)
query_job = bigquery_client.query(sql, job_config=job_config)
query_job.result()  # block until the query job has finished
print("Done")

In [None]:
# Export the training table to TRAINING_FILE_PATH on GCS.
project = PROJECT_ID
dataset_id = "detect_llm_ds_bq"
table_id = "training"
destination_uri = TRAINING_FILE_PATH
source_table = f"{project}.{dataset_id}.{table_id}"
extract_job = bigquery_client.extract_table(
    source_table,
    destination_uri,
    location="US",  # must match the location of the source table
)
extract_job.result()  # wait for the extract job to complete
print("Done")


In [None]:
# Export the validation table to VALIDATION_FILE_PATH on GCS.
project = PROJECT_ID
dataset_id = "detect_llm_ds_bq"
table_id = "validation"
destination_uri = VALIDATION_FILE_PATH
source_table = f"{project}.{dataset_id}.{table_id}"
extract_job = bigquery_client.extract_table(
    source_table,
    destination_uri,
    location="US",  # must match the location of the source table
)
extract_job.result()  # wait for the extract job to complete
print("Done")


In [None]:
# Export the test table to TEST_FILE_PATH on GCS.
project = PROJECT_ID
dataset_id = "detect_llm_ds_bq"
table_id = "test"
destination_uri = TEST_FILE_PATH
source_table = f"{project}.{dataset_id}.{table_id}"
extract_job = bigquery_client.extract_table(
    source_table,
    destination_uri,
    location="US",  # must match the location of the source table
)
extract_job.result()  # wait for the extract job to complete
print("Done")


In [None]:
# List the data directory to confirm the three exports exist.
!gsutil ls gs://$PROJECT_ID/detect-llm/data/

<hr>

# Create Training Code

In [None]:
# Load the three exported CSV splits into DataFrames.
train0, val0, test0 = (
    pd.read_csv(split_path)
    for split_path in (TRAINING_FILE_PATH, VALIDATION_FILE_PATH, TEST_FILE_PATH)
)

In [None]:
# Class balance of the training split.
train0.label.value_counts()

In [None]:
# Class balance of the validation split.
val0.label.value_counts()

In [None]:
# Class balance of the test split.
test0.label.value_counts()

In [None]:
# Build tf.data pipelines from the DataFrames.
# `seed` was previously defined but never used; it is now passed to the shuffles
# so they are seeded.
seed = 2
dataset_tr = tf.data.Dataset.from_tensor_slices(
    (train0.text.values, train0.label.values.astype("float32"))
)
dataset_tr = dataset_tr.shuffle(buffer_size=len(train0), seed=seed).batch(batch_size=2)

dataset_val = tf.data.Dataset.from_tensor_slices(
    (val0.text.values, val0.label.values.astype("float32"))
)
dataset_val = dataset_val.shuffle(buffer_size=len(val0), seed=seed).batch(batch_size=2)

# The test pipeline carries text only and is not shuffled, so predictions stay
# in the same row order as test0.
dataset_test = tf.data.Dataset.from_tensor_slices((test0.text.values))
dataset_test = dataset_test.batch(batch_size=2)


In [None]:

def custom_standardization(input_data):
    """Lower-case the text, replace "<br />" with a space and delete punctuation."""
    text = tf.strings.lower(input_data)
    text = tf.strings.regex_replace(text, "<br />", " ")
    punctuation_class = f"[{re.escape(string.punctuation)}]"
    return tf.strings.regex_replace(text, punctuation_class, "")


max_features = 5000
embedding_dim = 64
sequence_length = 64
vectorize_layer = keras.layers.TextVectorization(
    standardize=custom_standardization,
    max_tokens=max_features,
    output_mode="int",
    output_sequence_length=sequence_length,
)
# The vocabulary is adapted on the text of all three splits (labels dropped).
train_text = dataset_tr.map(lambda x, y: x)
val_text = dataset_val.map(lambda x, y: x)
text_ds = train_text.concatenate(val_text).concatenate(dataset_test)
vectorize_layer.adapt(text_ds)

In [None]:
def vectorize_text(text, label):
    """Vectorize a batch of labelled text; the label is passed through unchanged."""
    column = tf.expand_dims(text, -1)  # add a trailing axis before vectorizing
    return vectorize_layer(column), label


def vectorize_text_test(text):
    """Vectorize a batch of unlabelled text."""
    column = tf.expand_dims(text, -1)  # add a trailing axis before vectorizing
    return vectorize_layer(column)


# Vectorize each split, then cache and prefetch it.
train_ds = dataset_tr.map(vectorize_text).cache().prefetch(buffer_size=10)

val_ds = dataset_val.map(vectorize_text).cache().prefetch(buffer_size=10)

test_ds = dataset_test.map(vectorize_text_test).cache().prefetch(buffer_size=10)

In [None]:
# 1D-convolutional binary text classifier over integer token ids.
inputs = keras.Input(shape=(None,), dtype="int64")
x = layers.Embedding(max_features, embedding_dim)(inputs)
x = layers.Dropout(0.5)(x)
x = layers.Conv1D(32, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.Conv1D(32, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(32, activation="relu")(x)
x = layers.Dropout(0.5)(x)

predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)
model = keras.Model(inputs, predictions)
# Use the metric from the same `keras` package the model is built with;
# `tf.keras` can resolve to a different Keras implementation than `keras`.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy", keras.metrics.AUC()],
)

In [None]:
# Number of CPUs reported by the OS (os.cpu_count() may return None).
num_cpus = os.cpu_count()

In [None]:
epochs = 1
tf.random.set_seed(1)
# `workers=` is not passed: Keras only uses it for generator / Sequence inputs
# (train_ds is a tf.data.Dataset), and Keras 3's `fit` no longer accepts it.
model.fit(train_ds, validation_data=val_ds, epochs=epochs)

In [None]:
# Loss and compiled metrics (accuracy, AUC) on the validation set.
model.evaluate(val_ds)

In [None]:
# Sigmoid scores for the (unshuffled) test set.
preds = model.predict(test_ds)

In [None]:
from sklearn.metrics import roc_auc_score

In [None]:
# ROC AUC of the test predictions against the test labels.
roc_auc_score(test0.label,preds)

In [None]:
# Inspect one test essay. The test DataFrame is `test0`; no `test` variable
# exists in this notebook, so the old name raised NameError.
test0.text[123]

In [None]:
# Wrap the vectorizer and the trained classifier into one model that takes raw strings.
inputs = keras.Input(shape=(1,), dtype="string")
indices = vectorize_layer(inputs)
outputs = model(indices)
end_to_end_model = keras.Model(inputs, outputs)
end_to_end_model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy", keras.metrics.AUC()],
)
# The test DataFrame is `test0` (there is no `test` variable in this notebook).
end_to_end_preds = end_to_end_model.predict([test0.text[123]])

In [None]:
# True label next to the rounded prediction for the same test row
# (the test DataFrame is `test0`, not `test`).
test0.label[123], end_to_end_preds.round(3)

In [None]:
# Save the string-in / score-out model to a local .keras file.
end_to_end_model.save("end_to_end_detect_llm.keras",save_format="keras")

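The saved model refers to the custom standardization function, so the same function must be defined and passed via `custom_objects` when the model is loaded:

```python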
def custom_standardization(input_data):
    lowercase = tf.strings.lower(input_data)
    stripped_html = tf.strings.regex_replace(lowercase, "<br />", " ")
    return tf.strings.regex_replace(
        stripped_html, f"[{re.escape(string.punctuation)}]", ""
    )
```

In [None]:
# Reload the saved model; the custom standardization callable is supplied
# through custom_objects under the name "custom_standardization".
loaded_model = keras.models.load_model("end_to_end_detect_llm.keras"
                                      ,custom_objects={"custom_standardization":custom_standardization})

In [None]:
# Score a sample essay with the in-memory end-to-end model.
end_to_end_model.predict(["If you are someone who likes to go exploring and work a little then here is the job for you. The reason you should join this program is because on your extra time you can go sight seeing and look at what you will be passing on the jorney from place to place. The task you are asked to do is simple and you can do a variety of things on your way back home.\n\nIf you join this program then you could travel half way around the world, see different things, be in the millitary, help people, and work all at the same time. When you join this program you can see things like ruins and famos statues. Luke joined and look what he seen. He seen Europe, China, an excavated castle, and the Panama Canal. You don't see those things on an average day. If you join you can see all these things and have a life time expereince.\n\nWhen you are on this voage all you have to do is feed the animals and water them two or three times a day. The bales of hay and bags of oats had to be pulled up from the lower holds of the ship, so you might need some mucles. You also have to clean the stalls daily, which I know is a nasty job but someone has to do it. You also get the benefit of hellping families in need.\n\nOn the way back home you can also have fun by playing games with your friends in the empty stalls. It might not be the most sanatairy place but what else is there to do. Luke says that he and his friends play: Table-tennis, fencing, boxing,voley ball, base ball, and reading. This helped pass the time of the long jorney back home. This program could be fun even if you are in a ship and in animal stalls.\n\nThis why I think that joining this program would be good. You get to go sighting seeing, work, and have fun at the same time. Who would want to miss this opportunity of a life time like this one.         "])

In [None]:
# Score the same sample essay with the model reloaded from disk.
loaded_model.predict(["If you are someone who likes to go exploring and work a little then here is the job for you. The reason you should join this program is because on your extra time you can go sight seeing and look at what you will be passing on the jorney from place to place. The task you are asked to do is simple and you can do a variety of things on your way back home.\n\nIf you join this program then you could travel half way around the world, see different things, be in the millitary, help people, and work all at the same time. When you join this program you can see things like ruins and famos statues. Luke joined and look what he seen. He seen Europe, China, an excavated castle, and the Panama Canal. You don't see those things on an average day. If you join you can see all these things and have a life time expereince.\n\nWhen you are on this voage all you have to do is feed the animals and water them two or three times a day. The bales of hay and bags of oats had to be pulled up from the lower holds of the ship, so you might need some mucles. You also have to clean the stalls daily, which I know is a nasty job but someone has to do it. You also get the benefit of hellping families in need.\n\nOn the way back home you can also have fun by playing games with your friends in the empty stalls. It might not be the most sanatairy place but what else is there to do. Luke says that he and his friends play: Table-tennis, fencing, boxing,voley ball, base ball, and reading. This helped pass the time of the long jorney back home. This program could be fun even if you are in a ship and in animal stalls.\n\nThis why I think that joining this program would be good. You get to go sighting seeing, work, and have fun at the same time. Who would want to miss this opportunity of a life time like this one.         "])

<hr>

# Define Training Code and Docker Image

In [None]:
# Local folder that receives train.py and the Dockerfile written below.
TRAINING_APP_FOLDER = "training_app_trees"
os.makedirs(TRAINING_APP_FOLDER, exist_ok=True)

In [None]:
%%writefile {TRAINING_APP_FOLDER}/train.py
import os
import subprocess
import sys
os.environ["KERAS_BACKEND"] = "tensorflow"
import tensorflow as tf
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from google.cloud import bigquery
from google.cloud import storage
import fire
import hypertune
import string
import re
from sklearn.metrics import roc_auc_score
import time
import keras
from keras import layers

def train_evaluate(training_path,val_path,test_path,job_dir, hidden_dim, dropout, embedding_dim ,sequence_length ,max_features, hptune): 
    
    train0 = pd.read_csv(training_path)
    val0 = pd.read_csv(val_path)
    test0 = pd.read_csv(test_path)
    dataset_tr = tf.data.Dataset.from_tensor_slices((train0.text.values,train0.label.values.astype("float32") ))
    dataset_tr = dataset_tr.shuffle(buffer_size=len(train0)).batch(batch_size=2)
    dataset_val = tf.data.Dataset.from_tensor_slices((val0.text.values,val0.label.values.astype("float32")  ))
    dataset_val = dataset_val.shuffle(buffer_size=len(val0)).batch(batch_size=2)
    dataset_test = tf.data.Dataset.from_tensor_slices((test0.text.values ))
    dataset_test = dataset_test.batch(batch_size=2)
    def custom_standardization(input_data):
        lowercase = tf.strings.lower(input_data)
        stripped_html = tf.strings.regex_replace(lowercase, "<br />", " ")
        return tf.strings.regex_replace(
            stripped_html, f"[{re.escape(string.punctuation)}]", ""
        )
    max_features = int(max_features)
    embedding_dim = int(embedding_dim)
    sequence_length = int(sequence_length)
    vectorize_layer = keras.layers.TextVectorization(
        standardize=custom_standardization,
        max_tokens=max_features,
        output_mode="int",
        output_sequence_length=sequence_length,
    )
    text_ds = dataset_tr.map(lambda x, y: x).concatenate(dataset_val.map(lambda x, y: x)).concatenate(dataset_test)
    vectorize_layer.adapt(text_ds)
    
    def vectorize_text(text, label):
        text = tf.expand_dims(text, -1)
        return vectorize_layer(text), label
    def vectorize_text_test(text):
        text = tf.expand_dims(text, -1)
        return vectorize_layer(text)
    train_ds = dataset_tr.map(vectorize_text)
    train_ds = train_ds.cache().prefetch(buffer_size=10)
    
    val_ds = dataset_val.map(vectorize_text)
    val_ds = val_ds.cache().prefetch(buffer_size=10)
    
    test_ds = dataset_test.map(vectorize_text_test)
    test_ds = test_ds.cache().prefetch(buffer_size=10)
    
    inputs = keras.Input(shape=(None,), dtype="int64")
    x = layers.Embedding(max_features, embedding_dim)(inputs)
    x = layers.Dropout(float(dropout))(x)
    x = layers.Conv1D(int(hidden_dim), 7, padding="valid", activation="relu", strides=3)(x)
    x = layers.Conv1D(int(hidden_dim), 7, padding="valid", activation="relu", strides=3)(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dense(int(hidden_dim), activation="relu")(x)
    x = layers.Dropout(float(dropout))(x)
    
    predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)
    model = keras.Model(inputs, predictions)
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy",tf.keras.metrics.AUC()])
    
    num_cpus = os.cpu_count()
    
    epochs = 1
    tf.random.set_seed(1)
    model.fit(train_ds, validation_data=val_ds, epochs=epochs,workers=num_cpus)
    
    
    preds = model.predict(test_ds)

    if hptune:
            roc_auc = roc_auc_score(test0.label,preds)
            print('Model roc_auc: {}'.format(roc_auc))
    
            hpt = hypertune.HyperTune()
            hpt.report_hyperparameter_tuning_metric(
              hyperparameter_metric_tag='roc_auc',
              metric_value=roc_auc
            )
    
    # Save the model
    if not hptune:
        model_filename = "end_to_end_detect_llm"
        inputs = keras.Input(shape=(1,), dtype="string")
        indices = vectorize_layer(inputs)
        outputs = model(indices)
        end_to_end_model = keras.Model(inputs, outputs)
        end_to_end_model.compile(
            loss="binary_crossentropy", optimizer="adam", metrics=["accuracy",tf.keras.metrics.AUC()]
        )
        # end_to_end_model.save(model_filename,save_format="keras")
        tf.saved_model.save(
        obj=end_to_end_model, export_dir=model_filename)
    
        gcs_model_path = "{}/{}".format(job_dir, model_filename)
        subprocess.check_call(['gsutil', 'cp', '-r', model_filename, gcs_model_path], stderr=sys.stdout)
        print("Saved model in: {}".format(gcs_model_path)) 
if __name__ == "__main__":
        fire.Fire(train_evaluate)

In [None]:
# Display the training CSV path that is passed to train.py below.
TRAINING_FILE_PATH

In [None]:
!python training_app_trees/train.py --training_path $TRAINING_FILE_PATH --val_path $VALIDATION_FILE_PATH --test_path $TEST_FILE_PATH --job-dir $JOB_DIR --hidden_dim 32 --dropout 0.5 --embedding_dim 32 --sequence_length 128 --max_features 5000 --hptune False

In [None]:
# List what the local run copied to the job directory.
!gsutil ls $JOB_DIR/*

In [None]:
# Destructive: recursively delete everything under the job directory.
!gsutil rm -r $JOB_DIR/*

In [None]:
%%writefile {TRAINING_APP_FOLDER}/Dockerfile

# Training image: base image plus the Python packages installed below.
FROM gcr.io/deeplearning-platform-release/tf-cpu.2-13
RUN pip install -U pandas==2.1.4 numpy==1.24.3 fire cloudml-hypertune scikit-learn==1.3.2 fsspec gcsfs
WORKDIR /app
COPY train.py .

# Container arguments are appended to this command as train.py flags.
ENTRYPOINT ["python", "train.py"]

In [None]:
# Container Registry URI for the training image.
IMAGE_NAME = "detect_llm_trainer_image"
IMAGE_TAG = "latest"
IMAGE_URI = f"gcr.io/{PROJECT_ID}/{IMAGE_NAME}:{IMAGE_TAG}"

# Exported so the %%bash tuning cell can reference $IMAGE_URI.
os.environ["IMAGE_URI"] = IMAGE_URI

In [None]:
# Display the image URI that will be built and used by the jobs below.
IMAGE_URI

In [None]:
# Enable the Cloud Build API (used by `gcloud builds submit` below).
!gcloud services enable cloudbuild.googleapis.com

In [None]:
# Build the training folder with Cloud Build and tag the image as IMAGE_URI.
!gcloud builds submit --tag $IMAGE_URI $TRAINING_APP_FOLDER

<hr>

# Create Hyperparameter Tuning Job

In [None]:
# Enable the Vertex AI API (used by the `gcloud ai` commands below).
!gcloud services enable aiplatform.googleapis.com

In [None]:
%%bash
# Write a hyperparameter-tuning config and submit it to Vertex AI.
# $IMAGE_URI, $TRAINING_FILE_PATH, $VALIDATION_FILE_PATH, $TEST_FILE_PATH,
# $JOB_DIR, $REGION and $JOB_NAME come from os.environ, set in earlier cells.
MACHINE_TYPE="n1-standard-4"
REPLICA_COUNT=1
CONFIG_YAML=config.yaml

# The heredoc is unquoted, so the shell expands the $VARIABLES inside it.
# The metric id "roc_auc" matches the tag train.py reports to hypertune.
cat <<EOF > $CONFIG_YAML
studySpec:
  metrics:
  - metricId: roc_auc
    goal: MAXIMIZE
  parameters:
  - parameterId: hidden_dim
    discreteValueSpec:
      values:
      - 96
      - 128
      - 256
  - parameterId: dropout
    discreteValueSpec:
      values:
      - 0.1
      - 0.2
      - 0.5
  - parameterId: embedding_dim
    discreteValueSpec:
      values:
      - 96
      - 128
  - parameterId: sequence_length
    discreteValueSpec:
      values:
      - 250
      - 500
  - parameterId: max_features
    discreteValueSpec:
      values:
      - 15000
      - 20000

  algorithm: ALGORITHM_UNSPECIFIED # results in Bayesian optimization
trialJobSpec:
  workerPoolSpecs:  
  - machineSpec:
      machineType: $MACHINE_TYPE
    replicaCount: $REPLICA_COUNT
    containerSpec:
      imageUri: $IMAGE_URI
      args:
      - --training_path=$TRAINING_FILE_PATH 
      - --val_path=$VALIDATION_FILE_PATH 
      - --test_path=$TEST_FILE_PATH 
      - --job-dir=$JOB_DIR 
      - --hptune
EOF

# Only one trial is requested here; raise --max-trial-count to explore more
# of the search space.
gcloud ai hp-tuning-jobs create \
    --region=$REGION \
    --display-name=$JOB_NAME \
    --config=$CONFIG_YAML \
    --max-trial-count=1 \
    --parallel-trial-count=1

echo "JOB_NAME: $JOB_NAME"

In [None]:
# Replace YOUR_JOB_NUMBER with the numeric id of the tuning job created above.
!gcloud ai hp-tuning-jobs describe YOUR_JOB_NUMBER --region=us-central1

<hr>

# Create Custom Training Job

In [None]:
from google.cloud import aiplatform, bigquery

In [None]:
def get_trials(job_name):
    """Return the trials of the tuning job whose display name equals ``job_name``.

    Returns None when no hyperparameter tuning job has that display name.
    If several jobs share the name, the first one returned by the listing is used.
    """
    jobs = aiplatform.HyperparameterTuningJob.list()
    # Match on the argument; the previous code compared against the notebook-level
    # JOB_NAME global and silently ignored `job_name`.
    match = [job for job in jobs if job.display_name == job_name]
    tuning_job = match[0] if match else None
    return tuning_job.trials if tuning_job else None


def get_best_trial(trials):
    """Return the trial with the highest first final-measurement metric.

    Trials that reported no final metric are skipped. Ties go to the earliest
    trial.

    Raises:
        ValueError: if no trial reported a final metric.
    """
    scored = [trial for trial in trials if trial.final_measurement.metrics]
    if not scored:
        raise ValueError("No trial reported a final metric.")
    return max(scored, key=lambda trial: trial.final_measurement.metrics[0].value)


def retrieve_best_trial_from_job_name(jobname):
    """Look up the tuning job named ``jobname`` and return its best trial.

    Raises:
        ValueError: if no job with that display name exists or it has no trials.
    """
    trials = get_trials(jobname)
    if not trials:
        raise ValueError(
            f"No hyperparameter tuning job with trials found for display name {jobname!r}."
        )
    return get_best_trial(trials)

In [None]:
# Best trial of the tuning job submitted above (looked up by display name).
best_trial = retrieve_best_trial_from_job_name(JOB_NAME)

In [None]:
# Look the tuned values up by parameter id instead of by list position, so the
# assignment does not depend on the order in which the service returns them.
best_params = {param.parameter_id: param.value for param in best_trial.parameters}
dropout = best_params["dropout"]
embedding_dim = best_params["embedding_dim"]
hidden_dim = best_params["hidden_dim"]
max_features = best_params["max_features"]
sequence_length = best_params["sequence_length"]
# First reported metric of the best trial (the study defines only roc_auc).
score = best_trial.final_measurement.metrics[0].value
score

In [None]:
TIMESTAMP = time.strftime("%Y%m%d_%H%M%S")
JOB_NAME = f"detect_llm_{TIMESTAMP}"
JOB_DIR = f"{JOB_DIR_ROOT}/{JOB_NAME}"

MACHINE_TYPE="n1-standard-4"
REPLICA_COUNT=1

WORKER_POOL_SPEC = f"""\
machine-type={MACHINE_TYPE},\
replica-count={REPLICA_COUNT},\
container-image-uri={IMAGE_URI}\
"""

ARGS = f"""\
--job_dir={JOB_DIR},\
--training_path={TRAINING_FILE_PATH},\
--val_path={VALIDATION_FILE_PATH},\
--test_path={VALIDATION_FILE_PATH},\
--dropout={dropout},\
--embedding_dim={embedding_dim},\
--hidden_dim={hidden_dim},\
--max_features={max_features},\
--sequence_length={sequence_length},\
--nohptune\
"""

!gcloud ai custom-jobs create \
  --region={REGION} \
  --display-name={JOB_NAME} \
  --worker-pool-spec={WORKER_POOL_SPEC} \
  --args={ARGS}

print("The model will be exported at:", JOB_DIR)

<hr>

# Deploy Model to Endpoint

In [None]:
# Serving configuration: model display name, serving container image and machine type.
MODEL_NAME = "detect_llm_classifier"
SERVING_CONTAINER_IMAGE_URI = (
    "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-13:latest"
)
SERVING_MACHINE_TYPE = "n1-standard-2"

In [None]:
# Check that the training job has written its output under JOB_DIR.
!gsutil ls $JOB_DIR

In [None]:
# Register the exported SavedModel directory as a Vertex AI model.
uploaded_model = aiplatform.Model.upload(
    display_name=MODEL_NAME,
    artifact_uri= JOB_DIR+"/end_to_end_detect_llm/", # modelfile_name we defined in train.py
    serving_container_image_uri=SERVING_CONTAINER_IMAGE_URI,
)

In [None]:
# Deploy the uploaded model to an endpoint with no accelerator.
endpoint = uploaded_model.deploy(
    machine_type=SERVING_MACHINE_TYPE,
    accelerator_type=None,
    accelerator_count=None,
)

In [None]:
# Smoke-test the endpoint: one instance containing a single string.
endpoint.predict([["Sample text to predict, this is not generated text but we need student article text to test."]])