In [203]:
# List the contents of the project's code directory (IPython shell escape).
!ls ../AI-CPS/code/

ai_training.ipynb      learningBase		       test.ipynb
ai_training_ols.ipynb  learningBase_ols		       tf_model_1.h5
apply_annSolution.py   model.ipynb		       tf_model_1.keras
housing_data.csv       scraping_and_preparation.ipynb


In [266]:
import os
import sys
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

# ------------------------------------------------------------------------------
# CONFIG: Update paths as needed
#   MODEL_PATH      -> saved Keras model file handed to load_model()
#   ACTIVATION_PATH -> CSV handed to load_activation_data(); predict_inference()
#                      reads its "tweet" column
# Both paths are relative, so they resolve against the current working directory.
# ------------------------------------------------------------------------------
MODEL_PATH = "../AI-CPS/code/sarcasm_model.h5"
ACTIVATION_PATH = "../AI-CPS/code/activation_data.csv"

# ------------------------------------------------------------------------------
# Hyperparameters for vectorization (must match what you used in training)
#   MAX_TOKENS      -> TextVectorization(max_tokens=...)
#   SEQUENCE_LENGTH -> TextVectorization(output_sequence_length=...)
# ------------------------------------------------------------------------------
MAX_TOKENS = 1000
SEQUENCE_LENGTH = 50

def load_model(model_path: str):
    """Load a saved Keras model from ``model_path`` for inference.

    The path is echoed to stdout before loading. The model is loaded with
    ``compile=False`` so the metric functions stored with it do not have to
    be deserialized.
    """
    print(f"Loading model from: {model_path}")
    keras_model = tf.keras.models.load_model(model_path, compile=False)
    return keras_model

def load_activation_data(csv_path: str):
    """Read the activation CSV at ``csv_path`` and return it as a DataFrame.

    The path is printed first so the run log records which file was used.
    """
    print(f"Loading activation data from: {csv_path}")
    return pd.read_csv(csv_path)

def predict_inference(model, df):
    """Score every tweet in ``df`` with the Keras model and print the results.

    Steps:
      1) Read the ``tweet`` column, skipping missing entries.
      2) Adapt a TextVectorization layer on those tweets (demonstration only).
      3) Vectorize the tweets to integer sequences.
      4) Run ``model.predict`` and print one label/score pair per tweet.

    Args:
        model: Loaded Keras model; it is called as ``model.predict(sequences)``.
            Assumed to emit one score per tweet in output column 0
            -- TODO confirm against the training notebook.
        df: pandas DataFrame with a ``tweet`` column of raw text.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If ``df`` has no ``tweet`` column.
    """
    if "tweet" not in df.columns:
        raise KeyError("activation data must contain a 'tweet' column")

    # Drop missing tweets first: astype(str) would otherwise turn NaN into the
    # literal text "nan" and score it as if it were a real tweet.
    tweets = df["tweet"].dropna().astype(str).values

    # Nothing to score: return before building the vectorizer or calling the
    # model, since neither has any input to work on.
    if len(tweets) == 0:
        print("No tweets found in activation data; nothing to predict.")
        return

    # Build a vectorizer that matches your training parameters
    vectorizer = layers.TextVectorization(
        max_tokens=MAX_TOKENS,
        output_mode='int',
        output_sequence_length=SEQUENCE_LENGTH
    )

    # WARNING: Adapting on activation_data.csv alone means the vocabulary
    #          won't match full training. This is purely a demonstration.
    vectorizer.adapt(tweets)

    # Convert raw text to integer sequences
    tweets_vec = vectorizer(tweets)

    # Model expects integer sequences
    predictions = model.predict(tweets_vec)

    # Print predictions; scores at or above 0.5 are labelled "Sarcastic"
    for tweet, pred in zip(tweets, predictions):
        label = "Sarcastic" if pred[0] >= 0.5 else "Not Sarcastic"
        print(f"Tweet: {tweet}")
        print(f"Prediction: {label} (score: {pred[0]:.4f})\n")

# Entry point: load the Keras model and the activation CSV, then print one
# prediction per tweet. `model` and `df_act` are assigned at module level, so
# they remain available to later cells after this one has run.
if __name__ == "__main__":
    model = load_model(MODEL_PATH)
    df_act = load_activation_data(ACTIVATION_PATH)
    predict_inference(model, df_act)


Loading model from: ../AI-CPS/code/sarcasm_model.h5
Loading activation data from: ../AI-CPS/code/activation_data.csv
Tweet: Overheard as my 13 year old games with a friend: 'You smell like tartare sauce!' #MontyPythonesqueDisses
Prediction: Not Sarcastic (score: 0.4801)



In [230]:
# Show the TensorFlow version in use by this kernel.
print(tf.__version__)

2.12.0


In [252]:
# List the files inside the .keras archive without extracting it (unzip -l).
!unzip -l ../AI-CPS/code/learningBase/sarcasm_model.keras 

Archive:  ../AI-CPS/code/learningBase/sarcasm_model.keras
  Length      Date    Time    Name
---------  ---------- -----   ----
       64  01-01-1980 00:00   metadata.json
     3735  01-01-1980 00:00   config.json
  1121692  02-01-2025 14:16   model.weights.h5
---------                     -------
  1125491                     3 files


In [249]:
# List the current working directory (IPython shell escape).
!ls

 data				    OLS_model			      'week 4'
 dataset02_testing.csv		    requirements.txt		      'week 5'
 dataset02_training.csv		    scatter_with_regression_line.pdf  'week 6'
'Fahrzeugübersicht 2024 (2).xlsx'   tf_model_1.keras		      'week 7'
 housing_data.csv		    UE_04_App2_BoxPlot.pdf	      'week 8'
 intro_pytorch.ipynb		    Untitled.ipynb		      'week 9'
 learningBase			   'week 2'
 ml2_python3_11			   'week 3'


In [267]:
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import statsmodels.api as sm

# ------------------------------------------------------------------------------
# Paths to the pickled OLS model and the activation CSV.
# The values below are relative paths into ../AI-CPS/code/, resolved against
# the current working directory.
# NOTE(review): inside the container the activation data is expected under
#   /tmp/activationBase and the OLS model under /tmp/knowledgeBase -- these
#   values do not point there; confirm which location is intended before
#   deployment.
# NOTE(review): "currentOlsSoluGon.pickle" looks like a garbled
#   "currentOlsSolution.pickle" -- verify against the actual file name on disk.
# ------------------------------------------------------------------------------
MODEL_PATH = "../AI-CPS/code/currentOlsSoluGon.pickle"
ACTIVATION_PATH = "../AI-CPS/code/activation_data.csv"

# ------------------------------------------------------------------------------
# Hyperparameters for vectorization (must match what you used in training)
#   MAX_TOKENS      -> TextVectorization(max_tokens=...)
#   SEQUENCE_LENGTH -> TextVectorization(output_sequence_length=...)
# ------------------------------------------------------------------------------
MAX_TOKENS = 1000
SEQUENCE_LENGTH = 50

def load_model(model_path: str):
    """Load a pickled statsmodels OLS results object from ``model_path``.

    The path is echoed to stdout first. Loading goes through ``sm.load``, which
    assumes the model was saved via ``results.save(...)``.

    NOTE: this unpickles the file, so only load model files you trust.
    """
    print(f"Loading OLS model from: {model_path}")
    fitted_results = sm.load(model_path)
    return fitted_results

def load_activation_data(csv_path: str):
    """Return the activation CSV at ``csv_path`` as a pandas DataFrame.

    Prints the path before reading so the output shows which file was loaded.
    """
    print(f"Loading activation data from: {csv_path}")
    activation_frame = pd.read_csv(csv_path)
    return activation_frame

def predict_inference(ols_results, df):
    """Score every tweet in ``df`` with the loaded OLS model and print the results.

    Steps:
      1. Read the ``tweet`` column, skipping missing entries.
      2. Adapt a TextVectorization layer on the tweets (for demonstration).
      3. Convert the raw tweet strings to integer sequences.
      4. Add a constant column (to match the training data).
      5. Use the loaded OLS model to predict.
      6. Print one label/score pair per tweet.

    Args:
        ols_results: Loaded statsmodels results object; it is called as
            ``ols_results.predict(X)``. Assumed to have been fitted on
            ``SEQUENCE_LENGTH`` token columns plus a constant -- TODO confirm.
        df: pandas DataFrame with a ``tweet`` column of raw text.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If ``df`` has no ``tweet`` column.
    """
    if "tweet" not in df.columns:
        raise KeyError("activation data must contain a 'tweet' column")

    # Drop missing tweets first: astype(str) would otherwise turn NaN into the
    # literal text "nan" and score it as if it were a real tweet.
    tweets = df["tweet"].dropna().astype(str).values

    # Nothing to score: return before building the vectorizer or calling the
    # model, since neither has any input to work on.
    if len(tweets) == 0:
        print("No tweets found in activation data; nothing to predict.")
        return

    # Build a TextVectorization layer (this should match the training configuration)
    vectorizer = layers.TextVectorization(
        max_tokens=MAX_TOKENS,
        output_mode='int',
        output_sequence_length=SEQUENCE_LENGTH
    )

    # WARNING: Adapting on the activation data alone means the vocabulary
    # will likely differ from training. This is acceptable here for demonstration.
    vectorizer.adapt(tweets)

    # Convert the raw text tweets into integer sequences
    tweets_tensor = vectorizer(tweets)  # This returns a tf.Tensor
    tweets_int = tweets_tensor.numpy()   # Convert to a NumPy array

    # In training, you added a constant column to the integer sequences.
    # Do the same here so the input matches what the OLS model expects.
    # has_constant="add" forces the column of ones to be added even when the
    # data already contains a constant column (e.g. a single activation row).
    X_activation = sm.add_constant(tweets_int, has_constant="add")

    # Predict using the loaded OLS model
    predictions = ols_results.predict(X_activation)

    # Print predictions
    for tweet, pred in zip(tweets, predictions):
        # For demonstration, we threshold at 0.5
        label = "Sarcastic" if pred >= 0.5 else "Not Sarcastic"
        print(f"Tweet: {tweet}")
        print(f"Prediction: {label} (score: {pred:.4f})\n")

# Entry point: load the pickled OLS model and the activation CSV, then print
# one prediction per tweet. `ols_results` and `df_act` are assigned at module
# level, so they remain available to later cells after this one has run.
if __name__ == "__main__":
    ols_results = load_model(MODEL_PATH)
    df_act = load_activation_data(ACTIVATION_PATH)
    predict_inference(ols_results, df_act)


Loading OLS model from: /tmp/knowledgeBase/currentOlsSoluGon.pickle


FileNotFoundError: [Errno 2] No such file or directory: '/tmp/knowledgeBase/currentOlsSoluGon.pickle'