
### This notebook contains all steps for developing and running the models.

In [0]:
import pandas as pd
import numpy as np
import os
import openai
from openai import OpenAI
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from collections import Counter
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
) 
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import classification_report
from torch.utils.data import DataLoader, TensorDataset
from tenacity import retry, wait_random_exponential, stop_after_attempt

In [0]:
# Location of the model-ready MPST dataset on DBFS.
MPST_CSV_PATH = "/dbfs/tmp/model_ready_MPST.csv"

# Load the dataset and show its row count (only the last expression of a cell is displayed).
MPST_df = pd.read_csv(MPST_CSV_PATH)
len(MPST_df)

14511

In [0]:
MPST_df.head()

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,award_label,num_tags,title_length,plot_length,award_label_length
0,tt0057603,I tre volti della paura,Note: this synopsis is for the orginal Italian...,"[cult, horror, gothic, murder, atmospheric]",train,No award,5,23,7527,8
1,tt1733125,Dungeons & Dragons: The Book of Vile Darkness,"Two thousand years ago, Nhagruul the Foul, a s...",[violence],train,No award,1,45,2077,8
2,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,No award,1,26,4003,8
3,tt0113862,Mr. Holland's Opus,"Glenn Holland, not a morning person by anyone'...","[inspiring, romantic, stupid, feel-good]",train,No award,4,18,13215,8
4,tt0086250,Scarface,"In May 1980, a Cuban man named Tony Montana (A...","[cruelty, murder, dramatic, cult, violence...",val,No award,10,8,17575,8


In [0]:
import ast

# Convert the 'tags' column from string representation of lists to actual lists.
# Cells that are not strings (e.g. already-parsed lists) are passed through, so the
# cell can be re-run safely.
MPST_df['tags'] = MPST_df['tags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)


def _clean_tag_list(tag_list):
    """Return the tags of one row with quote characters and surrounding whitespace removed."""
    # Quotes are removed first so whitespace that was hidden inside them is stripped too.
    return [tag.replace("'", "").replace('"', '').strip() for tag in tag_list]


# Write the cleaned tags back to the frame: the label vocabulary below is built from the
# cleaned values, so the rows must carry exactly the same strings or they would not match
# the vocabulary when the labels are binarized or compared later.
MPST_df['tags'] = MPST_df['tags'].apply(_clean_tag_list)

# Collect every distinct tag across all rows.
unique_tags = set()
for tag_list in MPST_df['tags']:
    unique_tags.update(tag_list)

# Sorted list => stable, reproducible label order.
fixed_tags = sorted(unique_tags)

In [0]:
fixed_tags
len(fixed_tags)

71


### Baseline model: MLP-BoW

In [0]:
# Baseline input: a copy of the dataset with one free-text field per row that
# concatenates the title, the synopsis and the award label.
dataset_for_baseline = MPST_df.copy().assign(
    text=lambda frame: (
        'Title: ' + frame['title']
        + '. Synopsis: ' + frame['plot_synopsis']
        + '. Award: ' + frame['award_label']
        + '.'
    )
)

In [0]:
dataset_for_baseline.head()

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,award_label,num_tags,title_length,plot_length,award_label_length,text
0,tt0057603,I tre volti della paura,Note: this synopsis is for the orginal Italian...,"[cult, horror, gothic, murder, atmospheric]",train,No award,5,23,7527,8,Title: I tre volti della paura. Synopsis: Note...
1,tt1733125,Dungeons & Dragons: The Book of Vile Darkness,"Two thousand years ago, Nhagruul the Foul, a s...",[violence],train,No award,1,45,2077,8,Title: Dungeons & Dragons: The Book of Vile Da...
2,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,No award,1,26,4003,8,Title: The Shop Around the Corner. Synopsis: M...
3,tt0113862,Mr. Holland's Opus,"Glenn Holland, not a morning person by anyone'...","[inspiring, romantic, stupid, feel-good]",train,No award,4,18,13215,8,Title: Mr. Holland's Opus. Synopsis: Glenn Hol...
4,tt0086250,Scarface,"In May 1980, a Cuban man named Tony Montana (A...","[cruelty, murder, dramatic, cult, violence...",val,No award,10,8,17575,8,"Title: Scarface. Synopsis: In May 1980, a Cuba..."


In [0]:
print(dataset_for_baseline['tags'].isna().sum())  # Count NaN values


0


In [0]:
# Minimum predicted probability for a tag to be emitted by the baseline.
FALLBACK_THRESHOLD = 0.2

# Multi-hot encode the tag lists against the fixed tag vocabulary.
mlb = MultiLabelBinarizer(classes=fixed_tags)
y = mlb.fit_transform(dataset_for_baseline['tags'])

# Row masks as plain NumPy boolean arrays, so they index the sparse matrix and the
# label array positionally regardless of the DataFrame index.
train_mask = dataset_for_baseline['split'].isin(['train', 'val']).to_numpy()  # train + validation
test_mask = (dataset_for_baseline['split'] == 'test').to_numpy()

# Bag-of-Words representation limited to 50,000 features. The vocabulary is learned
# from the training rows only, so no information from the test synopses influences
# which words become features; all rows are then encoded with that vocabulary.
vectorizer = CountVectorizer(max_features=50000, stop_words='english')
vectorizer.fit(dataset_for_baseline.loc[train_mask, 'text'])
X = vectorizer.transform(dataset_for_baseline['text'])

# Combine train and validation data for training
X_train = X[train_mask]
y_train = y[train_mask]
X_test = X[test_mask]
y_test = y[test_mask]

# Convert to tensors for PyTorch. The sparse counts are cast to float32 *before*
# densifying so the large dense matrix is materialised once, in its final dtype.
X_train_tensor = torch.from_numpy(X_train.astype(np.float32).toarray())
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.from_numpy(X_test.astype(np.float32).toarray())
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create datasets and loaders (training batches are reshuffled every epoch).
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# MLP with two hidden layers (512 and 256 units). Each hidden layer's output is scaled
# to unit L2 norm per sample, then passed through ReLU and dropout.
class MLPWithL2Normalization(nn.Module):
    """Feed-forward multi-label classifier.

    Args:
        input_size: number of input features per sample.
        output_size: number of labels; one sigmoid output is produced per label.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Layers are created in the order fc1 -> fc2 -> fc3.
        self.fc1 = nn.Linear(input_size, 512)   # first hidden layer
        self.fc2 = nn.Linear(512, 256)          # second hidden layer
        self.fc3 = nn.Linear(256, output_size)  # output layer

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)  # shared by both hidden layers

    def _hidden_block(self, layer, x):
        """Apply linear layer -> row-wise L2 normalisation -> ReLU -> dropout."""
        projected = layer(x)
        unit_norm = nn.functional.normalize(projected, p=2, dim=1)
        return self.dropout(self.relu(unit_norm))

    def forward(self, x):
        """Return per-label probabilities (sigmoid outputs) for a 2-D batch ``x``."""
        hidden = self._hidden_block(self.fc1, x)
        hidden = self._hidden_block(self.fc2, hidden)
        return torch.sigmoid(self.fc3(hidden))

# Seed PyTorch's global RNG before building the model so that weight initialisation,
# batch shuffling and dropout masks are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Instantiate the model with dimensions taken from the training tensors.
input_size = X_train_tensor.shape[1]
output_size = y_train_tensor.shape[1]
model = MLPWithL2Normalization(input_size, output_size)

# Loss and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy for multi-label classification (model outputs probabilities)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Train for 12 epochs, reporting the mean training loss per epoch so that
# divergence or stalled learning is visible in the cell output.
num_epochs = 12
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        # Weight by batch size: the last batch may be smaller than the others.
        running_loss += loss.item() * X_batch.size(0)
    mean_loss = running_loss / len(train_loader.dataset)
    print(f"Epoch {epoch + 1}/{num_epochs} - mean training loss: {mean_loss:.4f}")

# Maximum number of tags emitted per title.
MAX_TAGS_PER_TITLE = 10

# Predict probabilities on the test set. Inference runs under no_grad so no autograd
# graph is recorded for the full test matrix.
model.eval()
with torch.no_grad():
    y_pred_proba = model(X_test_tensor).numpy()

y_pred_with_fallback = []
for sample_proba in y_pred_proba:
    # Label indices by descending probability; the stable sort keeps the original
    # label order among equal probabilities.
    ranked = np.argsort(-sample_proba, kind='stable')

    # Keep the highest-scoring tags that reach the threshold, at most MAX_TAGS_PER_TITLE.
    high_prob_tags = [
        mlb.classes_[j] for j in ranked[:MAX_TAGS_PER_TITLE]
        if sample_proba[j] >= FALLBACK_THRESHOLD
    ]

    # Fallback to the highest probability tag if no tags meet the threshold
    if not high_prob_tags:
        high_prob_tags = [mlb.classes_[np.argmax(sample_proba)]]

    y_pred_with_fallback.append(high_prob_tags)

# Prepare actual tags for comparison (one tuple of tag names per test row).
actual_tags = mlb.inverse_transform(y_test)

# Test rows in the same order as the rows of y_test / X_test_tensor.
is_test_row = dataset_for_baseline['split'] == 'test'
test_df_baseline = pd.DataFrame({
    'PROGRAM_ID': dataset_for_baseline.loc[is_test_row, 'imdb_id'].values,
    'TITLE': dataset_for_baseline.loc[is_test_row, 'title'].values,
    'Generated_Tags': y_pred_with_fallback,
    'Actual_Tags': actual_tags
})

test_df_baseline.head()

In [0]:
# Per row: how many generated tags also appear among the actual tags.
test_df_baseline['Matching_Tags_Count'] = [
    len(set(generated) & set(actual))
    for generated, actual in zip(test_df_baseline['Generated_Tags'], test_df_baseline['Actual_Tags'])
]

test_df_baseline

In [0]:
# One-off persistence step, kept for reference. Uncomment to overwrite the stored
# baseline results in Unity Catalog with the frame computed above.
# pyspark_test_df_baseline = spark.createDataFrame(test_df_baseline)
# result_test_df_baseline = "dev_data_science.mpst_dataset.results_baseline"
# pyspark_test_df_baseline.write \
#     .format("delta") \
#     .mode("overwrite") \
#     .saveAsTable(result_test_df_baseline)

# Read the stored baseline results back and convert them to a pandas DataFrame.
results_baseline = spark.read.table("dev_data_science.mpst_dataset.results_baseline").toPandas()

In [0]:
results_baseline

Unnamed: 0,PROGRAM_ID,TITLE,Generated_Tags,Actual_Tags,Matching_Tags_Count
0,tt0033045,The Shop Around the Corner,"[comedy, romantic, entertaining, flashback]",[romantic],1
1,tt1937113,Call of Duty: Modern Warfare 3,"[violence, flashback, murder, action, suspense...",[good versus evil],0
2,tt0102007,The Haunted,"[paranormal, horror]","[haunting, horror, paranormal]",2
3,tt2005374,The Frozen Ground,"[murder, violence]","[dramatic, murder]",1
4,tt1411238,No Strings Attached,"[romantic, pornographic]","[adult comedy, boring, cute, entertaining, fla...",1
...,...,...,...,...,...
2891,tt0105585,Thunderheart,[murder],"[flashback, murder, suspenseful]",1
2892,tt1869716,The East,"[murder, flashback, comedy, suspenseful, psych...","[neo noir, revenge, sentimental, suspenseful, ...",2
2893,tt0219952,Lucky Numbers,"[comedy, cult]","[comedy, murder]",1
2894,tt0039464,High Wall,"[murder, flashback]",[murder],1



#### Zero-shot (pre-trained) GPT-4o

Because the pre-trained GPT model is not trained on our own data, it only needs a test set: we select the rows of the original dataset that the baseline was evaluated on.

In [0]:
# Select the rows whose IMDb id appears in the baseline results, renumbered from 0.
test_ids = results_baseline['PROGRAM_ID'].tolist()
in_baseline_test_set = MPST_df['imdb_id'].isin(test_ids)
pretrained_GPT_dataset = MPST_df.loc[in_baseline_test_set].reset_index(drop=True)
pretrained_GPT_dataset.head()

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,award_label,num_tags,title_length,plot_length,award_label_length
0,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,No award,1,26,4003,8
1,tt1937113,Call of Duty: Modern Warfare 3,Hours after the end of the previous game and t...,[good versus evil],test,No award,1,30,14242,8
2,tt0102007,The Haunted,This creepy and scary story centers around The...,"[paranormal, horror, haunting]",test,No award,3,11,934,8
3,tt2005374,The Frozen Ground,The film opens in an Anchorage motel room in 1...,"[dramatic, murder]",test,No award,2,17,5006,8
4,tt1411238,No Strings Attached,"15 years agoWe see two young kids, named Emma ...","[boring, adult comedy, cute, flashback, ro...",test,No award,6,19,10773,8


In [0]:
def initialize_openai_api():
    """Configure the module-level ``openai`` client for Azure OpenAI.

    This was made to work with the package openai==1.38.0.

    Credentials are never stored in the notebook. They are read from the environment:

    * ``OPENAI_API_KEY``        - the API key
    * ``AZURE_OPENAI_ENDPOINT`` - the endpoint URL of the Azure OpenAI resource

    Set both before running this cell (for example from a secret store or the
    cluster's environment configuration).

    Raises:
        RuntimeError: if either environment variable is missing or empty.
    """
    missing = [name for name in ("OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT") if not os.environ.get(name)]
    if missing:
        raise RuntimeError(
            "Missing environment variable(s): " + ", ".join(missing)
            + ". Provide them via the environment instead of hardcoding credentials in the notebook."
        )

    openai.api_type = "azure"
    openai.azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
    openai.api_version = "2024-02-01"

initialize_openai_api()

In [0]:
# Model name used for the zero-shot (not fine-tuned) runs.
pretrained_model = "gpt-4o"

def truncate_text(text, max_length):
    """Return ``text`` unchanged if it has at most ``max_length`` characters.

    Longer text is cut to its first ``max_length`` characters and suffixed with '...'.
    """
    if len(text) <= max_length:
        return text
    return text[:max_length] + '...'

# Build the tagging prompt, send it to the GPT model and parse the reply.
@retry(wait=wait_random_exponential(min=1, max=90), stop=stop_after_attempt(6))
def _request_tag_completion(model, prompt, max_tokens, temperature):
    """Send ``prompt`` to the chat-completions API and return the reply text.

    Only this network call is retried (random exponential back-off, up to 6 attempts).

    Raises:
        ValueError: if the response carries no message text (this is retried as well).
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that generates tags for movies or series."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=temperature
    )

    if response and response.choices and response.choices[0].message and response.choices[0].message.content:
        return response.choices[0].message.content
    raise ValueError("Invalid response from OpenAI API")


def _parse_tag_reply(reply, tag_list, limit=10):
    """Extract tags from the model reply, keeping only known tags, without duplicates."""
    # Match case-insensitively but always return the spelling used in tag_list.
    canonical = {tag.lower(): tag for tag in tag_list}
    picked = []
    # Accept comma- or newline-separated replies.
    for piece in reply.replace('\n', ',').split(','):
        # Drop decoration around a tag: quotes, bullets, trailing period.
        candidate = piece.strip().strip("\"'.-* ").lower()
        tag = canonical.get(candidate)
        if tag is not None and tag not in picked:
            picked.append(tag)
    return picked[:limit]


def _to_text(value):
    """Return ``value`` as a string; None and NaN become the empty string."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def generate_tags_gpt(title, description, award, tag_list, model, max_tokens=50, temperature=0.0, verbose=False):
    """Ask a chat model to pick tags for one title from a fixed tag list.

    Args:
        title: title of the movie or series.
        description: plot synopsis.
        award: award label.
        tag_list: allowed tags; only tags from this list are returned.
        model: name of the model to call.
        max_tokens: maximum number of tokens the model may generate.
        temperature: sampling temperature.
        verbose: if True, print the truncated inputs (debugging aid).

    Returns:
        A list of at most ten distinct tags from ``tag_list``. It can be empty if
        the reply contained no known tag.

    Raises:
        ValueError: if the assembled prompt is too long (raised immediately, not retried).
        tenacity.RetryError: if the API call keeps failing after all retry attempts.
    """
    # Truncation limits, in characters (truncate_text appends '...' when it cuts).
    max_title_length = 50
    max_description_length = 10000
    max_award_length = 50

    # Truncate inputs to avoid exceeding the context limit
    title = truncate_text(_to_text(title), max_title_length)
    description = truncate_text(_to_text(description), max_description_length)
    award = truncate_text(_to_text(award), max_award_length)

    if verbose:
        print(f"Truncated Title: {title}")
        print(f"Truncated Description: {description}")
        print(f"Truncated Award: {award}")

    prompt = f"""
    Based on the title, synopsis, and possible awards of a movie or series, please generate tags from this list: {', '.join(tag_list)}.

    Title: {title}
    Synopsis: {description}
    Award: {award}

    Return tags from the fixed list that seem the most relevant to the entry, and hold a strict maximum of ten tags per entry. Always give at least one. Avoid duplicates at all costs.
    """

    # Guard on prompt size. The length is measured in characters, not tokens.
    total_prompt_length = len(prompt)
    max_allowed_prompt_length = 50000 - max_tokens
    if total_prompt_length > max_allowed_prompt_length:
        raise ValueError(
            f"Prompt length exceeds limit: {total_prompt_length} characters (limit {max_allowed_prompt_length})"
        )

    reply = _request_tag_completion(model, prompt, max_tokens, temperature)
    return _parse_tag_reply(reply, tag_list, limit=10)

In [0]:
# Tag every row with the pre-trained model; generate_tags_gpt is called once per row.
subset_pretrained = pretrained_GPT_dataset


def _tag_row_pretrained(row):
    """Generate tags for one dataset row with the pre-trained model."""
    return generate_tags_gpt(row['title'], row['plot_synopsis'], row['award_label'], fixed_tags, pretrained_model)


subset_pretrained['Generated_Tags'] = subset_pretrained.apply(_tag_row_pretrained, axis=1)

In [0]:
# One-off persistence step, kept for reference. Uncomment to overwrite the stored
# zero-shot results in Unity Catalog with the frame computed above.
# pyspark_subset_pretrained = spark.createDataFrame(subset_pretrained)
# results_pretrained = "dev_data_science.mpst_dataset.results_pretrained2"
# pyspark_subset_pretrained.write \
#     .format("delta") \
#     .mode("overwrite") \
#     .saveAsTable(results_pretrained)

# Read the stored zero-shot results back and convert them to a pandas DataFrame.
results_pretrained = spark.read.table("dev_data_science.mpst_dataset.results_pretrained2").toPandas()

In [0]:
results_pretrained

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,synopsis_source,award_label,tag_count,Generated_Tags
0,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,imdb,No award,1,"[romantic, comedy, dramatic, mystery, sentimen..."
1,tt1937113,Call of Duty: Modern Warfare 3,Hours after the end of the previous game and t...,[good versus evil],test,imdb,No award,1,"[action, suspenseful, dramatic, revenge, viole..."
2,tt0102007,The Haunted,This creepy and scary story centers around The...,"[paranormal, horror, haunting]",test,imdb,No award,3,"[horror, paranormal, dark, haunting, mystery, ..."
3,tt2005374,The Frozen Ground,The film opens in an Anchorage motel room in 1...,"[dramatic, murder]",test,wikipedia,No award,2,"[suspenseful, dark, murder, psychological, mys..."
4,tt1411238,No Strings Attached,"15 years agoWe see two young kids, named Emma ...","[boring, adult comedy, cute, flashback, romant...",test,imdb,No award,6,"[comedy, romantic, adult comedy, feel-good, dr..."
...,...,...,...,...,...,...,...,...,...
2891,tt0105585,Thunderheart,"During the early 1970s, FBI agent Ray Levoi is...","[suspenseful, murder, flashback]",test,wikipedia,No award,3,"[mystery, historical fiction, suspenseful, int..."
2892,tt1869716,The East,"Jane, an operative for the private intelligenc...","[suspenseful, neo noir, violence, tragedy, rev...",test,wikipedia,No award,6,"[intrigue, suspenseful, psychological, anti wa..."
2893,tt0219952,Lucky Numbers,"In 1988 Russ Richards (John Travolta), the wea...","[comedy, murder]",test,wikipedia,No award,2,"[comedy, satire, dark, murder, intrigue, plot ..."
2894,tt0039464,High Wall,Steven Kenet catches his unfaithful wife in th...,[murder],test,wikipedia,No award,1,"[mystery, psychological, suspenseful, murder, ..."



#### Fine-tuned GPT-4o


Creating curated JSON train dataset for finetuning (filtering out harmful content):

In [0]:
# Size and seed of the fine-tuning candidate sample (seeded so the sample is reproducible).
CURATION_SAMPLE_SIZE = 500
CURATION_SEED = 42

# Draw the candidates from the train/validation rows only: the fine-tuned model is
# evaluated on the test rows, so those must not end up in its training data.
finetune_pool = MPST_df[MPST_df['split'].isin(['train', 'val'])]
curation_sample = finetune_pool.sample(CURATION_SAMPLE_SIZE, random_state=CURATION_SEED).copy()

In [0]:
#### FILTERING OUT HARMFUL CONTENT WITH GPT-4o

# Model used for the content-moderation pass.
curation_model = "gpt-4o"

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(5))
def is_harmful(text, model=None):
    """Ask a chat model whether a synopsis contains harmful content.

    The call is retried with random exponential back-off (up to 5 attempts).

    Args:
        text: the synopsis to check.
        model: model name to call; defaults to ``curation_model``.

    Returns:
        True if the model's answer starts with "yes" (case-insensitive), else False.
        A response without choices or without text also yields False, i.e. the
        synopsis is kept.
    """
    prompt = f"""
    Identify if the following movie synopsis contains harmful content such as hate speech, discrimination, sexual content, extreme violence, explicit material, or misinformation. 
    
    Text: "{text}"
    
    Respond with only "yes" if it is harmful or "no" if it is safe.
    """

    response = openai.chat.completions.create(
        model=model or curation_model,
        messages=[
            {"role": "system", "content": "You are a content moderation AI that flags harmful material."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=10,
        temperature=0.0
    )

    if response and response.choices:
        # The message content can be None; treat that like an empty answer.
        verdict = (response.choices[0].message.content or "").strip().lower()
        # startswith tolerates answers such as "Yes." or "yes, ...".
        return verdict.startswith("yes")
    return False

# Flag every sampled synopsis; rows without a synopsis are treated as not harmful.
curation_sample["is_harmful"] = [
    is_harmful(synopsis) if pd.notnull(synopsis) else False
    for synopsis in curation_sample["plot_synopsis"]
]

# Keep only the rows that were not flagged and drop the helper column again.
not_flagged = ~curation_sample["is_harmful"]
cleaned_sample = curation_sample.loc[not_flagged].drop(columns=["is_harmful"])

cleaned_sample.to_csv("cleaned_sample.csv", index=False)

print(f"Original dataset size: {len(curation_sample)}")
print(f"Cleaned dataset size: {len(cleaned_sample)}")



##### Convert the finetune sample to the right JSON format:

In [0]:
import json

def dataframe_to_jsonl(df, output_file):
    """Write one prompt/completion JSON object per row of ``df`` to ``output_file``.

    Args:
        df: DataFrame with the columns 'TITLE', 'description' and 'actual_tags'.
        output_file: path of the JSONL file to create (overwritten if it exists).

    Each line has the form {"prompt": "Title: ...\\nSynopsis: ...\\nTags: ", "completion": ...}.
    A list, tuple or NumPy array in 'actual_tags' is joined with ", "; any other value
    is written as it is.
    """
    def _completion_text(tags):
        # Sequences of tags become a single comma-separated string.
        if isinstance(tags, (list, tuple, np.ndarray)):
            return ", ".join(str(tag) for tag in tags)
        return tags

    records = [
        {
            "prompt": f"Title: {row['TITLE']}\nSynopsis: {row['description']}\nTags: ",
            "completion": _completion_text(row['actual_tags']),
        }
        for _, row in df.iterrows()
    ]

    with open(output_file, "w") as handle:
        handle.writelines(json.dumps(record) + "\n" for record in records)

    print(f"Training data saved as {output_file}")


# NOTE(review): `final_curated_sample_MPST` is not defined in any cell visible in this
# notebook. dataframe_to_jsonl reads its 'TITLE', 'description' and 'actual_tags'
# columns. TODO confirm where this frame is created.
dataframe_to_jsonl(final_curated_sample_MPST, "curated_finetune_MPST_data.jsonl")

In [0]:
# Read the prompt/completion records back: one JSON object per line.
with open('curated_finetune_MPST_data.jsonl', 'r') as infile:
    JSON_file = list(map(json.loads, infile))

In [0]:
# Rewrite the file in chat format: every record becomes a system / user / assistant
# message triple. The file is overwritten in place, so afterwards its records no
# longer carry the 'prompt' and 'completion' keys.
with open('curated_finetune_MPST_data.jsonl', 'w') as outfile:
    for record in JSON_file:
        chat_example = {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant that generates tags for movies or series."},
                {"role": "user", "content": record["prompt"]},
                {"role": "assistant", "content": record["completion"]},
            ]
        }
        outfile.write(json.dumps(chat_example) + '\n')

In [0]:
# Load the chat-formatted training examples: one JSON object per line.
with open('curated_finetune_MPST_data.jsonl', 'r') as infile:
    JSON_training_file = list(map(json.loads, infile))

In [0]:
JSON_training_file


Now that the curated sample data is in the right format, follow the steps in this guide to fine-tune the model: https://techcommunity.microsoft.com/blog/educatordeveloperblog/fine-tuning-language-models-with-azure-ai-foundry-a-detailed-guide/4369281




After the model is trained, call it here:

In [0]:
finetuned_model = "gpt-4o-2024-finetuned"  # replace with the name of the fine-tuned model you created

In [0]:
# Select the rows whose IMDb id appears in the baseline results, renumbered from 0.
test_ids = results_baseline['PROGRAM_ID'].tolist()
in_baseline_test_set = MPST_df['imdb_id'].isin(test_ids)
finetuned_GPT_dataset = MPST_df.loc[in_baseline_test_set].reset_index(drop=True)
finetuned_GPT_dataset.head()

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,award_label,num_tags,title_length,plot_length,award_label_length
0,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,No award,1,26,4003,8
1,tt1937113,Call of Duty: Modern Warfare 3,Hours after the end of the previous game and t...,[good versus evil],test,No award,1,30,14242,8
2,tt0102007,The Haunted,This creepy and scary story centers around The...,"[paranormal, horror, haunting]",test,No award,3,11,934,8
3,tt2005374,The Frozen Ground,The film opens in an Anchorage motel room in 1...,"[dramatic, murder]",test,No award,2,17,5006,8
4,tt1411238,No Strings Attached,"15 years agoWe see two young kids, named Emma ...","[boring, adult comedy, cute, flashback, ro...",test,No award,6,19,10773,8


In [0]:
# Tag every row with the fine-tuned model; generate_tags_gpt is called once per row.
subset_finetuned = finetuned_GPT_dataset


def _tag_row_finetuned(row):
    """Generate tags for one dataset row with the fine-tuned model."""
    return generate_tags_gpt(row['title'], row['plot_synopsis'], row['award_label'], fixed_tags, finetuned_model)


subset_finetuned['Generated_Tags'] = subset_finetuned.apply(_tag_row_finetuned, axis=1)

In [0]:
# One-off persistence step, kept for reference. Uncomment to overwrite the stored
# fine-tuned results in Unity Catalog with the frame computed above.
# pyspark_subset_finetuned = spark.createDataFrame(subset_finetuned)
# results_finetuned = "dev_data_science.mpst_dataset.results_finetuned"
# pyspark_subset_finetuned.write \
#     .format("delta") \
#     .mode("overwrite") \
#     .saveAsTable(results_finetuned)

# Read the stored fine-tuned results back and convert them to a pandas DataFrame.
results_finetuned = spark.read.table("dev_data_science.mpst_dataset.results_finetuned").toPandas()

In [0]:
results_finetuned

Unnamed: 0,imdb_id,title,plot_synopsis,tags,split,synopsis_source,award_label,tag_count,Generated_Tags
0,tt0033045,The Shop Around the Corner,"Matuschek's, a gift store in Budapest, is the ...",[romantic],test,imdb,No award,1,[romantic]
1,tt1937113,Call of Duty: Modern Warfare 3,Hours after the end of the previous game and t...,[good versus evil],test,imdb,No award,1,"[action, violence, revenge, suspenseful, dark]"
2,tt0102007,The Haunted,This creepy and scary story centers around The...,"[paranormal, horror, haunting]",test,imdb,No award,3,[horror]
3,tt2005374,The Frozen Ground,The film opens in an Anchorage motel room in 1...,"[dramatic, murder]",test,wikipedia,No award,2,"[murder, suspenseful, psychological, dark, vio..."
4,tt1411238,No Strings Attached,"15 years agoWe see two young kids, named Emma ...","[boring, adult comedy, cute, flashback, romant...",test,imdb,No award,6,[comedy]
...,...,...,...,...,...,...,...,...,...
2891,tt0105585,Thunderheart,"During the early 1970s, FBI agent Ray Levoi is...","[suspenseful, murder, flashback]",test,wikipedia,No award,3,"[mystery, suspenseful, murder, historical fict..."
2892,tt1869716,The East,"Jane, an operative for the private intelligenc...","[suspenseful, neo noir, violence, tragedy, rev...",test,wikipedia,No award,6,"[revenge, suspenseful, psychological, thought-..."
2893,tt0219952,Lucky Numbers,"In 1988 Russ Richards (John Travolta), the wea...","[comedy, murder]",test,wikipedia,No award,2,"[comedy, satire, dark, murder, mystery, violence]"
2894,tt0039464,High Wall,Steven Kenet catches his unfaithful wife in th...,[murder],test,wikipedia,No award,1,"[psychological, suspenseful, murder, mystery, ..."
