## Installing and Importing Necessary Libraries

In [None]:
# Installation for GPU llama-cpp-python
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m157.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m226.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.2/133.2 kB[0m [31m313.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for llama-cpp-python (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
lida 0.0.10 requi

In [None]:
# For downloading the models from HF Hub
!pip install huggingface_hub -q

In [None]:
# Importing library for data manipulation
import pandas as pd

# Function to download the model from the Hugging Face model hub
from huggingface_hub import hf_hub_download

# Importing the Llama class from the llama_cpp module
from llama_cpp import Llama

# Importing the json module
import json

## Import the dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
data = pd.read_csv("/content/drive/MyDrive/Previous_Projects/christmas_movies_reviews.csv")
df = data.sample(frac = 1)

## Data Overview

In [None]:
# checking the first five rows of the data
df.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review
43,Holdovers,2023,2024,1,This was a cinematic collage assembled from th...
42,Holdovers,2023,2023,4,5 stars is up there with the best movies I hav...
48,Holdovers,2023,2024,3,"Well meaning but a bit flavorless, it is a hig..."
22,Office Christmas Party,2016,2022,5,Great seasonal comedy. Specially when we have ...
44,Holdovers,2023,2024,4,"Very warmhearted and sweet film, I love a good..."


In [None]:
# checking the shape of the data
df.shape

(50, 5)

**Observations**

- Data has 20 rows and 3 columns

In [None]:
# checking for missing values
df.isnull().sum()

Movie             0
Release_Year      0
Year_of_Review    0
Stars             0
Review            0
dtype: int64

**Observations**

- There are no missing values in the data

## Model Building

### Loading the model

In [None]:
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf" # the model is in gguf format

In [None]:
# Using hf_hub_download to download a model from the Hugging Face model hub
# The repo_id parameter specifies the model name or path in the Hugging Face repository
# The filename parameter specifies the name of the file to download
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


llama-2-13b-chat.Q5_K_M.gguf:   0%|          | 0.00/9.23G [00:00<?, ?B/s]

In [None]:
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,  # CPU cores
    n_batch=512,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
    n_gpu_layers=50,  # Change this value based on your model and your GPU VRAM pool.
    n_ctx=4200,  # Context window
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /root/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGUF/snapshots/4458acc949de0a9914c3eab623904d4fe999050a/llama-2-13b-chat.Q5_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 5120
llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 13824
llama_model_loader: - kv   6:                 llama.rope.dimension_

### Defining Model Response Parameters

In [None]:
def generate_llama_response(instruction, review):

    # System message explicitly instructing not to include the review text
    system_message = """
        [INST]<<SYS>>
        {}
        <</SYS>>[/INST]
    """.format(instruction)

    # Combine user_prompt and system_message to create the prompt
    prompt = f"{review}\n{system_message}"

    # Generate a response from the LLaMA model
    response = lcpp_llm(
        prompt=prompt,
        max_tokens=1024,
        temperature=0,
        top_p=0.95,
        repeat_penalty=1.0,
        top_k=50,
        stop=['INST'],
        echo=False,
        seed=42,
    )

    # Extract the sentiment from the response
    response_text = response["choices"][0]["text"]
    return response_text

## 1. Sentiment Analysis

In [None]:
# creating a copy of the data
df2 = df.copy()

In [None]:
# defining the instructions for the model
instruction_A = """
    You are an expert at analyzing christmas reviews. Classify the sentiment of the provided review into the following categories:
    - Positive
    - Negative
    - Neutral
"""

In [None]:
df2['llama_response'] = df2['Review'].apply(lambda x: generate_llama_response(instruction_A, x))


llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =       4.95 ms /   118 runs   (    0.04 ms per token, 23862.49 tokens per second)
llama_print_timings: prompt eval time =     921.88 ms /   237 tokens (    3.89 ms per token,   257.08 tokens per second)
llama_print_timings:        eval time =    6202.62 ms /   117 runs   (   53.01 ms per token,    18.86 tokens per second)
llama_print_timings:       total time =    7454.26 ms /   354 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =       5.09 ms /   120 runs   (    0.04 ms per token, 23575.64 tokens per second)
llama_print_timings: prompt eval time =     528.22 ms /   206 tokens (    2.56 ms per token,   389.99 tokens per second)
llama_print_timings:        eval time =    6339.62 ms /   119 runs   (   53.27 ms per token,    18.77 tokens per second)
llama_print_timings:       total time =    7300.36 ms /   325 

In [None]:
df2['llama_response'].head()

43     Sure, I'd be happy to help! Based on the revi...
42     Sure, I'd be happy to help! Here's the sentim...
48     Sure, I'd be happy to help! Based on the revi...
22     Sure, I'd be happy to help! Based on the revi...
44     Sure, I'd be happy to help! Here's the sentim...
Name: llama_response, dtype: object

In [None]:
i = 5
print(df2.loc[i, 'llama_response'])

 Sure! Here's the sentiment analysis of the review you provided:

Sentiment Analysis:

The review expresses a positive sentiment towards the movie "The Holiday".

Reasoning:

* "The perfect thing to watch" - This phrase suggests that the reviewer has a positive opinion of the movie.
* "Pangs of unrequited love" - Although this aspect may not be a selling point for everyone, it does not detract from the overall positive sentiment of the review.
* "Tingly feeling arising from kisses" - This phrase suggests that the movie has romantic and emotional elements that the reviewer enjoys.
* "Easy on eyes actors" - This phrase suggests that the reviewer finds the actors in the movie attractive or enjoyable to watch.
* "Bonus: there's the feel good vibe too" - This phrase suggests that the movie has a positive and uplifting message, which reinforces the overall positive sentiment of the review.

Therefore, the sentiment of the review is classified as POSITIVE.


In [None]:
print(df2.loc[i, 'llama_response'])

 Sure! Here's the sentiment analysis of the review you provided:

Sentiment Analysis:

The review expresses a positive sentiment towards the movie "The Holiday".

Reasoning:

* "The perfect thing to watch" - This phrase suggests that the reviewer has a positive opinion of the movie.
* "Pangs of unrequited love" - Although this aspect may not be a selling point for everyone, it does not detract from the overall positive sentiment of the review.
* "Tingly feeling arising from kisses" - This phrase suggests that the movie has romantic and emotional elements that the reviewer enjoys.
* "Easy on eyes actors" - This phrase suggests that the reviewer finds the actors in the movie attractive or enjoyable to watch.
* "Bonus: there's the feel good vibe too" - This phrase suggests that the movie has a positive and uplifting message, which reinforces the overall positive sentiment of the review.

Therefore, the sentiment of the review is classified as POSITIVE.


In [None]:
def sentiment_extractor(llama_response):
    if 'positive' in llama_response.lower():
        return 'Positive'
    elif 'negative' in llama_response.lower():
        return 'Negative'
    elif 'neutral' in llama_response.lower():
        return 'Neutral'

In [None]:
# applying the function to the model response
df2['sentiment'] = df2['llama_response'].apply(sentiment_extractor)
df2['sentiment'].head()

43    Negative
42    Positive
48    Positive
22    Positive
44    Positive
Name: sentiment, dtype: object

In [None]:
df2['sentiment'].value_counts()

Positive    34
Negative    15
Neutral      1
Name: sentiment, dtype: int64

####**filter for rows with negative and neutral opinion**

In [None]:
df2[(df2['sentiment'] != 'Positive')]

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,sentiment
43,Holdovers,2023,2024,1,This was a cinematic collage assembled from th...,"Sure, I'd be happy to help! Based on the revi...",Negative
36,Bad Santa,2003,2023,3,It was okay. I would not see it as Christmas m...,"Sure! Based on the review provided, I would c...",Neutral
38,Bad Santa,2003,2021,1,A dark and depressing movie that tries to pret...,"Sure, I'd be happy to help! Based on the revi...",Negative
49,Holdovers,2023,2023,1,"Of all the movies I've seen, and I've seen qui...","Sure, I'd be happy to help! Based on the revi...",Negative
13,Love Actually,2003,2023,1,How can people love this ridiculous film? Norm...,"Sure, I'd be happy to help! Based on the revi...",Negative
14,Love Actually,2003,2022,1,After 17 years I realised I really don’t like ...,"Sure, I'd be happy to help! Based on the revi...",Negative
45,Holdovers,2023,2024,2,It amazes me how mediocre films get so much pr...,"Sure! Based on the review provided, I would c...",Negative
2,The Holiday,2006,2022,2,I really want to like this movie because I lov...,"Sure, I'd be happy to help! Based on the revi...",Negative
21,Office Christmas Party,2016,2021,1,This movie could be condensed into a half hour...,"Sure, I'd be happy to help! Based on the revi...",Negative
35,Bad Santa,2003,2022,1,"If you like crass films that go nowhere, it st...","Sure, I'd be happy to help! Here's the review...",Negative


## 2. Sentiment Analysis and Returning Structured Output

In [None]:
# creating a copy of the data
df3 = df.copy()

In [None]:
# defining the instructions for the model
instruction_B = """
    You are an expert at analyzing christmas reviews. Classify the sentiment of the provided review into the following categories:
    - Positive
    - Negative
    - Neutral

    Format the output as a JSON object with a single key-value pair as shown below:
    {"sentiment": "your_sentiment_prediction"}
"""

In [None]:
df3['llama_response'] = df3['Review'].apply(lambda x: generate_llama_response(instruction_B, x))

Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =       7.79 ms /   180 runs   (    0.04 ms per token, 23118.42 tokens per second)
llama_print_timings: prompt eval time =     695.66 ms /   271 tokens (    2.57 ms per token,   389.56 tokens per second)
llama_print_timings:        eval time =    9618.36 ms /   179 runs   (   53.73 ms per token,    18.61 tokens per second)
llama_print_timings:       total time =   10960.07 ms /   450 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =       7.04 ms /   163 runs   (    0.04 ms per token, 23166.57 tokens per second)
llama_print_timings: prompt eval time =     534.86 ms /   242 tokens (    2.21 ms per token,   452.45 tokens per second)
llama_print_timings:        eval time =    8727.36 ms /   162 runs   (   53.87 ms per token,    18.56 tokens per second)
llama_print_timings:       to

In [None]:
df3['llama_response'].head()

43     Sure, I'd be happy to help! Based on the revi...
42     Sure, I'd be happy to help! Based on the revi...
48     Sure! Here's the sentiment analysis of the re...
22     Sure, I'd be happy to help! Based on the revi...
44     Sure, I'd be happy to help! Here's my analysi...
Name: llama_response, dtype: object

In [None]:
i = 5
print(df3.loc[i, 'Review'])

The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch.


In [None]:
# defining a function to parse the JSON output from the model
def json_extractor(json_str):
    try:
        # Find the indices of the opening and closing curly braces
        json_start = json_str.find('{')
        json_end = json_str.rfind('}')

        if json_start != -1 and json_end != -1:
            extracted_sentiment = json_str[json_start:json_end + 1]  # Extract the JSON object
            data_dict = json.loads(extracted_sentiment)
            return data_dict
        else:
            print(f"Warning: JSON object not found in response: {json_str}")
            return {}
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return {}

In [None]:
# applying the function to the model response
df3['llama_response_parsed'] = df3['llama_response'].apply(json_extractor)
df3['llama_response_parsed'].head()

43    {'sentiment': 'Negative'}
42    {'sentiment': 'Positive'}
48     {'sentiment': 'Neutral'}
22    {'sentiment': 'Positive'}
44    {'sentiment': 'Positive'}
Name: llama_response_parsed, dtype: object

In [None]:
llama_response_parsed_df3 = pd.json_normalize(df3['llama_response_parsed'])
llama_response_parsed_df3.head()

Unnamed: 0,sentiment
0,Negative
1,Positive
2,Neutral
3,Positive
4,Positive


In [None]:
data_with_parsed_llama_output_3 = pd.concat([df3, llama_response_parsed_df3], axis=1)
data_with_parsed_llama_output_3.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,llama_response_parsed,sentiment
43,Holdovers,2023,2024,1,This was a cinematic collage assembled from th...,"Sure, I'd be happy to help! Based on the revi...",{'sentiment': 'Negative'},Positive
42,Holdovers,2023,2023,4,5 stars is up there with the best movies I hav...,"Sure, I'd be happy to help! Based on the revi...",{'sentiment': 'Positive'},Positive
48,Holdovers,2023,2024,3,"Well meaning but a bit flavorless, it is a hig...",Sure! Here's the sentiment analysis of the re...,{'sentiment': 'Neutral'},Neutral
22,Office Christmas Party,2016,2022,5,Great seasonal comedy. Specially when we have ...,"Sure, I'd be happy to help! Based on the revi...",{'sentiment': 'Positive'},Positive
44,Holdovers,2023,2024,4,"Very warmhearted and sweet film, I love a good...","Sure, I'd be happy to help! Here's my analysi...",{'sentiment': 'Positive'},Negative


In [None]:
final_data_3 = data_with_parsed_llama_output_3.drop(['llama_response_parsed'], axis=1)
final_data_3.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,sentiment
43,Holdovers,2023,2024,1,This was a cinematic collage assembled from th...,"Sure, I'd be happy to help! Based on the revi...",Positive
42,Holdovers,2023,2023,4,5 stars is up there with the best movies I hav...,"Sure, I'd be happy to help! Based on the revi...",Positive
48,Holdovers,2023,2024,3,"Well meaning but a bit flavorless, it is a hig...",Sure! Here's the sentiment analysis of the re...,Neutral
22,Office Christmas Party,2016,2022,5,Great seasonal comedy. Specially when we have ...,"Sure, I'd be happy to help! Based on the revi...",Positive
44,Holdovers,2023,2024,4,"Very warmhearted and sweet film, I love a good...","Sure, I'd be happy to help! Here's my analysi...",Negative


In [None]:
final_data_3['sentiment'].value_counts(normalize = True)

Positive    0.58
Negative    0.34
Neutral     0.08
Name: sentiment, dtype: float64

## 3. Identifying Overall Sentiment and Sentiment of Aspects of the Experience

In [None]:
# creating a copy of the data
df4 = data.copy()

In [None]:
# defining the instructions for the model
instruction_C = """
    You are an expert at analyzing christmas reviews. Classify the overall sentiment of the provided review into the following categories:
    - "Positive"
    - "Negative"
    - "Neutral"

    Once that is done, check for a mention of the following aspects in the review and classify the sentiment of each aspect as "Positive", "Negative", or "Neutral":
    1. "Plot"
    2. "Duration"
    3. "Cast"

    Output the overall sentiment and sentiment for each category in a JSON format with the following keys:
    {
        "Overall": "your_sentiment_prediction",
        "Plot": "your_sentiment_prediction",
        "Duration": "your_sentiment_prediction",
        "Cast": "your_sentiment_prediction"
    }

    In case one of the three aspects is not mentioned in the review, set "Not Applicable" (including quotes) for the corresponding JSON key value.
    Only return the JSON, do not return any other information.
"""

In [None]:
df4['llama_response'] = df4['Review'].apply(lambda x: generate_llama_response(instruction_C, x))

Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =      45.32 ms /  1024 runs   (    0.04 ms per token, 22596.88 tokens per second)
llama_print_timings: prompt eval time =     765.44 ms /   368 tokens (    2.08 ms per token,   480.77 tokens per second)
llama_print_timings:        eval time =   58049.40 ms /  1023 runs   (   56.74 ms per token,    17.62 tokens per second)
llama_print_timings:       total time =   63176.62 ms /  1391 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =      44.24 ms /  1024 runs   (    0.04 ms per token, 23146.47 tokens per second)
llama_print_timings: prompt eval time =     783.26 ms /   378 tokens (    2.07 ms per token,   482.60 tokens per second)
llama_print_timings:        eval time =   60655.70 ms /  1023 runs   (   59.29 ms per token,    16.87 tokens per second)
llama_print_timings:       to

In [None]:
df4['llama_response'].head()

0    \n    {\n        "Overall": "Positive",\n     ...
1    \n    {\n        "Overall": "Positive",\n     ...
2     Sure, I can help you with that! Here's the ov...
3     {\n        "Overall": "Negative",\n        "P...
4     Sure, I'd be happy to help! Here's the review...
Name: llama_response, dtype: object

In [None]:
i = 5
print(df4.loc[i, 'Review'])

The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch.


In [None]:
print(df4.loc[i, 'llama_response'])

 {
        "Overall": "Positive",
        "Plot": "Positive",
        "Duration": "Neutral",
        "Cast": "Positive"
    }


In [None]:
# applying the function to the model response
df4['llama_response_parsed'] = df4['llama_response'].apply(json_extractor)
df4['llama_response_parsed'].head()

0    {'Overall': 'Positive', 'Plot': 'Positive', 'D...
1    {'Overall': 'Positive', 'Plot': 'Positive', 'D...
2    {'Overall': 'Negative', 'Plot': 'Negative', 'D...
3    {'Overall': 'Negative', 'Plot': 'Negative', 'D...
4    {'Overall': 'Neutral', 'Plot': 'Negative', 'Du...
Name: llama_response_parsed, dtype: object

In [None]:
llama_response_parsed_df_4 = pd.json_normalize(df4['llama_response_parsed'])
llama_response_parsed_df_4.head()

Unnamed: 0,Overall,Plot,Duration,Cast
0,Positive,Positive,Not Applicable,Positive
1,Positive,Positive,Not Applicable,Positive
2,Negative,Negative,Neutral,Positive
3,Negative,Negative,Not Applicable,Negative
4,Neutral,Negative,Not Applicable,Positive


In [None]:
data_with_parsed_llama_output_4 = pd.concat([df4, llama_response_parsed_df_4], axis=1)
data_with_parsed_llama_output_4.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,llama_response_parsed,Overall,Plot,Duration,Cast
0,The Holiday,2006,2022,5,This beautiful movie brought me to tears of jo...,"\n {\n ""Overall"": ""Positive"",\n ...","{'Overall': 'Positive', 'Plot': 'Positive', 'D...",Positive,Positive,Not Applicable,Positive
1,The Holiday,2006,2019,4,This movie gives you all the right feels and I...,"\n {\n ""Overall"": ""Positive"",\n ...","{'Overall': 'Positive', 'Plot': 'Positive', 'D...",Positive,Positive,Not Applicable,Positive
2,The Holiday,2006,2022,2,I really want to like this movie because I lov...,"Sure, I can help you with that! Here's the ov...","{'Overall': 'Negative', 'Plot': 'Negative', 'D...",Negative,Negative,Neutral,Positive
3,The Holiday,2006,2020,1,I don’t think I’ve ever hated a film this much...,"{\n ""Overall"": ""Negative"",\n ""P...","{'Overall': 'Negative', 'Plot': 'Negative', 'D...",Negative,Negative,Not Applicable,Negative
4,The Holiday,2006,2018,3,I watched the movie several times and I always...,"Sure, I'd be happy to help! Here's the review...","{'Overall': 'Neutral', 'Plot': 'Negative', 'Du...",Neutral,Negative,Not Applicable,Positive


In [None]:
final_data_4 = data_with_parsed_llama_output_4.drop(['llama_response','llama_response_parsed'], axis=1)
final_data_4.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,Overall,Plot,Duration,Cast
0,The Holiday,2006,2022,5,This beautiful movie brought me to tears of jo...,Positive,Positive,Not Applicable,Positive
1,The Holiday,2006,2019,4,This movie gives you all the right feels and I...,Positive,Positive,Not Applicable,Positive
2,The Holiday,2006,2022,2,I really want to like this movie because I lov...,Negative,Negative,Neutral,Positive
3,The Holiday,2006,2020,1,I don’t think I’ve ever hated a film this much...,Negative,Negative,Not Applicable,Negative
4,The Holiday,2006,2018,3,I watched the movie several times and I always...,Neutral,Negative,Not Applicable,Positive


In [None]:
final_data_4['Overall'].value_counts(normalize = True)

Positive    0.58
Negative    0.34
Neutral     0.08
Name: Overall, dtype: float64

In [None]:
final_data_4['Plot'].value_counts()

Positive    28
Negative    18
Neutral      4
Name: Plot, dtype: int64

In [None]:
final_data_4['Duration'].value_counts()

Neutral           23
Not Applicable    21
Negative           4
Positive           2
Name: Duration, dtype: int64

In [None]:
final_data_4['Cast'].value_counts()

Positive          33
Neutral            8
Not Applicable     5
Negative           4
Name: Cast, dtype: int64

## 4. Identifying Overall Sentiment, Sentiment of Aspects of the Experience, and the Liked/Disliked Features of the Different Aspects of the Experience

In [None]:
# creating a copy of the data
df5 = data.copy()

In [None]:
# defining the instructions for the model
instruction_D = """
     You are an expert at analyzing christmas reviews. Your goal is to classify the overall sentiment of the provided review into the following categories:
        - Positive
        - Negative
        - Neutral

    Subsequently, assess the sentiment of specific aspects mentioned in the review, namely:
        1. Plot
        2. Duration
        3. Cast

    Further, identify liked and/or disliked features associated with each aspect in the review.

    Return the output in the specified JSON format, ensuring consistency and handling missing values appropriately:

    {
        "Overall": "your_sentiment_prediction",
        "Plot": "your_sentiment_prediction",
        "Duration": "your_sentiment_prediction",
        "Cast": "your_sentiment_prediction",
        "Plot Features": ["liked/disliked features"],
        "Duration Features": ["liked/disliked features"],
        "Cast Features": ["liked/disliked features"]
    }

    The sentiment prediction for Overall, Plot, Duration, and Cast should be one of "Positive", "Negative", or "Neutral" only.
    In case one of the three aspects is not mentioned in the review, set "Not Applicable" (including quotes) in the corresponding JSON key value for the sentiment.
    In case there are no liked/disliked features for a particular aspect, assign an empty list in the corresponding JSON key value for the aspect.
    Only return the JSON, do NOT return any other text or information.
"""

In [None]:
df5['llama_response'] = df5['Review'].apply(lambda x: generate_llama_response(instruction_D, x).replace('\n', ''))

Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =      19.79 ms /   465 runs   (    0.04 ms per token, 23497.90 tokens per second)
llama_print_timings: prompt eval time =    1027.38 ms /   484 tokens (    2.12 ms per token,   471.10 tokens per second)
llama_print_timings:        eval time =   30020.20 ms /   464 runs   (   64.70 ms per token,    15.46 tokens per second)
llama_print_timings:       total time =   33016.28 ms /   948 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =      18.55 ms /   434 runs   (    0.04 ms per token, 23401.27 tokens per second)
llama_print_timings: prompt eval time =    1053.30 ms /   493 tokens (    2.14 ms per token,   468.05 tokens per second)
llama_print_timings:        eval time =   27962.72 ms /   433 runs   (   64.58 ms per token,    15.48 tokens per second)
llama_print_timings:       to

In [None]:
i = 5
print(df5.loc[i, 'Review'])

The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch.


In [None]:
print(df5.loc[i, 'llama_response'])

 Sure, I'd be happy to help! Here's the review you provided, along with my analysis of the sentiment and features mentioned:Review:The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch.Overall:The overall sentiment of the review is Positive.Plot:The sentiment for the plot is Neutral. The reviewer mentions "pangs of unrequited love" which could be seen as a negative aspect, but they also mention that there is more to the movie than just that.Duration:The sentiment for the duration is Not Applicable, as the reviewer does not mention anything about the length of the movie.Cast:The sentiment for the cast is Positive. T

In [None]:
# applying the function to the model response
df5['llama_response_parsed'] = df5['llama_response'].apply(json_extractor)
df5['llama_response_parsed'].head()



0    {'Overall': 'Positive', 'Plot': 'Positive', 'D...
1    {'Overall': 'Positive', 'Plot': 'Not Applicabl...
2    {'Overall': 'Neutral', 'Plot': 'Neutral', 'Dur...
3    {'Overall': 'Negative', 'Plot': 'Negative', 'D...
4    {'Overall': 'Positive', 'Plot': 'Positive', 'D...
Name: llama_response_parsed, dtype: object

In [None]:
df5[df5.llama_response_parsed == {}]

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,llama_response_parsed
13,Love Actually,2003,2023,1,How can people love this ridiculous film? Norm...,"Sure, I'd be happy to help! Here's the review...",{}


- There are three model responses that the JSON parser function could not parse
- We'll manually add the values for these three responses

In [None]:
print(df5.loc[5, 'llama_response'])

 Sure, I'd be happy to help! Here's the review you provided, along with my analysis of the sentiment and features mentioned:Review:The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch.Overall:The overall sentiment of the review is Positive.Plot:The sentiment for the plot is Neutral. The reviewer mentions "pangs of unrequited love" which could be seen as a negative aspect, but they also mention that there is more to the movie than just that.Duration:The sentiment for the duration is Not Applicable, as the reviewer does not mention anything about the length of the movie.Cast:The sentiment for the cast is Positive. T

In [None]:
print(df5.loc[9, 'llama_response'])

 Sure, I'd be happy to help! Here's the review you provided, along with my analysis of the sentiment and features mentioned:Review:One of the worst “Christmas movies” ever made. Such a waste of amazing acting talent. 2 hours and 15 minutes of my life that I’ll never get back. Absolute rubbish! Had to give it one star to post a review.Overall: NegativePlot: NegativeDuration: NegativeCast: NegativePlot Features: []Duration Features: []Cast Features: []The review is overwhelmingly negative, with the reviewer expressing their disappointment and frustration with the movie. They mention that it was a waste of acting talent and that they regret spending 2 hours and 15 minutes watching it. The reviewer also uses negative language such as "worst," "rubbish," and "absolute rubbish" to express their dislike for the movie.Based on the review, it is clear that the reviewer did not like the movie and would not recommend it to others. There are no liked features mentioned in the review, and the revie

In [None]:
print(df5.loc[10, 'llama_response'])

 Sure, I'd be happy to help! Here's the review you provided, along with my analysis of the sentiment and features mentioned:Review:"This movie is so well written with it's many segments that all meld into one big group whose lives are all intertwined in some way that culminates in love being all around.  The acting is awesome on all counts and so much fun in the different circumstances and humor!  Definitely recommend this flick as a "feel good" movie!  The first couple of views I found difficult to keep up with who was who as it is quite fast moving from one scenario to the next but the more I viewed it and got to know who everyone was I enjoyed it greatly!  Love the music in it too!  We watch it every Christmas and it never disappoints!!!!"Overall:The overall sentiment of the review is Positive.Plot:The sentiment for the plot is Positive. The reviewer enjoyed the many segments that all melded together and found the story to be well-written.Duration:The sentiment for the duration is N

**Note**: The values model responses that cannot be parsed correctly by the JSON parser function may vary with execution due to the randomness associated with LLMs. Kindly update as observed when run in your system.

In [None]:
llama_response_parsed_df_5 = pd.json_normalize(df5['llama_response_parsed'])
llama_response_parsed_df_5.head()

Unnamed: 0,Overall,Plot,Duration,Cast,Plot Features,Duration Features,Cast Features
0,Positive,Positive,Not Applicable,Positive,"[emotional, joyful, hopeful]",[Not Applicable],[amazing]
1,Positive,Not Applicable,Not Applicable,Positive,[],[],"[Eli Wallach, the cast]"
2,Neutral,Neutral,Positive,Positive,[underwhelming experience with too much repeti...,[just right],[great performances]
3,Negative,Negative,Not Applicable,Negative,[],[],[]
4,Positive,Positive,Neutral,Positive,[engaging storyline],[adequate length],[great performances]


In [None]:
data_with_parsed_model_output_5 = pd.concat([df5, llama_response_parsed_df_5], axis=1)
data_with_parsed_model_output_5.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,llama_response_parsed,Overall,Plot,Duration,Cast,Plot Features,Duration Features,Cast Features
0,The Holiday,2006,2022,5,This beautiful movie brought me to tears of jo...,"Sure, I'd be happy to help! Here's the review...","{'Overall': 'Positive', 'Plot': 'Positive', 'D...",Positive,Positive,Not Applicable,Positive,"[emotional, joyful, hopeful]",[Not Applicable],[amazing]
1,The Holiday,2006,2019,4,This movie gives you all the right feels and I...,"Sure, I'd be happy to help! Here's the review...","{'Overall': 'Positive', 'Plot': 'Not Applicabl...",Positive,Not Applicable,Not Applicable,Positive,[],[],"[Eli Wallach, the cast]"
2,The Holiday,2006,2022,2,I really want to like this movie because I lov...,"Sure, I'd be happy to help! Here's the review...","{'Overall': 'Neutral', 'Plot': 'Neutral', 'Dur...",Neutral,Neutral,Positive,Positive,[underwhelming experience with too much repeti...,[just right],[great performances]
3,The Holiday,2006,2020,1,I don’t think I’ve ever hated a film this much...,"Sure, I'd be happy to help! Here's the review...","{'Overall': 'Negative', 'Plot': 'Negative', 'D...",Negative,Negative,Not Applicable,Negative,[],[],[]
4,The Holiday,2006,2018,3,I watched the movie several times and I always...,"Sure, I'd be happy to help you analyze the re...","{'Overall': 'Positive', 'Plot': 'Positive', 'D...",Positive,Positive,Neutral,Positive,[engaging storyline],[adequate length],[great performances]


In [None]:
final_data_5 = data_with_parsed_model_output_5.drop(['llama_response','llama_response_parsed'], axis=1)
final_data_5.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,Overall,Plot,Duration,Cast,Plot Features,Duration Features,Cast Features
0,The Holiday,2006,2022,5,This beautiful movie brought me to tears of jo...,Positive,Positive,Not Applicable,Positive,"[emotional, joyful, hopeful]",[Not Applicable],[amazing]
1,The Holiday,2006,2019,4,This movie gives you all the right feels and I...,Positive,Not Applicable,Not Applicable,Positive,[],[],"[Eli Wallach, the cast]"
2,The Holiday,2006,2022,2,I really want to like this movie because I lov...,Neutral,Neutral,Positive,Positive,[underwhelming experience with too much repeti...,[just right],[great performances]
3,The Holiday,2006,2020,1,I don’t think I’ve ever hated a film this much...,Negative,Negative,Not Applicable,Negative,[],[],[]
4,The Holiday,2006,2018,3,I watched the movie several times and I always...,Positive,Positive,Neutral,Positive,[engaging storyline],[adequate length],[great performances]


In [None]:
final_data_5['Overall'].value_counts(normalize = True)

Positive    0.612245
Negative    0.306122
Neutral     0.081633
Name: Overall, dtype: float64

In [None]:
final_data_5['Plot Features'].value_counts(normalize = False)

[]                                                                   28
[engaging storyline]                                                  3
[emotional, joyful, hopeful]                                          1
[funny, whacky]                                                       1
[typical boring storyline]                                            1
[warmhearted, sweet, intimate]                                        1
[great and simple]                                                    1
[heartwarming, character-driven]                                      1
[enjoyed the movie, incorrect in a positive way]                      1
[liked]                                                               1
[Unfunny and boring]                                                  1
[engaging]                                                            1
[predictable plot]                                                    1
[heart-warming, overwhelming, feel-good]                        

In [None]:
final_data_4['Duration'].value_counts()

Neutral           23
Not Applicable    21
Negative           4
Positive           2
Name: Duration, dtype: int64

In [None]:
final_data_4['Cast'].value_counts()

Positive          33
Neutral            8
Not Applicable     5
Negative           4
Name: Cast, dtype: int64

## 5. Identifying Overall Sentiment, Sentiment of Aspects of the Experience, Liked/Disliked Features of the Different Aspects of the Movie, and Sharing a Response

In [None]:
# creating a copy of the data
df6 = df.copy()

In [None]:
# defining the instructions for the model
instruction_E = """
    You are an expert at analyzing christmas reviews. Classify the overall sentiment of the provided review into the following categories:
    - "Positive"
    - "Negative"
    - "Neutral"

    Once that is done, check for a mention of the following aspects in the review and clasify the sentiment of each aspect as positive, negative, or neutral:
    1. Plot
    2. Duration
    3. Cast

    Once that is done, look for liked and/or disliked features mentioned against each of the above aspects in the review and extract them.

    Finally, draft a response for the reviewer based on the review. Start out with an appreciation text and then add on to it as per the following:
    1. If the review is positive, mention that the next sequel
    2. If the review is neutral, ask them for what could have been done to improve their cinematography experience
    3. If the review is negative, let them know that feedback will be reviewed and incorporated in the sequel

    Return the output in the specified JSON format, ensuring consistency and handling missing values appropriately Ensure that all values in the JSON are formatted as strings, and each element within the lists should be enclosed in double quotes:

    {
        "Overall": "your_sentiment_prediction",
        "Plot": "your_sentiment_prediction",
        "Duration": "your_sentiment_prediction",
        "Cast": "your_sentiment_prediction",
        "Plot Features": ["liked/disliked features"],
        "Duration Features": ["liked/disliked features"],
        "Cast Features": ["liked/disliked features"],
        "Response": "your_response_to_the_customer_review",
    }

    The sentiment prediction for Overall, Plot, Duration, and Cast should be one of "Positive", "Negative", or "Neutral" only.
    In case one of the three aspects is not mentioned in the review, set "Not Applicable" (including quotes) in the corresponding JSON key value for the sentiment.
    In case there are no liked/disliked features for a particular aspect, assign an empty list in the corresponding JSON key value for the aspect.
    Be polite and empathetic in the response to the customer review.
    Only return the JSON, do NOT return any other text or information.
"""

In [None]:
df6['llama_response'] = df6['Review'].apply(lambda x: generate_llama_response(instruction_E, x))

Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =      17.72 ms /   413 runs   (    0.04 ms per token, 23310.94 tokens per second)
llama_print_timings: prompt eval time =    1469.72 ms /   748 tokens (    1.96 ms per token,   508.94 tokens per second)
llama_print_timings:        eval time =   23259.00 ms /   412 runs   (   56.45 ms per token,    17.71 tokens per second)
llama_print_timings:       total time =   26413.26 ms /  1160 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     922.30 ms
llama_print_timings:      sample time =      13.32 ms /   307 runs   (    0.04 ms per token, 23049.78 tokens per second)
llama_print_timings: prompt eval time =    1478.17 ms /   718 tokens (    2.06 ms per token,   485.74 tokens per second)
llama_print_timings:        eval time =   17476.76 ms /   306 runs   (   57.11 ms per token,    17.51 tokens per second)
llama_print_timings:       to

In [None]:
i = 5
print(df6.loc[i, 'Review'])

The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch.


In [None]:
print(df6.loc[i, 'llama_response'])


    Sure, I'd be happy to help! Here's the review you provided:

"The Holiday is quite the perfect thing to watch over a peaceful weekend. What does it have? Pangs of unrequited love(agreed not a great selling point but there is more) and tingly feeling arising from kisses. You have the easy on eyes actors, some really good comedy. (Thanks to Miles aka Jack Black). Bonus: there's the feel good vibe too. So if you are ready to go through the above..this might just be the thing for you to watch."

Based on the review, I would classify the overall sentiment as "Positive".

Here's the sentiment breakdown for each aspect:

* Plot: Positive
* Duration: Not Applicable (not mentioned in the review)
* Cast: Positive

Here are the liked/disliked features for each aspect:

* Plot: Not mentioned
* Duration: Not applicable
* Cast: Jack Black's comedy

Based on the review, here's a possible response:

"Hi there! Thank you so much for taking the time to review our movie, The Holiday. We're thrilled 

In [None]:
# applying the function to the model response
df6['llama_response_parsed'] = df6['llama_response'].apply(json_extractor)
df6['llama_response_parsed'].head()

Error parsing JSON: Expecting property name enclosed in double quotes: line 10 column 5 (char 522)
    Sure, I'd be happy to help you with that! Here's the review you provided, along with my analysis of the sentiment and features mentioned:

Review:

"The Holdovers is a very warmhearted and sweet film, I love a good, intimate 'American characters closed off in a private world in winter' film and this took you deep into such territory. Although it is also quite surprisingly sweary and cheeky thanks in large part to Mary Lamb which is very funny. She supplies a lot of the laughs in this film through that vein. Whether a female character in that era would really swear like that I don't know but it made for great entertainment and was very funny with her straight talking, emotionally honest and aware character. The Holdovers took you deep into that inner realm and took a lot of trouble to unfurl character development which is one of its charms. At the beginning of the film the five schoolb

43    {'Overall': 'Negative', 'Plot': 'Negative', 'D...
42                                                   {}
48    {'Overall': 'Neutral', 'Plot': 'Neutral', 'Dur...
22    {'Overall': 'Positive', 'Plot': 'Not Applicabl...
44                                                   {}
Name: llama_response_parsed, dtype: object

In [None]:
llama_response_parsed_df_6 = pd.json_normalize(df6['llama_response_parsed'])
llama_response_parsed_df_6.head()

Unnamed: 0,Overall,Plot,Duration,Cast,Plot Features,Duration Features,Cast Features,Response
0,Negative,Negative,Neutral,Neutral,"[underdeveloped characters, predictable moments]",[ adequate length],[mixed performances],Thank you for taking the time to share your fe...
1,,,,,,,,
2,Neutral,Neutral,Neutral,Neutral,[Not Applicable],[Not Applicable],[Not Applicable],Thank you for taking the time to share your th...
3,Positive,Not Applicable,Positive,Positive,[],[],[great play by actors],Thank you for your positive review of Office C...
4,,,,,,,,


In [None]:
data_with_parsed_model_output_6 = pd.concat([df6, llama_response_parsed_df_6], axis=1)
data_with_parsed_model_output_6.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,llama_response,llama_response_parsed,Overall,Plot,Duration,Cast,Plot Features,Duration Features,Cast Features,Response
43,Holdovers,2023,2024,1,This was a cinematic collage assembled from th...,"\n {\n ""Overall"": ""Negative"",\n ...","{'Overall': 'Negative', 'Plot': 'Negative', 'D...",Positive,Positive,Positive,Positive,"[heart-warming, overwhelming, feel-good, closu...",[Christmas vibes],"[perfect for their characters, roles]",Thank you for your kind words about our movie!...
42,Holdovers,2023,2023,4,5 stars is up there with the best movies I hav...,"\n {\n ""Overall"": ""Positive"",\n ...",{},Positive,Positive,Not Applicable,Positive,[],[],[],"Dear [Reviewer],\n\nThank you so much for taki..."
48,Holdovers,2023,2024,3,"Well meaning but a bit flavorless, it is a hig...","\n {\n ""Overall"": ""Neutral"",\n ...","{'Overall': 'Neutral', 'Plot': 'Neutral', 'Dur...",Neutral,Not Applicable,Not Applicable,Not Applicable,[],[],[],Thank you for taking the time to review our mo...
22,Office Christmas Party,2016,2022,5,Great seasonal comedy. Specially when we have ...,"\n Sure, I'd be happy to help! Here's the r...","{'Overall': 'Positive', 'Plot': 'Not Applicabl...",Positive,Positive,Not Applicable,Positive,[],[],[Eli Wallach's performance],Thank you so much for taking the time to share...
44,Holdovers,2023,2024,4,"Very warmhearted and sweet film, I love a good...","\n Sure, I'd be happy to help you with that...",{},Neutral,Neutral,Neutral,Neutral,[],[],[],Thank you for taking the time to share your th...


In [None]:
final_df6 = data_with_parsed_model_output_6.drop(['llama_response','llama_response_parsed'], axis=1)
final_df6.head()

Unnamed: 0,Movie,Release_Year,Year_of_Review,Stars,Review,Overall,Plot,Duration,Cast,Plot Features,Duration Features,Cast Features,Response
43,Holdovers,2023,2024,1,This was a cinematic collage assembled from th...,Positive,Positive,Positive,Positive,"[heart-warming, overwhelming, feel-good, closu...",[Christmas vibes],"[perfect for their characters, roles]",Thank you for your kind words about our movie!...
42,Holdovers,2023,2023,4,5 stars is up there with the best movies I hav...,Positive,Positive,Not Applicable,Positive,[],[],[],"Dear [Reviewer],\n\nThank you so much for taki..."
48,Holdovers,2023,2024,3,"Well meaning but a bit flavorless, it is a hig...",Neutral,Not Applicable,Not Applicable,Not Applicable,[],[],[],Thank you for taking the time to review our mo...
22,Office Christmas Party,2016,2022,5,Great seasonal comedy. Specially when we have ...,Positive,Positive,Not Applicable,Positive,[],[],[Eli Wallach's performance],Thank you so much for taking the time to share...
44,Holdovers,2023,2024,4,"Very warmhearted and sweet film, I love a good...",Neutral,Neutral,Neutral,Neutral,[],[],[],Thank you for taking the time to share your th...


In [None]:
final_df6['Overall'].value_counts()

Positive    17
Negative    12
Neutral      9
Name: Overall, dtype: int64

In [None]:
final_df6['Plot'].value_counts()

Negative          12
Positive          11
Neutral           10
Not Applicable     5
Name: Plot, dtype: int64

In [None]:
final_df6['Duration'].value_counts()

Not Applicable    11
Neutral           11
Positive           8
Negative           8
Name: Duration, dtype: int64

In [None]:
final_df6['Cast'].value_counts()

Positive          21
Neutral           12
Negative           4
Not Applicable     1
Name: Cast, dtype: int64

## Conclusions



- for model optimization, we can explore the following to improve performance:
    1. finetune the prompt
    2. Update the model parameters (`temparature`, `top_p`, `top_k`, `max_toxen`)



___