In [1]:
!pip install litellm

Collecting litellm
  Downloading litellm-1.63.6-py3-none-any.whl (6.9 MB)
[K     |████████████████████████████████| 6.9 MB 7.7 MB/s eta 0:00:01
[?25hCollecting pydantic<3.0.0,>=2.0.0
  Downloading pydantic-2.10.6-py3-none-any.whl (431 kB)
[K     |████████████████████████████████| 431 kB 23.8 MB/s eta 0:00:01
[?25hCollecting python-dotenv>=0.2.0
  Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Collecting jinja2<4.0.0,>=3.1.2
  Downloading jinja2-3.1.6-py3-none-any.whl (134 kB)
[K     |████████████████████████████████| 134 kB 61.3 MB/s eta 0:00:01
[?25hCollecting tiktoken>=0.7.0
  Downloading tiktoken-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 66.3 MB/s eta 0:00:01
[?25hCollecting jsonschema<5.0.0,>=4.22.0
  Downloading jsonschema-4.23.0-py3-none-any.whl (88 kB)
[K     |████████████████████████████████| 88 kB 6.9 MB/s  eta 0:00:01
Collecting tokenizers
  Downloading tokenizers-0.21.0-cp39-abi

In [2]:
import litellm
import pandas as pd
import os

# Zero-Shot Prompting

In [5]:
from litellm import completion

# Never store the key in the notebook: reuse OPENROUTER_API_KEY when it is
# already set in the environment, otherwise prompt for it without echoing.
if not os.environ.get("OPENROUTER_API_KEY"):
    from getpass import getpass  # local import: only needed when prompting

    os.environ["OPENROUTER_API_KEY"] = getpass("OpenRouter API key: ")

def classify_sentence(sentence):
    """Label one sentence with the zero-shot prompt.

    Sends a single user message that embeds `sentence` to the
    openrouter/meta-llama/llama-3.3-70b-instruct model through `completion`
    and returns the text of the first choice with surrounding whitespace
    removed. The prompt asks for "Comparison", "Simile" or "Not Applicable",
    but the reply is returned as received; it is not validated here.
    """
    # Sent verbatim to the model; changing the wording changes the predictions.
    question = f"Does the sentence: {sentence} contains a comparison, a simile, or not applicable? Answer with the word \"Comparison\" or the word \"Simile\" or the word \"Not Applicable\" only. Do not write anything else."
    user_message = {"content": question, "role": "user"}

    reply = completion(
        model="openrouter/meta-llama/llama-3.3-70b-instruct",
        messages=[user_message],
    )

    answer = reply.choices[0].message.content
    return answer.strip()

# Input: the gold-annotated sentences. Output: the same table plus predictions.
file = './Gold Annotation.csv'
output_csv_path = 'Zero-Shot.csv'

df = pd.read_csv(file)

# classify_sentence is called once per row of 'Sentences'; its return values
# become the 'Predicted' column.
df['Predicted'] = df['Sentences'].map(classify_sentence)

df.to_csv(output_csv_path, index=False)
print(f"Classification completed. Results saved to {output_csv_path}.")

Classification completed. Results saved to Zero-Shot.csv.


In [14]:
# Sanity check: list the distinct labels in each column so stray model replies
# (anything other than the three requested labels) are easy to spot.
# The printed label now matches the actual column name, 'Gold'.
print("Unique values in 'Gold':", df['Gold'].unique())
print("Unique values in 'Predicted':", df['Predicted'].unique())

Unique values in 'gold': ['Simile' 'Not Applicable' 'Comparison']
Unique values in 'Predicted': ['Simile' 'Comparison' 'Not Applicable']


In [6]:
from sklearn.metrics import accuracy_score

# Share of rows whose predicted label is exactly equal to the gold label.
accuracy = accuracy_score(y_true=df['Gold'], y_pred=df['Predicted'])
print(f"Accuracy: {accuracy}")

Accuracy: 0.72


In [16]:
# Put the human annotation next to the zero-shot prediction and export the
# rows where they disagree, for manual error analysis.
file1 = './Gold Annotation.csv'
# Must be the file the zero-shot cell writes ('Zero-Shot.csv', with a hyphen).
# The previous './Zero_Shot.csv' named a file this notebook never creates.
file2 = './Zero-Shot.csv'

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Columns are paired by row index; both files are read with the default index.
df = pd.DataFrame({
    'Sentences': df1['Sentences'],
    'Human Annotation': df1['Gold'],
    'LLM': df2['Predicted']
})

# Strip stray whitespace so it cannot cause a spurious mismatch.
df['Human Annotation'] = df['Human Annotation'].str.strip()
df['LLM'] = df['LLM'].str.strip()

mismatched = df[df['Human Annotation'] != df['LLM']]
file = './mismatched.csv'
mismatched.to_csv(file, index=False)

print(mismatched)

                                            Sentences Human Annotation  \
3   He paid as much as a million dollars for the p...   Not Applicable   
12                      I am as merry as a school-boy       Comparison   
13                    I am as giddy as a drunken man.       Comparison   
17  And yet, according to the World Health Organiz...   Not Applicable   
20                          It's as lovely as a rose.   Not Applicable   
21  He would stand still and lay his head on the m...       Comparison   
27  Having eluded killers like malaria and AIDS, o...   Not Applicable   
29  China has emerged as a world power far more qu...   Not Applicable   
35  Nothing keeps a man as spry as a young woman a...   Not Applicable   
36  Few treasures are worth as much as a friend wh...           Simile   
37                        He's as drunk as a fiddler.       Comparison   
43               He is a figment as much as a figure.   Not Applicable   
47                       A miss is as 

# Few-Shot Prompting

In [None]:
import os
import pandas as pd
from litellm import completion

# Never store the key in the notebook: reuse OPENROUTER_API_KEY when it is
# already set in the environment, otherwise prompt for it without echoing.
if not os.environ.get("OPENROUTER_API_KEY"):
    from getpass import getpass  # local import: only needed when prompting

    os.environ["OPENROUTER_API_KEY"] = getpass("OpenRouter API key: ")

def classify_sentence(sentence):
    """Label one sentence with the few-shot prompt.

    The prompt is a fixed block of worked examples and annotation rules
    followed by `sentence`. It is sent as a single user message to the
    openrouter/meta-llama/llama-3.3-70b-instruct model through `completion`.
    Returns the text of the first choice with surrounding whitespace removed;
    the reply is not validated against the three requested labels.
    """
    # Kept verbatim, whitespace included: this text is part of what the model
    # receives, so editing it changes the predictions.
    guidance = """
Here are some examples to guide your response:
1. Tom is as fast as a rabbit- Simile
2. He donated as much as 50,000 dollars to the charity- Not Applicable
3. An elephant isn't as big as a whale- Comparison

Instruction:
  1. If there is unspecified subject or object of comparison, you should mark it as Not Applicable. Some examples:
     a. Nothing is as good as a breath of fresh air.
     b. It's as beautiful as ever.

  2. If the subject and object of comparison belongs to the same category (human-human, animal-animal, celestial body, social gathering), you should mark it as a Comparison. Some examples:
     a. I am as beautiful as my mother
     b. She is as strong as her father
     c. He was as drunk as the guitarist
     d. The Earth looks as round as the Sun
     e. Her eyes are as beautiful as a child's
     f. The surface was as white as the wall

  3. If we have idiomatic expressions, then we mark it as Not Applicable. Some examples:
     a. I am feeling under the weather today

  4. If we have "like" as an example in the sentence, we will mark it as Not Applicable.
     a. I feel like an ice cream
    """

    request_text = f"{guidance}\nNow classify the sentence: \"{sentence}\". Answer with \"Comparison\", \"Simile\", or \"Not Applicable\" only. Do not write anything else."
    user_message = {"content": request_text, "role": "user"}

    reply = completion(
        model="openrouter/meta-llama/llama-3.3-70b-instruct",
        messages=[user_message],
    )

    first_choice = reply['choices'][0]
    return first_choice['message']['content'].strip()

# Input: the gold-annotated sentences. Output: the same table plus predictions.
file = './Gold Annotation.csv'
output_csv_path = 'Few_shot.csv'

df = pd.read_csv(file)

# classify_sentence is called once per row of 'Sentences'; its return values
# become the 'Predicted' column.
df['Predicted'] = df['Sentences'].map(classify_sentence)

df.to_csv(output_csv_path, index=False)
print(f"Classification completed. Results saved to {output_csv_path}.")

Classification completed. Results saved to Few_shot.csv.


In [None]:
# Distinct labels in the gold column, then in the predicted column (one per line).
print(df['Gold'].unique(), df['Predicted'].unique(), sep='\n')

['Simile' 'Not Applicable' 'Comparison']
['Simile' 'Not Applicable' 'Comparison']


In [None]:
from sklearn.metrics import accuracy_score

# Share of rows whose predicted label is exactly equal to the gold label.
accuracy = accuracy_score(y_true=df['Gold'], y_pred=df['Predicted'])
print(f"Accuracy: {accuracy}")

Accuracy: 0.82


In [None]:
# Put the human annotation next to the few-shot prediction and export the
# rows where they disagree, for manual error analysis.
file1 = './Gold Annotation.csv'
file2 = './Few_shot.csv'

df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# Labels are whitespace-stripped while the comparison table is assembled, so
# stray spaces cannot produce a spurious mismatch.
df = pd.DataFrame({
    'Sentences': df1['Sentences'],
    'Human Annotation': df1['Gold'].str.strip(),
    'Few_shot': df2['Predicted'].str.strip(),
})

mismatched = df[df['Human Annotation'] != df['Few_shot']]

file = './fewshot_mismatched.csv'
mismatched.to_csv(file, index=False)

print(mismatched)

                                            Sentences Human Annotation  \
12                      I am as merry as a school-boy       Comparison   
13                    I am as giddy as a drunken man.       Comparison   
20                          It's as lovely as a rose.   Not Applicable   
21  He would stand still and lay his head on the m...       Comparison   
36  Few treasures are worth as much as a friend wh...           Simile   
37                        He's as drunk as a fiddler.       Comparison   
38  Her mouth is smoother than oil, but in the end...           Simile   
43               He is a figment as much as a figure.   Not Applicable   
50  If rising sea levels flood the Maldive Islands...           Simile   
51  So February’s policy note is a stunning revers...       Comparison   
59  Little by little the sky cleared. The sun came...       Comparison   
60  Tom isn't as naive as a lot of people think he...   Not Applicable   
72       A nod is as good as a wink to