In [105]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [106]:
%cd /gdrive/MyDrive/Colab Notebooks/CSE519/Project
!ls

/gdrive/MyDrive/Colab Notebooks/CSE519/Project
preprocess_sample_data.ipynb  sample_data_new.csv     sampled_data.csv
sample_data_analysis.csv      sample_data_new.gsheet


In [83]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from scipy.stats import multivariate_normal
import json
import re

# Load data


In [84]:
sample_data = pd.read_csv('sampled_data.csv')
sample_data.head(10)


Unnamed: 0,plate,review_reason_code,customer_meaning,reviewer_comments,status,classify,interpret
0,DEZERTG,2.0,MY LOVE FOR THE DESERT AND MY MIDDLE INITIAL,"G, GANG REFERENCE",Y,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The license plate ""DEZERTG"" could be interpret..."
1,TAY HOE,1.0,"OUR VEHICLE IS A CHEVY TAHOE, AND OUR 2 YEAR O...","Says ""HO"", has a Chevy UT. Could be a Tahoe.",N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The license plate ""TAY HOE"" could be interpret..."
2,1TRMN8R,2.0,NICKNAME FOR MY CAR,TERMINATOR COULD BE CONSIDERED HOSTILE,N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The license plate ""1TRMN8R"" could be interpret..."
3,WTFUWNT,3.0,PERSONAL MANTRA-WAIT FOR WHAT YOU WANT,WHAT THE FUCK YOU WANT,N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...",WTFUWNT might be an acronym or a series of ini...
4,CPT1964,2.0,THE ORIGINAL YEAR OF THE CAR,"CPT CAN STAND FOR COLORED PEOPLE'S TIME, RACIS...",N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The plate ""CPT1964"" likely belongs to someone ..."
5,F32 BMR,2.0,F32 IS THE VHASSIS CODE FOR THIS VEHICLE BMR I...,F32 BOMBER,N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The license plate ""F32 BMR"" could be interpret..."
6,DKHRS2,1.0,,COULD LOOK LIKE DICK WHORES,Y,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","DKHRS2\n\nOutput: ""This license plate could re..."
7,ESE RIX,,THAT'S RICKS,ESSAY RICK GANG REFERENCE,Y,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","""ESE RIX"" could be interpreted as a playful or..."
8,24SF415,2.0,SAN FRANCISCO,415 AREA CODE,N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The plate ""24SF415"" appears to be a combinatio..."
9,8BMJ8,2.0,FAVORITE NUMBER AND FAMILY INITIALS,"88, GANG REFERENCE",N,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":...","The plate ""8BMJ8"" could be interpreted as a pl..."


# Extract info from 'classify'


In [85]:
first_row_classify_value = sample_data.iloc[0]['classify']
print(sample_data.iloc[0]['classify'])
print(sample_data.iloc[0]['interpret'])

```json
{
  "personalized": "Y",
  "valid": "Y",
  "explanation": "The plate 'DEZERTG' is personalized as it does not follow the standard format for standard license plates which usually consist of a series of numbers and letters, and this plate contains a mix of letters and a word-like sequence."
}
```
The license plate "DEZERTG" could be interpreted as a play on the word "desert" with the last letter "G" possibly standing for "gang" or a person's name. This might suggest that the owner of the plate has a humorous or playful attitude about being in a "desert" gang, or is making a reference to a group or club named "Desert G."


In [86]:
sample_data['classify']

Unnamed: 0,classify
0,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
1,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
2,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
3,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
4,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
...,...
95,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
96,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
97,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."
98,"```json\n{\n ""personalized"": ""Y"",\n ""valid"":..."


In [87]:
def clean_and_load_json(json_str):
    # Remove backticks and newlines, then strip extra whitespace
    json_str = re.sub(r'```|\n', '', json_str).strip()
    json_str = json_str[4:]
    try:
        # Convert cleaned string to dictionary
        return json.loads(json_str)
    except json.JSONDecodeError:
        # Handle any decoding errors (optional logging or handling)
        return None

In [88]:
clean_and_load_json(sample_data.iloc[0]['classify'])

{'personalized': 'Y',
 'valid': 'Y',
 'explanation': "The plate 'DEZERTG' is personalized as it does not follow the standard format for standard license plates which usually consist of a series of numbers and letters, and this plate contains a mix of letters and a word-like sequence."}

In [89]:
sample_data['classify'] = sample_data['classify'].apply(clean_and_load_json)

# Expand 'classify' column into separate columns
sample_data = pd.concat([sample_data.drop(columns=['classify']), sample_data['classify'].apply(pd.Series)], axis=1)
new_column_order = [
    'plate', 'review_reason_code', 'status', 'valid', 'customer_meaning',
    'reviewer_comments', 'interpret', 'personalized', 'explanation'
]
sample_data = sample_data[new_column_order]
sample_data.head(10)

Unnamed: 0,plate,review_reason_code,status,valid,customer_meaning,reviewer_comments,interpret,personalized,explanation
0,DEZERTG,2.0,Y,Y,MY LOVE FOR THE DESERT AND MY MIDDLE INITIAL,"G, GANG REFERENCE","The license plate ""DEZERTG"" could be interpret...",Y,The plate 'DEZERTG' is personalized as it does...
1,TAY HOE,1.0,N,Y,"OUR VEHICLE IS A CHEVY TAHOE, AND OUR 2 YEAR O...","Says ""HO"", has a Chevy UT. Could be a Tahoe.","The license plate ""TAY HOE"" could be interpret...",Y,The plate 'TAY HOE' contains non-standard lett...
2,1TRMN8R,2.0,N,Y,NICKNAME FOR MY CAR,TERMINATOR COULD BE CONSIDERED HOSTILE,"The license plate ""1TRMN8R"" could be interpret...",Y,The license plate '1TRMN8R' contains a combina...
3,WTFUWNT,3.0,N,Y,PERSONAL MANTRA-WAIT FOR WHAT YOU WANT,WHAT THE FUCK YOU WANT,WTFUWNT might be an acronym or a series of ini...,Y,The plate 'WTFUWNT' contains a sequence of let...
4,CPT1964,2.0,N,Y,THE ORIGINAL YEAR OF THE CAR,"CPT CAN STAND FOR COLORED PEOPLE'S TIME, RACIS...","The plate ""CPT1964"" likely belongs to someone ...",Y,The plate 'CPT1964' is personalized as it does...
5,F32 BMR,2.0,N,Y,F32 IS THE VHASSIS CODE FOR THIS VEHICLE BMR I...,F32 BOMBER,"The license plate ""F32 BMR"" could be interpret...",Y,The license plate 'F32 BMR' contains a combina...
6,DKHRS2,1.0,Y,Y,,COULD LOOK LIKE DICK WHORES,"DKHRS2\n\nOutput: ""This license plate could re...",Y,The plate 'DKHRS2' contains letters and number...
7,ESE RIX,,Y,Y,THAT'S RICKS,ESSAY RICK GANG REFERENCE,"""ESE RIX"" could be interpreted as a playful or...",Y,The plate 'ESE RIX' includes letters and numbe...
8,24SF415,2.0,N,Y,SAN FRANCISCO,415 AREA CODE,"The plate ""24SF415"" appears to be a combinatio...",Y,The plate is personalized as it contains non-s...
9,8BMJ8,2.0,N,Y,FAVORITE NUMBER AND FAMILY INITIALS,"88, GANG REFERENCE","The plate ""8BMJ8"" could be interpreted as a pl...",Y,The plate '8BMJ8' is personalized as it does n...


In [90]:
sample_data.columns

Index(['plate', 'review_reason_code', 'status', 'valid', 'customer_meaning',
       'reviewer_comments', 'interpret', 'personalized', 'explanation'],
      dtype='object')

In [91]:
len(sample_data)

100

In [92]:
unique_values = sample_data['personalized'].unique()
print(unique_values)

['Y']


All plates are personalized.

# Analysis the result of LLM prediction



In [93]:
# compare with the LLM generated vaid with ground truth 'status'

filtered_df = sample_data[sample_data['valid'] == sample_data['status']]
len(filtered_df)

18

In [94]:
filtered_df.head(18)

Unnamed: 0,plate,review_reason_code,status,valid,customer_meaning,reviewer_comments,interpret,personalized,explanation
0,DEZERTG,2,Y,Y,MY LOVE FOR THE DESERT AND MY MIDDLE INITIAL,"G, GANG REFERENCE","The license plate ""DEZERTG"" could be interpret...",Y,The plate 'DEZERTG' is personalized as it does...
6,DKHRS2,1,Y,Y,,COULD LOOK LIKE DICK WHORES,"DKHRS2\n\nOutput: ""This license plate could re...",Y,The plate 'DKHRS2' contains letters and number...
7,ESE RIX,,Y,Y,THAT'S RICKS,ESSAY RICK GANG REFERENCE,"""ESE RIX"" could be interpreted as a playful or...",Y,The plate 'ESE RIX' includes letters and numbe...
11,BAMMER 3,,Y,Y,,BAMMER ANOTHER NAMR FOR BAD WEED (BAMMER1 has ...,"The license plate ""BAMMER 3"" suggests that the...",Y,The plate is personalized because it contains ...
16,NEW BLUE,2,Y,Y,OUR NEW CAR THAT'S BLUE IN COLOR,BLUE,"The license plate ""NEW BLUE"" suggests a sense ...",Y,The plate 'NEW BLUE' is personalized as it con...
19,RED KLK,2,Y,Y,RED KIMBERLY LILIANA KAMILA,"RED=GANG COLOR, Customers name is Bolivar F Erazo","The license plate ""RED KLK"" suggests a connect...",Y,The plate 'RED KLK' contains letters and numbe...
21,AYOO5OH,5,Y,Y,CAR NAME,5OH = FIVE O,"The license plate ""AYOO5OH"" could be interpret...",Y,The plate 'AYOO5OH' is personalized because it...
26,REDFLAG,2,Y,Y,"THE CAR IS RED, OLD AND FUNKY. IT STICKS OUT L...",RED=GANG COLOR,"Output: ""REDFLAG"" could imply that the owner i...",Y,The plate 'REDFLAG' is personalized because it...
35,NOSTOOL,2,N,N,ITS THE ABREVIATIONS FOR NITROUS OXIDE RACVE G...,STOOL IS ANOTHER WORD FOR SHIT LOOKS LIKE NO SHIT,"Output: The license plate ""NOSTOOL"" could be i...",Y,The plate 'NOSTOOL' is personalized but invali...
56,DUBAGNT,7D,Y,Y,"I'M A WARRIOR FAN, NICKNAME THE DUBS. I'M ALS...","DUB, DRUG REFERENCE","The license plate ""DUBAGNT"" could be interpret...",Y,The plate 'DUBAGNT' is personalized as it does...


In [95]:
filtered_df
status_counts = filtered_df['status'].value_counts()
status_counts

Unnamed: 0_level_0,count
status,Unnamed: 1_level_1
Y,17
N,1


In [96]:
valid_counts = sample_data['valid'].value_counts()
valid_counts

Unnamed: 0_level_0,count
valid,Unnamed: 1_level_1
Y,99
N,1


In [97]:
status_counts = sample_data['status'].value_counts()
status_counts

Unnamed: 0_level_0,count
status,Unnamed: 1_level_1
N,82
Y,17


## Conclusion


accooring to the result of baseline model, we can found that:   
The baseline model only successfully predict 18% of the data, which is 18 cases out of 100 cases.    
In the 18 cases, 17 of them is Y, 1 of them is N.    
Also, look at the result of the baseline model, we can see that 99% result is Y, only 1 cases is N.   
根据现在的baseline model, 我们可以发现,baseline model成功预测了所有的合法车牌,但是它在违法车牌的预测行为特别差，只成功一个。   



According to the baseline model results, we found that it successfully predicts only 18% of the data, or 18 cases out of 100. Of these 18 cases, 17 are correctly classified as "Y," and 1 is "N."

Additionally, the baseline model overwhelmingly predicts "Y," with 99% of its predictions being "Y" and only one case as "N." This indicates that while the baseline model correctly predicts all legal plates, it performs poorly on illegal plates, with only one correct prediction for that category.


In [98]:
# Save the DataFrame to a new CSV file
sample_data.to_csv('sample_data_new.csv', index=False)


# Using the Rouge-L to analyze the similarity between LLM explaination and reviewer comments


In [99]:
!pip install rouge-score



In [100]:
from rouge_score import rouge_scorer

In [101]:
# Initialize ROUGE scorer for ROUGE-L
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

# Function to calculate ROUGE-L for each pair of comments
def calculate_rouge_l(row):
    reviewer_comment = row['reviewer_comments']
    interpret_comment = row['interpret']

    # Compute ROUGE-L score
    scores = scorer.score(reviewer_comment, interpret_comment)

    # Extract ROUGE-L F1 score, Precision, and Recall
    fmeasure = scores['rougeL'].fmeasure
    precision = scores['rougeL'].precision
    recall = scores['rougeL'].recall

    return pd.Series([fmeasure, precision, recall], index=['rougeL_fmeasure', 'rougeL_precision', 'rougeL_recall'])

# Apply the function to each row and add the results as new columns
sample_data[['rougeL_fmeasure', 'rougeL_precision', 'rougeL_recall']] = sample_data.apply(calculate_rouge_l, axis=1)

# View the updated DataFrame with ROUGE-L scores
sample_data.head(10)

Unnamed: 0,plate,review_reason_code,status,valid,customer_meaning,reviewer_comments,interpret,personalized,explanation,rougeL_fmeasure,rougeL_precision,rougeL_recall
0,DEZERTG,2.0,Y,Y,MY LOVE FOR THE DESERT AND MY MIDDLE INITIAL,"G, GANG REFERENCE","The license plate ""DEZERTG"" could be interpret...",Y,The plate 'DEZERTG' is personalized as it does...,0.092308,0.048387,1.0
1,TAY HOE,1.0,N,Y,"OUR VEHICLE IS A CHEVY TAHOE, AND OUR 2 YEAR O...","Says ""HO"", has a Chevy UT. Could be a Tahoe.","The license plate ""TAY HOE"" could be interpret...",Y,The plate 'TAY HOE' contains non-standard lett...,0.102564,0.058824,0.4
2,1TRMN8R,2.0,N,Y,NICKNAME FOR MY CAR,TERMINATOR COULD BE CONSIDERED HOSTILE,"The license plate ""1TRMN8R"" could be interpret...",Y,The license plate '1TRMN8R' contains a combina...,0.051282,0.027397,0.4
3,WTFUWNT,3.0,N,Y,PERSONAL MANTRA-WAIT FOR WHAT YOU WANT,WHAT THE FUCK YOU WANT,WTFUWNT might be an acronym or a series of ini...,Y,The plate 'WTFUWNT' contains a sequence of let...,0.059701,0.032258,0.4
4,CPT1964,2.0,N,Y,THE ORIGINAL YEAR OF THE CAR,"CPT CAN STAND FOR COLORED PEOPLE'S TIME, RACIS...","The plate ""CPT1964"" likely belongs to someone ...",Y,The plate 'CPT1964' is personalized as it does...,0.111111,0.068182,0.3
5,F32 BMR,2.0,N,Y,F32 IS THE VHASSIS CODE FOR THIS VEHICLE BMR I...,F32 BOMBER,"The license plate ""F32 BMR"" could be interpret...",Y,The license plate 'F32 BMR' contains a combina...,0.022472,0.011494,0.5
6,DKHRS2,1.0,Y,Y,,COULD LOOK LIKE DICK WHORES,"DKHRS2\n\nOutput: ""This license plate could re...",Y,The plate 'DKHRS2' contains letters and number...,0.04,0.022222,0.2
7,ESE RIX,,Y,Y,THAT'S RICKS,ESSAY RICK GANG REFERENCE,"""ESE RIX"" could be interpreted as a playful or...",Y,The plate 'ESE RIX' includes letters and numbe...,0.0,0.0,0.0
8,24SF415,2.0,N,Y,SAN FRANCISCO,415 AREA CODE,"The plate ""24SF415"" appears to be a combinatio...",Y,The plate is personalized as it contains non-s...,0.058252,0.03,1.0
9,8BMJ8,2.0,N,Y,FAVORITE NUMBER AND FAMILY INITIALS,"88, GANG REFERENCE","The plate ""8BMJ8"" could be interpreted as a pl...",Y,The plate '8BMJ8' is personalized as it does n...,0.0,0.0,0.0


In [102]:
avg_rougeL_fmeasure = sample_data['rougeL_fmeasure'].mean()
avg_rougeL_precision = sample_data['rougeL_precision'].mean()
avg_rougeL_recall = sample_data['rougeL_recall'].mean()

avg_rougeL_fmeasure, avg_rougeL_precision, avg_rougeL_recall

(0.04538926717989015, 0.025593270510017887, 0.3339342114975108)

In [103]:
# Apply the function to each row and add the results as new columns
filtered_df[['rougeL_fmeasure', 'rougeL_precision', 'rougeL_recall']] = filtered_df.apply(calculate_rouge_l, axis=1)

# View the updated DataFrame with ROUGE-L scores
filtered_df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[['rougeL_fmeasure', 'rougeL_precision', 'rougeL_recall']] = filtered_df.apply(calculate_rouge_l, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[['rougeL_fmeasure', 'rougeL_precision', 'rougeL_recall']] = filtered_df.apply(calculate_rouge_l, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retu

Unnamed: 0,plate,review_reason_code,status,valid,customer_meaning,reviewer_comments,interpret,personalized,explanation,rougeL_fmeasure,rougeL_precision,rougeL_recall
0,DEZERTG,2,Y,Y,MY LOVE FOR THE DESERT AND MY MIDDLE INITIAL,"G, GANG REFERENCE","The license plate ""DEZERTG"" could be interpret...",Y,The plate 'DEZERTG' is personalized as it does...,0.092308,0.048387,1.0
6,DKHRS2,1,Y,Y,,COULD LOOK LIKE DICK WHORES,"DKHRS2\n\nOutput: ""This license plate could re...",Y,The plate 'DKHRS2' contains letters and number...,0.04,0.022222,0.2
7,ESE RIX,,Y,Y,THAT'S RICKS,ESSAY RICK GANG REFERENCE,"""ESE RIX"" could be interpreted as a playful or...",Y,The plate 'ESE RIX' includes letters and numbe...,0.0,0.0,0.0
11,BAMMER 3,,Y,Y,,BAMMER ANOTHER NAMR FOR BAD WEED (BAMMER1 has ...,"The license plate ""BAMMER 3"" suggests that the...",Y,The plate is personalized because it contains ...,0.030769,0.018868,0.083333
16,NEW BLUE,2,Y,Y,OUR NEW CAR THAT'S BLUE IN COLOR,BLUE,"The license plate ""NEW BLUE"" suggests a sense ...",Y,The plate 'NEW BLUE' is personalized as it con...,0.042553,0.021739,1.0
19,RED KLK,2,Y,Y,RED KIMBERLY LILIANA KAMILA,"RED=GANG COLOR, Customers name is Bolivar F Erazo","The license plate ""RED KLK"" suggests a connect...",Y,The plate 'RED KLK' contains letters and numbe...,0.065934,0.036585,0.333333
21,AYOO5OH,5,Y,Y,CAR NAME,5OH = FIVE O,"The license plate ""AYOO5OH"" could be interpret...",Y,The plate 'AYOO5OH' is personalized because it...,0.0,0.0,0.0
26,REDFLAG,2,Y,Y,"THE CAR IS RED, OLD AND FUNKY. IT STICKS OUT L...",RED=GANG COLOR,"Output: ""REDFLAG"" could imply that the owner i...",Y,The plate 'REDFLAG' is personalized because it...,0.028986,0.015152,0.333333
35,NOSTOOL,2,N,N,ITS THE ABREVIATIONS FOR NITROUS OXIDE RACVE G...,STOOL IS ANOTHER WORD FOR SHIT LOOKS LIKE NO SHIT,"Output: The license plate ""NOSTOOL"" could be i...",Y,The plate 'NOSTOOL' is personalized but invali...,0.085106,0.054054,0.2
56,DUBAGNT,7D,Y,Y,"I'M A WARRIOR FAN, NICKNAME THE DUBS. I'M ALS...","DUB, DRUG REFERENCE","The license plate ""DUBAGNT"" could be interpret...",Y,The plate 'DUBAGNT' is personalized as it does...,0.026316,0.013699,0.333333


In [104]:
avg_rougeL_fmeasure = filtered_df['rougeL_fmeasure'].mean()
avg_rougeL_precision = filtered_df['rougeL_precision'].mean()
avg_rougeL_recall = filtered_df['rougeL_recall'].mean()

avg_rougeL_fmeasure, avg_rougeL_precision, avg_rougeL_recall

(0.048059881675442555, 0.02727560235330397, 0.3144335511982571)

## Conclusion


比较了reviewer comment和interpret， 我们可以发现f1和precision都特别低，低于0.1.    
recall稍微高一点，在0.3左右。  

这是因为LLM生成的interpret都很长，大概在70word左右，而reviewer comment都很短，少于10 words。所以，f1和precision都会很低。

所以，f1 和precision其实不太具有参考性。  

考虑到这种情况，recall = 0.3可能是一个比较不错的数据。


After comparing the reviewer comments with the interpretations generated by the LLM, we found that both F1 score and precision are very low, below 0.1. Recall is slightly higher, at around 0.3.

This is because the LLM-generated interpretations are much longer, averaging around 70 words, whereas reviewer comments are very short, usually fewer than 10 words. As a result, F1 score and precision are not very meaningful metrics in this context.

Given this situation, a recall of 0.3 might actually be a relatively good result.


# Manually compare reviewer's comment and LLM interpret


手动分析一下这100个cases, 并把结果存在‘similarity’

In [107]:
analysis_data = pd.read_csv('sample_data_analysis.csv')
analysis_data.columns

Index(['plate', 'review_reason_code', 'status', 'valid', 'customer_meaning',
       'reviewer_comments', 'interpret', '中文', '中文.1', 'similarity', 'comment',
       'personalized', 'explanation'],
      dtype='object')

In [108]:
analysis_data['similarity'].value_counts()

Unnamed: 0_level_0,count
similarity,Unnamed: 1_level_1
0.0,63
0.5,22
0.3,8
1.0,7


similarity =        
0, means LLM explain the plate all wrong, or no thing right.   
0.3, means LLM explain the plate something right.   
0.5, means that LLM explain the plate correct, but it did not decriminate it as a illegal plate.      
1, means LLM explain it correct.      


## Conclusion

So, in all cases, there is 22%, the LLM explain the plate correctly, but it didn't classify it correct. 例如，LLM分析了这个plate有sex，或者gun的含义，但是LLM没有把这个定义成illegal plate。

63% 完全分析错误中，原因分析：
1. 对于数字‘13，14，88’，plate有非常严格的筛查，基本上只要这三个数字出现，这个plate就是不合格的。 但是LLM完全不了解这个point。
2. 关于简写，LLM 总是尝试把简写扩展成一个好的句子，导致分析简写的基本失败了。
3. plate have bad meaning in other language， and LLM did not recognized it
4. LLM 对于一些字母非常不敏感，比如V（代表vigina）， F（means fuck）， D（means Dick）， C（means cock）
5. LLM 对于一些可以代表sexual的东西非常不敏感。
6. LLM 缺少driver‘s personal information，所以这些都很难analysis

总结，LLM always try to analysis the plate in a good way. so it always put it good meaning. But, the reviewer always think the plate as worst as it can.     

                 
In all cases, there is a 22% rate where the LLM correctly explains the meaning of the plate but fails to classify it as an illegal plate. For example, the LLM might identify a sexual or gun-related implication in the plate but does not categorize it as "illegal."

Analysis of the 63% of completely incorrect analyses:

1. Plates containing the numbers "13," "14," or "88" undergo strict screening, meaning these plates are usually disqualified if these numbers appear. The LLM does not understand this rule.
2. The LLM often tries to expand abbreviations into positive phrases, leading to failed interpretations of these abbreviations.
3. The LLM fails to detect plates with inappropriate meanings in other languages.
4. The LLM is insensitive to certain letters that commonly represent inappropriate meanings, such as "V" (vagina), "F" (fuck), "D" (dick), and "C" (cock).
5. The LLM lacks sensitivity to terms or symbols associated with sexual content.
6. Without the driver’s personal information, the LLM struggles to fully analyze certain implications.

Summary: The LLM consistently attempts to interpret the plates in a positive way, while the reviewers assume the worst possible meaning.








# classify wrong


In [109]:
filtered = analysis_data[analysis_data['valid'] == analysis_data['status']]
filtered['similarity'].value_counts()

Unnamed: 0_level_0,count
similarity,Unnamed: 1_level_1
0.0,8
0.5,5
1.0,4
0.3,1


In [112]:
filtered

Unnamed: 0,plate,review_reason_code,status,valid,customer_meaning,reviewer_comments,interpret,中文,中文.1,similarity,comment,personalized,explanation
0,DEZERTG,2,Y,Y,MY LOVE FOR THE DESERT AND MY MIDDLE INITIAL,"G, GANG REFERENCE","The license plate ""DEZERTG"" could be interpret...",G，帮派参考,车牌“DEZERTG”可以解释为“沙漠”一词的游戏，最后一个字母“G”可能代表“帮派”或一个...,0.5,,Y,The plate 'DEZERTG' is personalized as it does...
6,DKHRS2,1,Y,Y,,COULD LOOK LIKE DICK WHORES,"DKHRS2\n\nOutput: ""This license plate could re...",可能看起来像迪克妓女,DKHRS2\n\n输出：“这个车牌可以代表文字或缩写的游戏。‘DKHRS’可能代表‘亲爱的...,0.0,,Y,The plate 'DKHRS2' contains letters and number...
7,ESE RIX,,Y,Y,THAT'S RICKS,ESSAY RICK GANG REFERENCE,"""ESE RIX"" could be interpreted as a playful or...",瑞克·刚参考文献,“ESE RIX”可以解释为一个俏皮或幽默的车牌，结合了西班牙语中常见单词或短语的首字母。 ...,0.0,,Y,The plate 'ESE RIX' includes letters and numbe...
11,BAMMER 3,,Y,Y,,BAMMER ANOTHER NAMR FOR BAD WEED (BAMMER1 has ...,"The license plate ""BAMMER 3"" suggests that the...",BAMMER 是有害杂草的另一种名称（BAMMER1 自 2009 年以来已推出）,车牌“BAMMER 3”表明车主具有活泼、精力充沛或可能粗暴的性格。 “Bammer”是一个...,0.0,,Y,The plate is personalized because it contains ...
16,NEW BLUE,2,Y,Y,OUR NEW CAR THAT'S BLUE IN COLOR,BLUE,"The license plate ""NEW BLUE"" suggests a sense ...",蓝色的,车牌“NEW BLUE”暗示着新奇感或新的开始，“New”表示新的或最近的事物，“Blue”...,1.0,,Y,The plate 'NEW BLUE' is personalized as it con...
19,RED KLK,2,Y,Y,RED KIMBERLY LILIANA KAMILA,"RED=GANG COLOR, Customers name is Bolivar F Erazo","The license plate ""RED KLK"" suggests a connect...",RED=GANG COLOR，客户名为 Bolivar F Erazo,车牌“RED KLK”表明与“red”一词有关，并且可能是单词或缩写的游戏。它可以被解释为颜...,0.5,,Y,The plate 'RED KLK' contains letters and numbe...
21,AYOO5OH,5,Y,Y,CAR NAME,5OH = FIVE O,"The license plate ""AYOO5OH"" could be interpret...",5OH = 5 O,车牌“AYOO5OH”可以被解释为短语“YOLO”（你只能活一次）的游戏，其中数字“5”可能...,1.0,,Y,The plate 'AYOO5OH' is personalized because it...
26,REDFLAG,2,Y,Y,"THE CAR IS RED, OLD AND FUNKY. IT STICKS OUT L...",RED=GANG COLOR,"Output: ""REDFLAG"" could imply that the owner i...",红色=帮派颜色,输出：“REDFLAG”可能意味着所有者认同某个团体或运动，或者是该团体或运动的成员，该团体...,1.0,,Y,The plate 'REDFLAG' is personalized because it...
35,NOSTOOL,2,N,N,ITS THE ABREVIATIONS FOR NITROUS OXIDE RACVE G...,STOOL IS ANOTHER WORD FOR SHIT LOOKS LIKE NO SHIT,"Output: The license plate ""NOSTOOL"" could be i...",凳子是“屎”的另一种说法，看起来不像“屎”,输出：车牌“NOSTOOL”可以被解释为一种幽默或讽刺的评论，表明车主没有厕所或不需要厕所，...,1.0,,Y,The plate 'NOSTOOL' is personalized but invali...
56,DUBAGNT,7D,Y,Y,"I'M A WARRIOR FAN, NICKNAME THE DUBS. I'M ALS...","DUB, DRUG REFERENCE","The license plate ""DUBAGNT"" could be interpret...",配音、药物参考,车牌“DUBAGNT”可以被解读为一个有趣且略带暗示的信息。在英语中，“dub”可能意味着复...,0.0,,Y,The plate 'DUBAGNT' is personalized as it does...


I think there are some classify wrong in the raw data.    
example:

6)	DKHRS2
reviewer comment: COULD LOOK LIKE DICK WHORES     
but the status is Y

In [111]:
filtered_wrong = analysis_data[analysis_data['valid'] != analysis_data['status']]
filtered_wrong['similarity'].value_counts()

Unnamed: 0_level_0,count
similarity,Unnamed: 1_level_1
0.0,55
0.5,17
0.3,7
1.0,3


In [114]:
filtered_wrong_wrong = analysis_data[(analysis_data['valid'] != analysis_data['status']) & (analysis_data['similarity'] == 1)]
filtered_wrong_wrong

Unnamed: 0,plate,review_reason_code,status,valid,customer_meaning,reviewer_comments,interpret,中文,中文.1,similarity,comment,personalized,explanation
8,24SF415,2,N,Y,SAN FRANCISCO,415 AREA CODE,"The plate ""24SF415"" appears to be a combinatio...",415 区号,车牌“24SF415”似乎是数字和字母的组合，可以代表不同的含义：\n\n- “24”可能代...,1.0,,Y,The plate is personalized as it contains non-s...
71,DUDE415,2,N,Y,,AREA CODE,"Output: The plate ""DUDE415"" suggests the owner...",区号,输出：车牌“DUDE415”表明车主认同随意、友好的术语“花花公子”，而数字“415”是加利...,1.0,,Y,The plate 'DUDE415' contains a combination of ...
77,ADRI619,2,N,Y,PERSONAL,619 AREA CODE,"""ADRI619"" could symbolize a personal connectio...",619 区号,“ADRI619”可能象征着与“Adri”这个名字的个人联系，可能是 Adriana 或 A...,1.0,,Y,The plate 'ADRI619' is personalized because it...


## Conclusion


我认为在raw data中，有一些分类错误的情况。     
所以会出现，LLM分析错误，但是预测正确。    
LLM 分析正确，但是预测错误。  
I think in raw data, there are some cases of misclassification.     
So there will be an error in the analysis,LLM but the prediction is correct.    
LLM The analysis is correct, but the prediction is wrong.