In [1]:
import pandas as pd
import sys

# --- CONFIGURATION ---
# CSV file that the load step reads into the DataFrame.
file_path_in = 'KBZ_Pay_Final_Correlation_Data_Ver2.csv'
# CSV file that the export step writes, including the new category column.
file_path_out = 'KBZ_Pay_Sentiment_Cate_Ver3.csv'
# Name of the score column that is coded into sentiment categories.
sentiment_column = 'Sentiment_Score_Translated'

# --- 1. SENTIMENT CODING FUNCTION ---
def categorize_sentiment(score, threshold=0.1):
    """
    Codes the continuous sentiment score into discrete categories: Positive, Neutral, Negative.

    Parameters
    ----------
    score : float
        Sentiment score to classify.
    threshold : float, default 0.1
        Half-width of the neutral zone. Scores strictly above +threshold are
        'Positive', strictly below -threshold are 'Negative', and everything
        in between (bounds included) is 'Neutral'.

    Returns
    -------
    str or None
        'Positive', 'Negative' or 'Neutral'; None when the score is missing
        (None / NaN / pd.NA).
    """
    # A NaN fails both comparisons below and would otherwise fall through to
    # 'Neutral', so missing scores are reported as missing instead.
    if pd.isna(score):
        return None

    if score > threshold:
        return 'Positive'
    if score < -threshold:
        return 'Negative'
    return 'Neutral'


In [2]:
# --- 2. LOAD DATA ---
# Only the read itself is guarded, so a failure while printing the preview is
# not misreported as a file-loading problem.
try:
    # on_bad_lines='warn' still skips malformed rows, but reports each one so
    # dropped reviews are visible instead of disappearing silently.
    df = pd.read_csv(file_path_in,
                     encoding='utf-8-sig',
                     on_bad_lines='warn',
                     engine='python')
except Exception as e:
    print(f"!!! ERROR: Failed to load the file. Check file name and path. Details: {e}")
    # Exit with a non-zero status so a scripted run is reported as failed
    # (a bare sys.exit() signals success).
    sys.exit(1)
else:
    print(f"DataFrame loaded successfully from: {file_path_in}")
    print("\nFirst 5 rows of the DataFrame:")
    print(df.head())

DataFrame loaded successfully from: KBZ_Pay_Final_Correlation_Data_Ver2.csv

First 5 rows of the DataFrame:
               Date  Rating                                         Review  \
0  11/11/2025 15:05       5                               My acc my moneys   
1  11/11/2025 12:23       5                                          Thank   
2  11/11/2025 11:50       5  မျက်နှာစကန်ပြန်ပြန်တောင်းနေတယ် အကောင့်ဖွင့်တာ   
3  11/11/2025 10:36       2                         good appointment for ✌   
4   11/11/2025 4:48       5                                           good   

   Is_Burmese  Sentiment_Score  \
0           0              0.0   
1           0              0.0   
2           1              0.0   
3           0              0.7   
4           0              0.7   

                                   Translated_Review  \
0                                   My acc my moneys   
1                                              Thank   
2  I'm asking for a face scan again to open an ac.

In [3]:
# --- 3. CREATE NEW CATEGORICAL COLUMN ---
# Stop early when the score column is absent; nothing below can run without it.
if sentiment_column not in df.columns:
    print(f"!!! ERROR: Column '{sentiment_column}' not found. Cannot categorize sentiment.")
    # Non-zero status so a scripted run is reported as failed
    # (a bare sys.exit() signals success).
    sys.exit(1)

# Code every score in the configured column into a category label.
df['Sentiment_Category'] = df[sentiment_column].apply(categorize_sentiment)

# Display the new column's distribution for verification.
# dropna=False keeps rows without a category, if any, in the tally instead of
# omitting them from the counts.
print("\nSentiment Category Distribution:")
print(df['Sentiment_Category'].value_counts(dropna=False))


Sentiment Category Distribution:
Sentiment_Category
Positive    1009
Neutral      441
Negative      50
Name: count, dtype: int64


In [4]:
# --- 4. EXPORT TO NEW CSV FILE ---
# Written as 'utf-8-sig' so Excel/SPSS can open the file and the Burmese text is preserved;
# the DataFrame index is left out of the output.
df.to_csv(file_path_out, encoding='utf-8-sig', index=False)

# Two-line confirmation: banner first, then the name of the file that was written.
print(
    "\n--- SUCCESS! ---",
    f"Your final file, '{file_path_out}', has been created with the 'Sentiment_Category' column.",
    sep="\n",
)


--- SUCCESS! ---
Your final file, 'KBZ_Pay_Sentiment_Cate_Ver3.csv', has been created with the 'Sentiment_Category' column.
