In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK corpora this cell relies on: the stop-word lists and the
# WordNet data used by the lemmatizer.
for _resource in ("stopwords", "wordnet"):
    nltk.download(_resource)

# ------------------------------------------------
# Load data
# ------------------------------------------------
# Relative path: the workbook is read from the current working directory.
df = pd.read_excel("CSAT_ASAT_New.xlsx")

# ------------------------------------------------
# Shared objects for the text-cleaning function
# ------------------------------------------------
lemmatizer = WordNetLemmatizer()
# Kept as a set so membership tests inside clean_text are cheap.
stop_words = set(stopwords.words("english"))

def clean_text(text):
    """Normalise one survey comment into a space-separated string of lemmas.

    The value is lower-cased, every character that is not an ASCII letter or
    whitespace is replaced by a space, and the remaining tokens are passed
    through the module-level ``lemmatizer``.  Tokens found in ``stop_words``
    or shorter than three characters are dropped.

    Args:
        text: One cell from a verbatim column.  Usually a string, but a
            spreadsheet cell can also load as a number or a missing value.

    Returns:
        str: The cleaned text, or "" when the value is missing or no token
        survives the filtering.
    """
    if pd.isna(text):
        return ""
    # A cell containing only a number loads as int/float, which has no
    # .lower(); coerce to str so such rows are cleaned instead of raising.
    text = str(text).lower()
    text = re.sub(r"[^a-zA-Z\s]", " ", text)
    words = [
        lemmatizer.lemmatize(w)
        for w in text.split()
        if w not in stop_words and len(w) > 2
    ]
    return " ".join(words)

# ================================================================
# 1. CSAT verbatim processing (only ONE CSAT question)
# ================================================================
csat_reason_question = "What are your most important reasons for giving us that score?"
df["CSAT_Summary"] = df[csat_reason_question].apply(clean_text)

# ================================================================
# 2. ASAT score (rename, then force numeric)
# ================================================================
asat_score_question = (
    "How would you rate the service you received from the consultant handling your enquiry?"
)
df.rename(columns={asat_score_question: "ASAT_Score"}, inplace=True)

# Values that cannot be parsed as numbers become NaN, which fails both of the
# threshold comparisons below.
df["ASAT_Score"] = pd.to_numeric(df["ASAT_Score"], errors="coerce")

# ================================================================
# 3. ASAT positive verbatim (kept only where ASAT > 7)
# ================================================================
# NOTE(review): the apostrophe in this header looks mis-encoded. rename()
# ignores keys that match no column, so confirm it matches the workbook.
asat_positive_question = "We‚Äôd love to know what the consultant did to earn such a rating?"
df.rename(columns={asat_positive_question: "ASAT_Positive_Verbatim"}, inplace=True)

scored_above_7 = df["ASAT_Score"] > 7
positive_cleaned = df["ASAT_Positive_Verbatim"].apply(clean_text)
df["ASAT_Positive_Summary"] = np.where(scored_above_7, positive_cleaned, "")

# ================================================================
# 4. ASAT improvement verbatim (kept only where ASAT < 6)
# ================================================================
asat_improve_question = "How could the consultant improve how they handled your enquiry?"
df.rename(columns={asat_improve_question: "ASAT_Improve_Verbatim"}, inplace=True)

scored_below_6 = df["ASAT_Score"] < 6
improve_cleaned = df["ASAT_Improve_Verbatim"].apply(clean_text)
df["ASAT_Improve_Summary"] = np.where(scored_below_6, improve_cleaned, "")

# ================================================================
# 5. FCR transformation (strict value mapping)
# ================================================================
enquiry_resolved_question = "Thinking about your call, was your enquiry resolved?"
df.rename(columns={enquiry_resolved_question: "Enquiry_Resolved"}, inplace=True)

def map_fcr(val):
    """Map a resolution answer to a strict first-call-resolution flag.

    Args:
        val: One cell from the "Enquiry_Resolved" column.  May be a string,
            a missing value, or any other scalar the spreadsheet produced.

    Returns:
        str: "YES" only when the answer, trimmed and lower-cased, is exactly
        one of the accepted "yes on first call" spellings; "NO" for every
        other value, including missing ones.
    """
    if pd.isna(val):
        return "NO"

    # Non-text cells (e.g. a stray number) have no .strip().  They can never
    # be a "yes on first call" answer, so coerce to str instead of raising.
    val = str(val).strip().lower()

    # Accepted spellings, differing only in comma placement.
    yes_values = {
        "yes, on first call",
        "yes on first call",
        "yes,on first call",
        "yes on first call,",
    }

    return "YES" if val in yes_values else "NO"

# Strict first-call-resolution flag ("YES"/"NO") for every response.
df["FCR_NEW"] = df["Enquiry_Resolved"].apply(map_fcr)

# ================================================================
# 6. Date hierarchy (using ONLY "Completed Date 2")
# ================================================================
if "Completed Date 2" in df.columns:
    # Unparseable values become NaT, so the derived parts are missing for them.
    df["Completed Date 2"] = pd.to_datetime(df["Completed Date 2"], errors="coerce")

    df["Year"] = df["Completed Date 2"].dt.year
    df["Quarter"] = df["Completed Date 2"].dt.quarter
    df["Month_Number"] = df["Completed Date 2"].dt.month
    df["Month_Name"] = df["Completed Date 2"].dt.strftime("%B")
    df["Week_Number"] = df["Completed Date 2"].dt.isocalendar().week
    df["Day"] = df["Completed Date 2"].dt.day
    df["Day_Name"] = df["Completed Date 2"].dt.strftime("%A")

# ================================================================
# 7. Power BI-friendly cleanup
# ================================================================
# Blank out missing values in text-like columns only.  A blanket
# df.fillna("") also writes "" into numeric and date columns that have gaps,
# which turns them into mixed text columns; nullable-integer columns such as
# the ISO week number may reject a string fill value altogether.  Missing
# numbers and dates are left as-is and are written to Excel as empty cells.
text_fill = {
    col: ""
    for col, dtype in df.dtypes.items()
    if not (
        pd.api.types.is_numeric_dtype(dtype)
        or pd.api.types.is_datetime64_any_dtype(dtype)
    )
}
if text_fill:
    df.fillna(value=text_fill, inplace=True)

# ================================================================
# 8. Save final output
# ================================================================
df.to_excel("CSAT_ASAT_Transformed_For_PowerBI.xlsx", index=False)

print("Transformation completed successfully with STRICT FCR mapping!")


In [None]:
# Updated code: the next cell is a revised version of the transformation in the previous cell.

In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

# Stemmer shared by clean_text below.
ps = PorterStemmer()

# Make sure the NLTK stop-word corpus is available, then load the English
# list as a set for fast membership tests.
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

# -------------------------------------------------------------------
# SAFE TEXT CLEAN FUNCTION (NO ERROR for int/float/None/Nan)
# -------------------------------------------------------------------
def clean_text(text):
    """Reduce any cell value to a space-separated string of word stems.

    The value is converted with ``str()`` first, so numbers and missing
    values never raise.  If the lower-cased text is exactly "nan", "none",
    "na" or empty, the result is "".  Otherwise every character that is not
    an ASCII letter or whitespace becomes a space, tokens in ``stop_words``
    or shorter than three characters are dropped, and the rest are stemmed
    with the module-level Porter stemmer ``ps``.

    Args:
        text: Any cell value (string, number, None, NaN, ...).

    Returns:
        str: The stemmed tokens joined by single spaces; may be "".
    """
    lowered = str(text).lower()

    # Placeholders produced by stringifying missing values count as "no comment".
    if lowered in ("nan", "none", "na", ""):
        return ""

    letters_only = re.sub(r"[^a-zA-Z\s]", " ", lowered)

    stems = []
    for token in letters_only.split():
        if len(token) <= 2 or token in stop_words:
            continue
        stems.append(ps.stem(token))

    return " ".join(stems)


# -------------------------------------------------------------------
# LOAD FILE
# -------------------------------------------------------------------
df = pd.read_excel("CSAT_ASAT_New.xlsx")


def _as_text(column):
    """Return ``column`` as strings, with missing cells as "" instead of "nan"."""
    # astype(str) alone turns NaN into the literal text "nan", which would be
    # exported as if the customer had typed it.
    return column.astype(str).where(column.notna(), "")


# Numeric view of the score, used only to build the masks below.  The column
# itself is converted later (PROCESS 5); comparing the raw column here would
# fail or mis-compare if any score was read from Excel as text.  Scores that
# cannot be parsed become NaN and therefore match neither mask.
csat_score_numeric = pd.to_numeric(df['CSAT score'], errors='coerce')

# -------------------------------------------------------------------
# PROCESS 1: CSAT VERBATIM CLEANING
# Question: What are your most important reasons for giving us that score?
# -------------------------------------------------------------------
csat_reason_col = "What are your most important reasons for giving us that score?"

df['CSAT_Raw'] = _as_text(df[csat_reason_col])
df['CSAT_Summary'] = df['CSAT_Raw'].apply(clean_text)

# -------------------------------------------------------------------
# PROCESS 2: CSAT POSITIVE OPEN ENDED (Score >= 8)
# Question: We'd love to know what the consultant did to earn such a rating?
# -------------------------------------------------------------------
# NOTE(review): the apostrophe in this header looks mis-encoded; confirm the
# string matches the workbook's column name exactly.
positive_q_col = "We‚Äôd love to know what the consultant did to earn such a rating?"

positive_mask = csat_score_numeric >= 8

# Keep the answer only for rows in the positive score band; "" elsewhere.
df['CSAT_Positive_Question'] = np.where(
    positive_mask, _as_text(df[positive_q_col]), ""
)
df['CSAT_Positive_Summary'] = df['CSAT_Positive_Question'].apply(clean_text)

# -------------------------------------------------------------------
# PROCESS 3: CSAT NEGATIVE OPEN ENDED (Score <= 6)
# Question: How could the consultant improve how they handled your enquiry?
# -------------------------------------------------------------------
negative_q_col = "How could the consultant improve how they handled your enquiry?"

negative_mask = csat_score_numeric <= 6

# Keep the answer only for rows in the negative score band; "" elsewhere.
df['CSAT_Negative_Question'] = np.where(
    negative_mask, _as_text(df[negative_q_col]), ""
)
df['CSAT_Negative_Summary'] = df['CSAT_Negative_Question'].apply(clean_text)

# -------------------------------------------------------------------
# PROCESS 4: FCR TRANSFORMATION BASED ON RESOLUTION QUESTION
# Column: Thinking about your call was your enquiry resolved?
# -------------------------------------------------------------------
# NOTE(review): the first cell of this notebook spells this header with a
# comma after "call"; confirm which spelling the workbook actually uses.
resolution_col = "Thinking about your call was your enquiry resolved?"

def fcr_transform(val):
    """Flag whether a resolution answer says the enquiry was solved on the first call.

    Args:
        val: One cell from the resolution column; any type, since it is
            converted with ``str()`` before matching.

    Returns:
        str: "Yes" when the answer contains "yes on first call" (case,
        commas and extra whitespace are ignored); "No" otherwise, including
        for missing values.
    """
    # Drop commas and collapse whitespace so "Yes, on first call",
    # "Yes on first call" and "Yes,on first call" all compare equal.  Matching
    # only the comma-plus-space spelling silently counts the others as "No".
    normalised = " ".join(str(val).lower().replace(",", " ").split())

    return "Yes" if "yes on first call" in normalised else "No"

# Flag each response as resolved on the first call ("Yes") or not ("No").
df['FCR_New'] = df[resolution_col].apply(fcr_transform)

# -------------------------------------------------------------------
# PROCESS 5: DATATYPE FIXES FOR POWER BI
# -------------------------------------------------------------------

# Score as a plain integer; scores that are missing or cannot be parsed become 0.
score_as_number = pd.to_numeric(df['CSAT score'], errors='coerce')
df['CSAT score'] = score_as_number.fillna(0).astype(int)

# Date columns to parse when present; unparseable values become NaT.
date_cols = ['Completed Date', 'Date Convert', 'Completed Date 2', 'INTERACTION DATE']

for col in date_cols:
    if col not in df.columns:
        continue
    df[col] = pd.to_datetime(df[col], errors='coerce')

# Raw and cleaned verbatim columns are forced to string dtype.
verbatim_cols = [
    'CSAT_Raw', 'CSAT_Summary',
    'CSAT_Positive_Question', 'CSAT_Positive_Summary',
    'CSAT_Negative_Question', 'CSAT_Negative_Summary',
]

for col in verbatim_cols:
    if col not in df.columns:
        continue
    df[col] = df[col].astype(str)

# -------------------------------------------------------------------
# PROCESS 6: SAVE FINAL FILE
# -------------------------------------------------------------------
output_file = "CSAT_ASAT_Transformed_For_PowerBI.xlsx"
df.to_excel(output_file, index=False)

print("‚ú® Transformation Complete!")
print(f"üìÑ Output saved as: {output_file}")
