In [2]:
import pandas as pd
from transformers import AutoTokenizer, TFBertForSequenceClassification
import tensorflow as tf

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the telecom customer spreadsheet into a DataFrame.
df = pd.read_excel(io='updated_telecom_customer_data.xlsx')

In [4]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,Tenure,other PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,CustomerComments
0,1,Male,0,No,Yes,61,Yes,No phone service,DSL,No,...,Yes,No,No,No internet service,Month-to-month,No,Bank transfer (automatic),55.94,3284.38,"Happy with the service, no complaints."
1,2,Female,0,No,No,68,Yes,No,Fiber optic,No internet service,...,No,Yes,Yes,No,Two year,Yes,Credit card (automatic),41.95,151.56,Switching to another provider due to poor serv...
2,3,Male,0,No,No,62,No,Yes,No,No internet service,...,Yes,Yes,No,No internet service,Two year,No,Bank transfer (automatic),49.97,3445.57,"Terrible customer support, will not renew."
3,4,Male,1,Yes,No,1,Yes,No,Fiber optic,No internet service,...,No,No,Yes,Yes,Two year,No,Credit card (automatic),76.8,4758.77,"Experiencing frequent outages, very frustrating."
4,5,Male,0,Yes,Yes,53,Yes,Yes,Fiber optic,No,...,Yes,No internet service,No,Yes,One year,Yes,Credit card (automatic),106.4,832.79,Reasonable prices but internet speed is slow.


In [9]:
# Checkpoint identifier on the Hugging Face Hub (a BERT sequence classifier;
# judging by its name, fine-tuned on Yelp polarity reviews).
model_name = "ydshieh/bert-base-uncased-yelp-polarity"

# Load the classifier and its matching tokenizer from the same checkpoint so
# that token ids line up with the model's vocabulary.
model = TFBertForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


Some layers from the model checkpoint at ydshieh/bert-base-uncased-yelp-polarity were not used when initializing TFBertForSequenceClassification: ['dropout_37']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at ydshieh/bert-base-uncased-yelp-polarity.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [10]:
def predict_sentiment(text):
    """Classify a single customer comment as 'Negative', 'Positive' or 'Neutral'.

    The comment is tokenized (truncated to at most 128 tokens) and passed
    through the module-level ``model``; the index of the largest logit selects
    the label.

    Parameters
    ----------
    text : str
        Raw comment text. Non-string values (e.g. ``None`` or the float NaN
        that pandas uses for empty cells) and blank strings are accepted and
        reported as 'Neutral' without running the model.

    Returns
    -------
    str
        'Negative' for class index 0, 'Positive' for class index 1, and
        'Neutral' for any other class index or for missing/blank input.
    """
    # Missing cells are not strings and blank text carries no sentiment, so
    # skip the tokenizer/model entirely instead of feeding them bad input.
    if not isinstance(text, str) or not text.strip():
        return 'Neutral'

    inputs = tokenizer(text, return_tensors='tf', truncation=True, padding=True, max_length=128)
    logits = model(**inputs).logits

    # Softmax is monotonic, so the argmax over raw logits selects the same
    # class as the argmax over probabilities. [0] picks the only batch item.
    predicted_class = int(tf.argmax(logits, axis=-1).numpy()[0])

    # Map predicted class to sentiment label; unknown indices fall back to
    # 'Neutral', matching the original else-branch.
    class_to_label = {0: 'Negative', 1: 'Positive'}
    return class_to_label.get(predicted_class, 'Neutral')

In [11]:
# Identical comment text appears on many rows, and each prediction is a full
# model forward pass. Memoise the label per distinct comment so the model runs
# once per unique text instead of once per row; the resulting column is the
# same as applying predict_sentiment to every row directly.
_sentiment_cache = {}


def _cached_sentiment(comment):
    """Return predict_sentiment(comment), reusing any previously computed label."""
    if comment not in _sentiment_cache:
        _sentiment_cache[comment] = predict_sentiment(comment)
    return _sentiment_cache[comment]


df['Sentiment'] = df['CustomerComments'].apply(_cached_sentiment)


In [12]:
# Inspect the first 20 rows, now including the new 'Sentiment' column.
df.head(n=20)

Unnamed: 0,CustomerID,Gender,SeniorCitizen,Partner,Dependents,Tenure,other PhoneService,MultipleLines,InternetService,OnlineSecurity,...,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,CustomerComments,Sentiment
0,1,Male,0,No,Yes,61,Yes,No phone service,DSL,No,...,No,No,No internet service,Month-to-month,No,Bank transfer (automatic),55.94,3284.38,"Happy with the service, no complaints.",Positive
1,2,Female,0,No,No,68,Yes,No,Fiber optic,No internet service,...,Yes,Yes,No,Two year,Yes,Credit card (automatic),41.95,151.56,Switching to another provider due to poor serv...,Negative
2,3,Male,0,No,No,62,No,Yes,No,No internet service,...,Yes,No,No internet service,Two year,No,Bank transfer (automatic),49.97,3445.57,"Terrible customer support, will not renew.",Negative
3,4,Male,1,Yes,No,1,Yes,No,Fiber optic,No internet service,...,No,Yes,Yes,Two year,No,Credit card (automatic),76.8,4758.77,"Experiencing frequent outages, very frustrating.",Negative
4,5,Male,0,Yes,Yes,53,Yes,Yes,Fiber optic,No,...,No internet service,No,Yes,One year,Yes,Credit card (automatic),106.4,832.79,Reasonable prices but internet speed is slow.,Negative
5,6,Female,0,Yes,No,15,Yes,Yes,Fiber optic,No internet service,...,No internet service,No internet service,Yes,Two year,Yes,Mailed check,57.09,4002.53,"Terrible customer support, will not renew.",Negative
6,7,Male,1,Yes,No,54,Yes,No,Fiber optic,No,...,No internet service,Yes,No internet service,Month-to-month,Yes,Mailed check,52.07,4183.37,"Very helpful support team, fixed my issue quic...",Positive
7,8,Male,0,No,Yes,42,Yes,Yes,DSL,No internet service,...,Yes,No,No internet service,Two year,No,Mailed check,98.18,136.47,"Fast and reliable internet, good value for money.",Positive
8,9,Male,0,No,Yes,60,Yes,No phone service,Fiber optic,No,...,Yes,Yes,Yes,Month-to-month,No,Bank transfer (automatic),22.24,296.96,"Very helpful support team, fixed my issue quic...",Positive
9,10,Female,1,No,Yes,37,Yes,No,Fiber optic,No,...,No internet service,No internet service,No internet service,Two year,Yes,Credit card (automatic),50.21,566.0,Switching to another provider due to poor serv...,Negative


In [13]:
# Count how many comments fall under each predicted sentiment label.
sentiment_counts = df['Sentiment'].value_counts()

# Emit the heading and the counts on separate lines with a single print call.
print("Sentiment Distribution:", sentiment_counts, sep="\n")


Sentiment Distribution:
Sentiment
Negative    2510
Positive    2490
Name: count, dtype: int64


In [14]:
# Group once by predicted sentiment and derive every summary from that grouping.
_by_sentiment = df.groupby('Sentiment')

# Per-sentiment means of the numeric customer attributes.
sentiment_vs_tenure = _by_sentiment['Tenure'].mean()
sentiment_vs_monthly_charges = _by_sentiment['MonthlyCharges'].mean()
sentiment_vs_total_charges = _by_sentiment['TotalCharges'].mean()

# Sentiment x contract-type count table; combinations with no rows become 0.
sentiment_vs_contract = _by_sentiment['Contract'].value_counts().unstack(fill_value=0)

# Print each summary under its heading, in the same order as before.
_reports = (
    ("\nAverage Total Charges by Sentiment:", sentiment_vs_total_charges),
    ("\nAverage Tenure by Sentiment:", sentiment_vs_tenure),
    ("\nAverage Monthly Charges by Sentiment:", sentiment_vs_monthly_charges),
    ("\nContract Type Distribution by Sentiment:", sentiment_vs_contract),
)
for _heading, _table in _reports:
    print(_heading)
    print(_table)


Average Total Charges by Sentiment:
Sentiment
Negative    2532.215996
Positive    2535.110863
Name: TotalCharges, dtype: float64

Average Tenure by Sentiment:
Sentiment
Negative    35.517131
Positive    35.858635
Name: Tenure, dtype: float64

Average Monthly Charges by Sentiment:
Sentiment
Negative    69.759633
Positive    70.381932
Name: MonthlyCharges, dtype: float64

Contract Type Distribution by Sentiment:
Contract   Month-to-month  One year  Two year
Sentiment                                    
Negative              861       843       806
Positive              828       844       818


In [None]:
# Successfully classified the customer comments as positive or negative, and derived insights from the data as shown above.