# Customer Feedback Data Visualization

In [1]:
# Required packages
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# %pip install nbformat

In [3]:
# Load the reviews dataset and take a first look at it.
df = pd.read_csv("../data/full/full_reviews.csv")

# A notebook cell only renders its *last* expression automatically, so the
# intermediate summaries are passed to display() (a built-in of the IPython
# kernel) explicitly; as bare expressions they would be computed and discarded.
df.info()  # prints column names, non-null counts and dtypes to stdout
display(df.head())
display(df.tail())
display(df.describe())

# Column names together with their dtypes (rendered as the cell's output).
df.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1165 entries, 0 to 1164
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     1165 non-null   object 
 1   title                  1165 non-null   object 
 2   review                 1165 non-null   object 
 3   rating                 1165 non-null   int64  
 4   reply                  1156 non-null   object 
 5   experienceDate         1165 non-null   object 
 6   createdDateTime        1165 non-null   object 
 7   publishedDate          1165 non-null   object 
 8   replyPublishedDate     1156 non-null   object 
 9   reviewExperienceDelay  1165 non-null   float64
 10  date                   1165 non-null   object 
 11  year                   1165 non-null   int64  
 12  yearQuarter            1165 non-null   object 
 13  yearMonth              1165 non-null   object 
 14  month                  1165 non-null   int64  
 15  mont

id                        object
title                     object
review                    object
rating                     int64
reply                     object
experienceDate            object
createdDateTime           object
publishedDate             object
replyPublishedDate        object
reviewExperienceDelay    float64
date                      object
year                       int64
yearQuarter               object
yearMonth                 object
month                      int64
monthName                 object
day                        int64
dayName                   object
hour                       int64
replyYear                float64
replyMonth               float64
replyDay                 float64
replyHour                float64
reviewLength               int64
titleLength                int64
sentiment                 object
dtype: object

## Prepare data

In [4]:
# Order the reviews from oldest to newest by their creation timestamp.
# NOTE: createdDateTime is still a string column at this point (it is parsed
# to datetime in a later cell), so the ordering is lexicographic.
df.sort_values("createdDateTime", inplace=True)

# Peek at the two earliest reviews.
df.head(2)

Unnamed: 0,id,title,review,rating,reply,experienceDate,createdDateTime,publishedDate,replyPublishedDate,reviewExperienceDelay,...,day,dayName,hour,replyYear,replyMonth,replyDay,replyHour,reviewLength,titleLength,sentiment
18,5db3d8dc60485809405772f8,Livraison et colis parfait,livraison colis parfait,5,"Bonjour, \n\nMerci pour votre commentaire, il ...",2019-10-26 05:25:48,2019-10-26 05:25:48,2019-10-26 05:25:48,2019-10-28 13:23:27.473000+00:00,0.0,...,26,Saturday,5,2019.0,10.0,28.0,13.0,26,26,positive
17,5db40a1b60485808b8152de6,Très bien,site service après vente toutes questions très...,5,"Bonjour, \n\nMerci pour votre commentaire, il ...",2019-10-26 08:55:55,2019-10-26 08:55:55,2019-10-26 08:55:55,2019-10-28 13:17:31.136000+00:00,0.0,...,26,Saturday,8,2019.0,10.0,28.0,13.0,79,9,positive


In [5]:
# Prepare data: parse the timestamp columns (loaded as strings) into datetimes.
df["createdDateTime"] = pd.to_datetime(df["createdDateTime"])
df["publishedDate"] = pd.to_datetime(df["publishedDate"])
df["experienceDate"] = pd.to_datetime(df["experienceDate"])

# replyPublishedDate is empty for reviews that never received a reply; missing
# values simply become NaT. If strict parsing fails, fall back to per-value
# coercion so that only the unparseable entries are lost, instead of silently
# replacing the whole column with None.
try:
    df["replyPublishedDate"] = pd.to_datetime(df["replyPublishedDate"])
except (ValueError, TypeError) as exc:
    print(f"replyPublishedDate: strict parsing failed ({exc}); coercing unparseable values to NaT")
    df["replyPublishedDate"] = pd.to_datetime(df["replyPublishedDate"], errors="coerce")



## Reviews Volume

In [6]:
# Count the reviews received on each calendar day and draw the counts as a line.
data = df.groupby(by="date")["id"].count()
daily_rev_trend_fig = px.line(
    data_frame=data,
    title="Daily Review Volume Trend Over Time",
)
daily_rev_trend_fig.show()


In [7]:
# Inspect the per-day review counts. `data` is rebound by many cells, so this
# shows whatever the most recently executed cell assigned to it.
data

date
2019-10-26     5
2019-10-27     2
2019-10-28     6
2019-10-29     5
2019-10-30     7
              ..
2020-01-06    74
2020-01-07     8
2020-01-08    26
2020-01-09    25
2020-01-10     6
Name: id, Length: 74, dtype: int64

In [8]:
# Count the reviews received in each year-month bucket and draw them as a line.
data = df.groupby(by="yearMonth")["id"].count()
monthly_rev_trend_fig = px.line(
    data_frame=data,
    title="Monthly Review Volume Trend",
)
monthly_rev_trend_fig.show()

## Sentiment Analysis

In [9]:
# Share of each sentiment label across all reviews (pie chart).
sent_rep_fig = px.pie(
    data_frame=df,
    names="sentiment",
    title="Sentiment Repartition",
)
sent_rep_fig.show()

In [10]:
# Number of reviews per sentiment label, with one colour per label.
sent_dist_fig = px.histogram(
    data_frame=df,
    x="sentiment",
    color="sentiment",
    title="Sentiment Distribution",
)
sent_dist_fig.show()

In [11]:
# Reviews per sentiment label for each year: one row per year, one column per
# label, with 0 where a label does not occur in that year.
data = (
    df.groupby("year")["sentiment"]
    .value_counts()
    .unstack()
    .fillna(0)
)
sent_trend_fig = px.bar(data_frame=data, title="Yearly Sentiment Trend Over Time")
sent_trend_fig.show()

In [12]:
# Reviews per sentiment label for each year-month, shown as stacked bars.
data = (
    df.groupby("yearMonth")["sentiment"]
    .value_counts()
    .unstack()
    .fillna(0)
)
fig = px.bar(
    data_frame=data,
    title="Sentiment count by month",
    barmode="stack",
)
fig.show()

In [13]:
# Reviews per sentiment label for each calendar day, one line per label.
data = (
    df.groupby("date")["sentiment"]
    .value_counts()
    .unstack()
    .fillna(0)
)

# NOTE: this rebinds sent_trend_fig, which the yearly sentiment cell also assigns.
sent_trend_fig = px.line(
    data_frame=data,
    title="daily Sentiment Trend Over Time",
)
sent_trend_fig.show()

## Review Rating Analysis

In [14]:
# Share of each rating value across all reviews (pie chart).
rating_rep_fig = px.pie(
    data_frame=df,
    names="rating",
    title="Rating Repartition",
)
rating_rep_fig.show()

In [15]:
# Reviews per rating value for each year: one row per year, one column per
# rating, with 0 where a rating does not occur in that year.
data = (
    df.groupby("year")["rating"]
    .value_counts()
    .unstack()
    .fillna(0)
)
yearly_rating_trend_fig = px.bar(data_frame=data, title="Yearly Rating Trend")
yearly_rating_trend_fig.show()

In [16]:
# Reviews per rating value for each year-month, shown as side-by-side bars.
data = (
    df.groupby("yearMonth")["rating"]
    .value_counts()
    .unstack()
    .fillna(0)
)
monthly_rating_trend_fig = px.bar(
    data_frame=data,
    title="Monthly Rating Trend",
    barmode="group",
)
monthly_rating_trend_fig.show()

In [17]:
# Reviews per rating value for each calendar day, one line per rating.
data = (
    df.groupby("date")["rating"]
    .value_counts()
    .unstack()
    .fillna(0)
)
daily_rating_trend_fig = px.line(data_frame=data, title="Daily Rating Trend")
daily_rating_trend_fig.show()

## Reply & Response Time Analysis

In [34]:
# How many reviews did / did not receive a reply.
fig9 = px.bar(df["reply"].notna().value_counts(), title="Reviews with and without Reply")
fig9.show()

# Keep only the replied-to reviews; copy so the columns added below do not
# touch df.
data = df[df["reply"].notna()].copy()

# Parse both timestamps as timezone-aware so they can be subtracted:
# replyPublishedDate carries a +00:00 offset while publishedDate has none.
# NOTE(review): this treats the naive publishedDate values as UTC — confirm.
data["replyPublishedDate"] = pd.to_datetime(data["replyPublishedDate"], utc=True)
data["publishedDate"] = pd.to_datetime(data["publishedDate"], utc=True)

# Reply time = reply publication minus review publication, in hours.
# The chart previously plotted reviewExperienceDelay, which is not the reply
# delay its title announces.
data["replyDelayHours"] = (
    data["replyPublishedDate"] - data["publishedDate"]
).dt.total_seconds() / 3600

fig10 = px.histogram(data, x="replyDelayHours", title="Reply Time Distribution (Hours)", nbins=50)
fig10.show()

## Review Length Analysis

In [36]:
# Review length: overall distribution, spread per sentiment label, and its
# relation to the rating.
fig16 = px.histogram(
    data_frame=df,
    x="reviewLength",
    title="Distribution of Review Lengths",
    nbins=50,
)
fig17 = px.box(
    data_frame=df,
    x="sentiment",
    y="reviewLength",
    title="Review Length per Sentiment",
)
fig18 = px.scatter(
    data_frame=df,
    x="reviewLength",
    y="rating",
    title="Review Length vs Rating",
)

# Render the three figures in order.
for fig in (fig16, fig17, fig18):
    fig.show()

## Textual Analysis