# Enhanced EDA for Olist E-commerce Dataset
This notebook includes:
- Order analysis
- Customer segmentation using RFM
- Sentiment analysis on review comments

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
from datetime import datetime

# Read the four Olist CSV exports this notebook works with. The paths are
# relative, so the notebook must be run from a directory that sits next to
# the `data/` folder.
orders, customers, payments, reviews = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/olist_orders_dataset.csv',
        '../data/olist_customers_dataset.csv',
        '../data/olist_order_payments_dataset.csv',
        '../data/olist_order_reviews_dataset.csv',
    )
)

# Parse the purchase timestamp so later cells can do date arithmetic on it
# (max(), subtraction, Timedelta offsets) instead of comparing raw strings.
orders['order_purchase_timestamp'] = pd.to_datetime(orders['order_purchase_timestamp'])

## RFM Analysis

In [None]:
# RFM Analysis: one row per customer_id with Recency, Frequency and Monetary.
#
# Recency is counted in whole days back from a snapshot taken one day after
# the latest purchase in the data, so the most recent buyer gets Recency == 1
# rather than 0.
snapshot_date = orders['order_purchase_timestamp'].max() + pd.Timedelta(days=1)

# NOTE(review): in the public Olist schema `customer_id` appears to be issued
# per order, with `customer_unique_id` (in the customers table) identifying
# the actual person. If so, Frequency here is always 1; confirm, and join
# `customers` to group by `customer_unique_id` if repeat buyers matter.
rfm = orders.groupby('customer_id').agg({
    'order_purchase_timestamp': lambda x: (snapshot_date - x.max()).days,
    'order_id': 'count'
}).rename(columns={'order_purchase_timestamp': 'Recency', 'order_id': 'Frequency'})

# Monetary is the total amount the customer paid. It was previously a second
# order count, i.e. a copy of Frequency. An order can be paid in several
# instalments/methods, so payments are first summed per order, mapped onto
# the orders table, then summed per customer. Orders with no payment row
# contribute 0 to the total.
# Assumes `payments` has `order_id` and `payment_value` columns (Olist
# order-payments schema); confirm against the CSV header.
rfm['Monetary'] = (
    orders['order_id']
    .map(payments.groupby('order_id')['payment_value'].sum())
    .groupby(orders['customer_id'])
    .sum()
)
rfm.head()

## Sentiment Analysis

In [None]:
def get_sentiment(text):
    """Return the TextBlob sentiment polarity of *text*, or None if unavailable.

    Args:
        text: A review comment. Anything that is not a ``str`` (for example
            the NaN pandas uses for a missing comment, or ``None``) is
            treated as "no comment".

    Returns:
        The polarity reported by ``TextBlob(text).sentiment``, or ``None``
        when *text* is not a string or TextBlob fails to analyse it.
    """
    # Missing comments are the common case; handle them explicitly instead
    # of relying on TextBlob raising for non-string input.
    if not isinstance(text, str):
        return None
    # NOTE(review): TextBlob's default analyzer uses an English lexicon. If
    # the review comments are not in English the scores may be mostly 0.0.
    # Confirm the language of `review_comment_message`.
    try:
        return TextBlob(text).sentiment.polarity
    except Exception:
        # Scoring is best-effort: one bad row should not abort a column-wide
        # apply. Unlike a bare `except:`, this still lets KeyboardInterrupt
        # and SystemExit propagate, so a long run can be interrupted.
        return None

# Score each review comment element-wise and store the result alongside it;
# rows get_sentiment cannot score receive a missing value.
reviews['sentiment'] = reviews['review_comment_message'].map(get_sentiment)

# Preview the first five comments next to their scores.
reviews[['review_comment_message', 'sentiment']].head(5)