In [1]:
import pandas as pd

# Remove the row limit on rendered DataFrames so tables are never truncated.
pd.options.display.max_rows = None

In [2]:
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
from urllib.parse import quote
import os

# Load PG_* settings from a local .env file (if one exists) into the environment.
load_dotenv()

# Fail fast on absent settings: os.getenv() returns None for an unset variable,
# and interpolating that into the URL would silently embed the text "None".
missing_pg_vars = [
    name
    for name in ("PG_USER", "PG_PASSWORD", "PG_HOST", "PG_DB")
    if os.getenv(name) is None
]
if missing_pg_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_pg_vars)
    )

# Percent-encode the credentials so characters such as '@', ':', '/' or '%'
# in the user name or password cannot be mistaken for URL delimiters.
# quote(..., safe="") is used rather than quote_plus so a space becomes %20,
# not '+'.
# NOTE(review): this assumes the .env values are the raw (unencoded) credentials.
pg_user = quote(os.environ["PG_USER"], safe="")
pg_password = quote(os.environ["PG_PASSWORD"], safe="")

# Host and database are inserted unchanged, so a "host:port" value in PG_HOST
# keeps working exactly as before.
pg_conn_str = f"postgresql+psycopg2://{pg_user}:{pg_password}@{os.environ['PG_HOST']}/{os.environ['PG_DB']}"
engine = create_engine(pg_conn_str)


In [3]:
# Staging step: build staging.customer_support_cleaned from the raw support
# table. ticket_id is cast to BIGINT, the text columns are whitespace-trimmed,
# and `status` is exposed as `ticket_status`. IF NOT EXISTS means nothing is
# rebuilt when the table is already present.
staging_support_ddl = text("""
        CREATE TABLE IF NOT EXISTS staging.customer_support_cleaned AS
        SELECT 
            ticket_id::BIGINT,
            TRIM(customer_name) AS customer_name,
            TRIM(issue_description) AS issue_description,
            TRIM(status) AS ticket_status
        FROM sql_project.customer_support_raw;
    """)

# engine.begin() opens a transaction and commits it when the block exits cleanly.
with engine.begin() as txn:
    txn.execute(staging_support_ddl)


In [4]:
# Staging step: build staging.trustpilot_reviews_cleaned. review_id is cast to
# INT and ticket_id to BIGINT; the review text is trimmed and renamed to
# `review_text`. Rows whose review is NULL or has 20 or fewer characters after
# trimming are dropped. IF NOT EXISTS means nothing is rebuilt when the table
# is already present.
staging_reviews_ddl = text("""
        CREATE TABLE IF NOT EXISTS staging.trustpilot_reviews_cleaned AS
        SELECT 
            review_id::INT,
            TRIM(review) AS review_text,
            ticket_id::BIGINT
        FROM sql_project.trustpilot_reviews_cleaned
        WHERE review IS NOT NULL AND LENGTH(TRIM(review)) > 20;
    """)

# engine.begin() opens a transaction and commits it when the block exits cleanly.
with engine.begin() as txn:
    txn.execute(staging_reviews_ddl)


In [5]:
# Warehouse step: materialise warehouse.dim_tickets as a straight copy of the
# four columns in staging.customer_support_cleaned. IF NOT EXISTS means nothing
# is rebuilt when the table is already present.
dim_tickets_ddl = text("""
        CREATE TABLE IF NOT EXISTS warehouse.dim_tickets AS
        SELECT 
            ticket_id,
            customer_name,
            issue_description,
            ticket_status
        FROM staging.customer_support_cleaned;
    """)

# engine.begin() opens a transaction and commits it when the block exits cleanly.
with engine.begin() as txn:
    txn.execute(dim_tickets_ddl)


In [6]:
# Warehouse step: materialise warehouse.fact_reviews as a straight copy of the
# three columns in staging.trustpilot_reviews_cleaned. IF NOT EXISTS means
# nothing is rebuilt when the table is already present.
fact_reviews_ddl = text("""
        CREATE TABLE IF NOT EXISTS warehouse.fact_reviews AS
        SELECT 
            review_id,
            review_text,
            ticket_id
        FROM staging.trustpilot_reviews_cleaned;
    """)

# engine.begin() opens a transaction and commits it when the block exits cleanly.
with engine.begin() as txn:
    txn.execute(fact_reviews_ddl)


Business Question: Which customer support ticket statuses are most frequently associated with negative sentiment in Trustpilot reviews?

In [7]:
# Sentiment-by-status query (uses `text`, `engine` and `pd` from the setup cells).
#
# 1. sentiment_flagged: label each review 'Negative' when its text contains one
#    of a handful of keywords (case-insensitive), else 'Neutral/Positive', and
#    attach the status of the matching support ticket. The LEFT JOIN keeps
#    reviews that have no matching ticket; their ticket_status is NULL.
# 2. sentiment_count: count reviews per (ticket_status, sentiment).
# 3. ranked_status: rank statuses within each sentiment by review count.
#
# The join key is cast to BIGINT, matching the staging tables; the previous
# ::INT cast would raise "integer out of range" for IDs beyond 32 bits.
#
# NOTE(review): the keyword test is a plain substring match. '%not%' also fires
# on words such as "nothing" or "another", and '%issue%' on any mention of an
# issue, including a resolved one. Treat the counts as a rough signal only.
query = text("""WITH sentiment_flagged AS (
    SELECT
        r.review_id,
        r.review,
        r.ticket_id,
        t.status AS ticket_status,
        CASE
            WHEN r.review ILIKE '%not%' OR r.review ILIKE '%bad%' OR
                 r.review ILIKE '%poor%' OR r.review ILIKE '%frustrat%' OR
                 r.review ILIKE '%slow%' OR r.review ILIKE '%issue%'
            THEN 'Negative'
            ELSE 'Neutral/Positive'
        END AS sentiment
    FROM sql_project.trustpilot_reviews_cleaned r
    LEFT JOIN sql_project.customer_support_raw t ON r.ticket_id::BIGINT = t.ticket_id
),
sentiment_count AS (
    SELECT
        ticket_status,
        sentiment,
        COUNT(*) AS review_count
    FROM sentiment_flagged
    GROUP BY ticket_status, sentiment
),
ranked_status AS (
    SELECT *,
           RANK() OVER (PARTITION BY sentiment ORDER BY review_count DESC) AS rank_within_sentiment
    FROM sentiment_count
)
SELECT *
FROM ranked_status
ORDER BY sentiment, review_count DESC
""")

# Read-only query; AUTOCOMMIT avoids leaving a transaction open on the connection.
with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as conn:
    df_reviews = pd.read_sql(query, conn)
    # Explicit display() so the table is stored in the cell output
    # (the author notes this is needed for it to render on GitHub).
    display(df_reviews)


Unnamed: 0,ticket_status,sentiment,review_count,rank_within_sentiment
0,Open,Negative,11,1
1,Open,Neutral/Positive,88,1


Insight: “Open” is the only ticket status that appears in the results, so it accounts for all 11 negative and all 88 neutral/positive reviews. This suggests that reviews cluster before resolution.

Recommendation: Encourage your support team to proactively resolve tickets earlier and ensure negative sentiment is addressed before it compounds.

Prediction:
Given that 11 out of 99 reviews were flagged as negative and tied to “Open” tickets, I predict that as the volume of reviews increases, the percentage of negative reviews tied to unresolved issues will grow unless support SLAs improve.
