In [1]:
import numpy as np
import pandas as pd
import os
import warnings

from scripts.utils import *
from app.utils import clean_comment, download_comments
from app.loader import model_setup
from app.ModelPredictor import ModelPredictor
from app.Plots import PlotResult

# Silence every warning category from this point on.
# NOTE(review): this runs after the imports above, so warnings raised while
# importing are still shown, and any later warning is hidden as well —
# consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sajan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Create the directory that analyze() uses for its plot paths;
# exist_ok=True keeps this cell safe to re-run.
os.makedirs('app/plots', exist_ok=True)

In [3]:
# Build the shared objects once. predict() and analyze() below read
# model_predictor, sentiment_decoder, type_decoder and plots as globals,
# so this cell must run before either function is called.
model, tokenizer, sentiment_decoder, type_decoder, device = model_setup()
model_predictor = ModelPredictor(model, tokenizer, device)
plots = PlotResult()

In [5]:
def predict(comment: str):
    """Classify a single comment and report the model's confidence.

    The text is passed through ``clean_comment`` and then handed to
    ``model_predictor.get_predict``. The two predicted class indices are
    looked up in ``sentiment_decoder.classes_`` and ``type_decoder.classes_``.

    Args:
        comment: Raw comment text.

    Returns:
        dict with the decoded labels under ``sentiment`` and ``comment_type``,
        and the two returned probabilities, multiplied by 100 and rounded to
        two decimals, under ``sentiment_confidence`` and ``type_confidence``.
    """
    cleaned_text = str(clean_comment(comment))
    senti_idx, type_idx, senti_score, type_score = model_predictor.get_predict(cleaned_text)

    # Decode the class indices into their label names.
    result = {}
    result["sentiment"] = sentiment_decoder.classes_[int(senti_idx)]
    result["comment_type"] = type_decoder.classes_[int(type_idx)]

    # Express the probabilities as percentages with two decimals.
    result["sentiment_confidence"] = round((senti_score * 100), 2)
    result["type_confidence"] = round((type_score * 100), 2)
    return result

def analyze(url: str):
    """Predict sentiment and type for every comment of a video and save plots.

    Comments are fetched with ``download_comments``, the ``comment`` column is
    sent to ``model_predictor.get_predict`` in one call, and the predicted
    class indices are decoded through ``sentiment_decoder.classes_`` and
    ``type_decoder.classes_``. Four plots (two class distributions, two
    confidence plots) are written under ``app/plots``.

    Args:
        url: Address of the YouTube video whose comments are analysed.

    Returns:
        dict with ``total_comments`` (number of predictions) and the save
        paths of the four plots under ``sentiment_distribution_plot``,
        ``sentiment_confidence_plot``, ``type_distribution_plot`` and
        ``type_confidence_plot``.
    """
    # scrape the comments from the given youtube video
    comments = download_comments(url)

    # NOTE(review): predict() runs clean_comment() before inference but this
    # batch path does not — confirm whether download_comments() already
    # returns cleaned text.
    sentiments, types, senti_proba, type_proba = model_predictor.get_predict(comments['comment'].tolist())

    # collect the raw predictions, one row per comment
    df = pd.DataFrame()
    df['sentiment'] = sentiments
    df['type'] = types
    # NOTE(review): predict() multiplies the probabilities by 100, here they
    # are stored as returned while the confidence plots label the axis "(%)"
    # — verify that plot_confidence() does the scaling.
    df['senti_proba'] = senti_proba
    df['type_proba'] = type_proba

    # mapping the predicted classes and types with its names
    df['sentiment'] = df['sentiment'].astype(int).map(lambda x: sentiment_decoder.classes_[x])
    df['type'] = df['type'].astype(int).map(lambda x: type_decoder.classes_[x])

    plots_dir = 'app/plots'
    # Create the output directory here so the function does not depend on a
    # separate notebook cell having been executed first.
    os.makedirs(plots_dir, exist_ok=True)

    # sd/sp = sentiment distribution/confidence, td/tp = type distribution/confidence
    plots_path = {
        key: f"{plots_dir}/{plots.generate_plot_name(key)}"
        for key in ('sd', 'sp', 'td', 'tp')
    }

    # plot the predicted class distributions
    plots.plot_distribution(
        df, col="sentiment", title="Sentiment Distribution",
        xlabel="Comment Sentiments", ylabel="Number of Comments",
        save_path=plots_path['sd'],
    )
    plots.plot_distribution(
        df, col="type", title="Type Distribution",
        xlabel="Comment Types", ylabel="Number of Comments",
        save_path=plots_path['td'],
    )

    # plot the prediction confidence
    plots.plot_confidence(
        df, col="sentiment", proba_col="senti_proba",
        title="Model Confidence per Sentiment Class",
        xlabel="Confidence Score (%)", ylabel="Sentiment",
        save_path=plots_path['sp'],
    )
    plots.plot_confidence(
        df, col="type", proba_col="type_proba",
        title="Model Confidence per Type Class",
        xlabel="Confidence Score (%)", ylabel="Type",
        save_path=plots_path['tp'],
    )

    return {
        "total_comments": len(sentiments),
        "sentiment_distribution_plot": plots_path['sd'],
        "sentiment_confidence_plot": plots_path['sp'],
        "type_distribution_plot": plots_path['td'],
        "type_confidence_plot": plots_path['tp']
    }

In [None]:
# Quick check of the single-comment path.
# NOTE(review): the misspelling "loove" looks like deliberately noisy input —
# confirm before correcting it.
predict("I loove this movie")

{'sentiment': 'positive',
 'comment_type': 'opinion',
 'sentiment_confidence': 65.53,
 'type_confidence': 63.88}

In [None]:
# Batch example: replace the placeholder below with a real YouTube video URL.
# The recorded output comes from a video with 35 comments.
analyze("yt video link")

{'total_comments': 35,
 'sentiment_distribution_plot': 'app/plots/sd_20260123180722_1cf30580.png',
 'sentiment_confidence_plot': 'app/plots/sp_20260123180722_593384ed.png',
 'type_distribution_plot': 'app/plots/td_20260123180722_07f1f460.png',
 'type_confidence_plot': 'app/plots/tp_20260123180722_d1a9f26d.png'}