In [1]:
import os
import sys
from os import path

# Put the parent of the current working directory on the import path; the
# `project.*` imports in the following cells are resolved after this runs.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
project_root = path.dirname(os.getcwd())
if project_root not in sys.path:
	sys.path.append(project_root)

In [2]:
import json

import pandas as pd

from project.models import YouTubeVideoInfo

# Load the video metadata and turn every JSON entry into a YouTubeVideoInfo.
# The encoding is pinned because JSON is UTF-8 by specification, whereas open()
# would otherwise fall back to the platform's default encoding.
with open("../data/myyounicon-01/videos_infos.json", "r", encoding="utf-8") as json_file:
	videos = [YouTubeVideoInfo.from_json(v) for v in json.load(json_file)]

# Restrict the label table to the videos that were actually loaded above.
# A set is enough for the membership test and drops repeated ids.
video_ids = {v.id for v in videos}
labels = pd.read_csv("../data/YouNiCon/conspiracy_label.csv")
labels = labels[labels["video_id"].isin(video_ids)]

# Split the remaining rows by the value of the `majority_label` column (1 vs 0).
conspiracy_videos = labels[labels["majority_label"] == 1]
non_conspiracy_videos = labels[labels["majority_label"] == 0]

print(
	f"Total instances: {len(labels)}, conspiracy instances: {len(conspiracy_videos)}, non conspiracy instances: {len(non_conspiracy_videos)}"
)

Total instances: 2515, conspiracy instances: 897, non conspiracy instances: 1618


## Text-Only Models

In [None]:
from project.llm_models import prompts
from project.llm_models.inference import OllamaModel, generate

# Number of videos (taken from the start of `videos`) sent to the model in this cell.
N_SAMPLE_VIDEOS = 2

# The system prompt is built without any per-video argument, so it is created
# once up front instead of on every loop iteration.
prompt = prompts.input_full_text_output_binary_version_01()

# Maps video id -> whatever `generate` returns for that video's title and description.
llama32_results = {}
# Slicing instead of indexing by position avoids an IndexError when fewer than
# N_SAMPLE_VIDEOS videos were loaded.
for video in videos[:N_SAMPLE_VIDEOS]:
	user_prompt = f"video title: {video.title}, video description: {video.description}"
	llama32_results[video.id] = generate(
		OllamaModel.LLAMA_3_2, system_prompt=prompt, user_prompt=user_prompt
	)
print(llama32_results)

{'-8u-hsE87xw': {'model': 'llama3.2', 'created_at': '2024-11-20T14:26:51.721391594Z', 'response': "This appears to be a long-form video or livestream with various content, including:\n\n1. News and updates on weather-related events (hail, flooding, record snow).\n2. Discussions about permaculture and its potential solutions for sustainable living.\n3. Promotions for the creator's projects and businesses, such as:\n\t* Oppenheimer Ranch Project: a self-sustaining homestead and organic farm in preparation for a potential collapse.\n\t* LeakCon2019: an event focused on exposing lies and promoting transparency.\n\t* Patreon support for continued content creation.\n4. Calls to action, including:\n\t* Joining the discussion on Discord.\n\t* Using specific coupon codes or links for discounts and free shipping.\n\t* Subscribing to the channel and sharing with like-minded individuals.\n\nThe video is quite long (1959 seconds) and appears to be a mix of various topics and promotions.", 'done': T

## Multimodal Models

In [None]:
from project.llm_models import prompts
from project.llm_models.inference import OllamaModel, generate_multimodal
from project.utils import dataset_utils

# Number of videos (taken from the start of `videos`) sent to the model in this cell.
N_SAMPLE_VIDEOS = 2
# Dataset root handed to the image-path helper.
DATASET_DIR = "../data/myyounicon-01/"

# The system prompt is built without any per-video argument, so it is created
# once up front instead of on every loop iteration.
prompt = prompts.input_full_text_and_images_output_binary_version_01()

# Maps video id -> whatever `generate_multimodal` returns for that video.
results = {}

# Slicing instead of indexing by position avoids an IndexError when fewer than
# N_SAMPLE_VIDEOS videos were loaded.
for video in videos[:N_SAMPLE_VIDEOS]:
	images = dataset_utils.generate_images_path_from_video_id(video.id, DATASET_DIR)
	print(images)
	# NOTE(review): the saved output of this cell prints None for every video, which
	# suggests generate_multimodal does not return the model response - confirm in
	# project.llm_models.inference before relying on `results`.
	results[video.id] = generate_multimodal(
		OllamaModel.LLAVA, system_prompt=prompt, user_prompt=str(video), images_paths=images
	)
	print(results[video.id])

['/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-8u-hsE87xw_fi_02_1306.1s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-8u-hsE87xw_rand_00_1205.5s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-8u-hsE87xw_rand_01_486.4s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-8u-hsE87xw_fi_01_653.1s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-8u-hsE87xw_rand_02_1695.1s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-8u-hsE87xw_fi_00_0.1s.jpg']
None
['/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-K9TdiQPmX4_fi_01_240.8s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-K9TdiQPmX4_rand_02_137.1s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/images/-K9TdiQPmX4_rand_01_413.0s.jpg', '/home/leoli/Uni/Polimi/Thesis/master-thesis/data/myyounicon-01/

In [6]:
# Display the multimodal outputs collected so far, keyed by video id.
# NOTE(review): the saved output shows None for every key - presumably
# generate_multimodal returns nothing; confirm before using these values.
results

{'-8u-hsE87xw': None, '-K9TdiQPmX4': None}