In [1]:
# Import necessary libraries
import os
import ast
import csv
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pymongo import MongoClient
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.manifold import TSNE
from sklearn.neighbors import KNeighborsClassifier
from dotenv import load_dotenv
from tqdm import trange

# Import libraries for working with language models and Google Gemini
from langchain_openai import ChatOpenAI, OpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

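# Install/upgrade the google-generativeai package (comment out the line below to skip the installation).
# NOTE: this runs after `import google.generativeai` above, so in a fresh environment install the package before running this cell.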
!pip install -U -q google-generativeai

# Set up the environment for plotting
%matplotlib inline

# Load environment variables
load_dotenv()


True

In [3]:
# Function to load and combine the split dataframes
def load_and_merge_csv(file_pattern, num_files):
    """Read a numbered series of CSV files and stack them into one DataFrame.

    Parameters
    ----------
    file_pattern : str
        Path template containing a ``{}`` placeholder; it is filled with the
        file number through ``str.format``.
    num_files : int
        How many files to read. File numbers run from 1 to ``num_files``
        inclusive.

    Returns
    -------
    pandas.DataFrame
        The rows of every file, in file-number order, under a fresh
        0..n-1 index.
    """
    # Resolve every path first, then read them in numeric order.
    paths = []
    for file_number in range(1, num_files + 1):
        paths.append(file_pattern.format(file_number))
    frames = list(map(pd.read_csv, paths))
    # ignore_index discards each file's own row labels so the result is
    # numbered continuously.
    return pd.concat(frames, ignore_index=True)

# Stitch the four numbered CSV parts back into a single DataFrame.
df = load_and_merge_csv(file_pattern='data_upload/cluster_labels{}.csv', num_files=4)

In [4]:
def visualise_titles(df, cluster, n=8, random_state=None):
    """Print a cluster's common theme and a random sample of its titles.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Cluster``, ``Common_Theme`` and ``Title`` columns.
    cluster
        Value matched against the ``Cluster`` column.
    n : int, default 8
        Maximum number of titles to print. A cluster with fewer than ``n``
        rows prints all of its titles instead of raising.
    random_state : optional
        Forwarded to ``DataFrame.sample``; pass a seed to make the selection
        reproducible.

    Returns
    -------
    None
        Everything is written to stdout. When no row matches ``cluster`` a
        short notice is printed instead of the theme and titles.
    """
    members = df[df['Cluster'] == cluster].reset_index(drop=True)

    # Guard: an unknown/empty cluster would otherwise fail on the first-row
    # lookup below with an opaque KeyError.
    if members.empty:
        print(f'No rows found for cluster {cluster}')
        return

    # The theme is read from the first matching row.
    print(f"Common Theme:{members['Common_Theme'].iloc[0]}")
    print('Titles: ')

    # Cap the sample size at the cluster size; sampling without replacement
    # raises ValueError when asked for more rows than exist.
    shown = members.sample(n=min(n, len(members)), random_state=random_state)
    print(shown['Title'].values)
    
# Show the common theme and a random handful of titles for cluster 3.
visualise_titles(df, cluster=3)

Common Theme:Technology, Sustainability, and Social Impact
Titles: 
['Most online hate targets women, says European Union report'
 'Evening Update: Today’s headlines from The Straits Times on Nov 19, 2023'
 'Amazon and TikTok leave opening in next e-commerce boom '
 "Lessons that helped Singapore's Osim open some 400 stores in 100 cities"
 'Hassle-free cleaning: Smart robot vacuum cleans own mop pads and revisits dirty areas'
 'TikTok opposes mooted Indonesia social media transaction ban'
 'Binance sees $1.3 billion in outflows after Zhao steps down to settle US probe '
 "Apple files legal challenge to EU's Digital Markets Act"]


In [5]:
# Preview the first rows of the merged frame to check its columns and values.
df.head()

Unnamed: 0,id,Text,Title,embeddings,Cluster,combined,Common_Theme
0,nos7tzp7jprxlqxe,GENEVA – The remains of a climber discovered i...,Remains found in Swiss Alps are those of Briti...,"[0.063923, 0.065677, -0.001089, 0.065425, -0.0...",17,Title: Remains found in Swiss Alps are those o...,Violence and Injustice
1,zvv4ue0w64vfqoz1,Ms Greta Thunburg became a household name when...,Involve youth in shaping ethical use of AI,"[0.063668, 0.098002, -0.022514, -0.033031, -0....",3,Title: Involve youth in shaping ethical use of...,"Technology, Sustainability, and Social Impact"
2,aph1tgua3xxoq2sg,NEW YORK - Defending women's champion Iga...,"Swiatek, Djokovic headline third round action ...","[-0.019315, 0.066645, 0.009547, 0.029555, -0.0...",10,"Title: Swiatek, Djokovic headline third round ...",Sports and Competition
3,rlh53czyst054zfn,JAKARTA – Hopes of a return to democracy in ju...,‘Systematic repression’ crushing Myanmar’s dem...,"[0.067328, -0.004407, 0.010127, -0.004268, -0....",4,Title: ‘Systematic repression’ crushing Myanma...,Political Crises and Human Rights Concerns
4,aksixz7uun2gkpss,JERUSALEM - Israel's shekel dropped to it...,Israel's shekel falls as judicial showdown looms,"[-0.043186, 0.076352, -0.015492, -0.02859, -0....",18,Title: Israel's shekel falls as judicial showd...,Politics and Elections
