In [None]:
from enum import Enum
import requests
import pandas as pd
import os
import shutil
from langdetect import detect, LangDetectException

In [None]:
class KialoSort(Enum):
    """Sort orders sent as the ``sort`` query parameter of a discussions request."""

    RANK_ACTIVITY = "rank_and_latest_activity"
    VIEW = "view_count"
    LAST_ACTIVITY = "latest_activity"

class KialoFilter(Enum):
    """Filters sent as the ``filter`` query parameter of a discussions request."""

    PROMOTED = "promoted"
    PARTICIPATE = "participate"
    LAST_ACTIVITY = "latest_activity"
    TAG = "tag"
    TAG_ALL = "tag_all"

In [None]:
class KialoTool:
    """Helper around the Kialo HTTP API at ``https://www.kialo.com/api/v1``.

    It lists discussion tags, lists discussions, and turns discussion records
    into the ``<slugified-title>-<id>`` identifiers used to build export URLs.
    """

    API_ROOT = "https://www.kialo.com/api/v1"
    # Seconds to wait for the API. Without a timeout ``requests`` may block
    # forever on a stalled connection.
    REQUEST_TIMEOUT = 30
    DEFAULT_EXPORT_PATH = "../rawData/kialo/kialo-url-ids.csv"
    # Single-pass slug table: spaces become dashes, punctuation is dropped.
    _SLUG_TABLE = str.maketrans(" ", "-", "?/():,.;'\"%#")

    def _getJson(self, url, params=None):
        """GET ``url`` and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the server answers with an error status, so
                a failed request is reported as such instead of surfacing
                later as a confusing JSON or key error.
        """
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def getTags(self):
        """Return the ``tagName`` of every entry under ``"tags"`` in the tag listing."""
        payload = self._getJson(self.API_ROOT + "/discussiontags")
        return [item["tagName"] for item in payload["tags"]]

    # The enum annotations are strings so that this class can be defined even
    # when the enum definitions have not been evaluated yet.
    def getDiscussions(self, filter: "KialoFilter", sort: "KialoSort", limit=3000):
        """Return the ``"discussions"`` list of a discussions query.

        Args:
            filter: enum member whose ``value`` is sent as ``filter``.
            sort: enum member whose ``value`` is sent as ``sort``.
            limit: maximum number of discussions requested (``skip`` is 0).
        """
        # Let requests build (and escape) the query string.
        params = {
            "filter": filter.value,
            "sort": sort.value,
            "limit": limit,
            "skip": 0,
        }
        return self._getJson(self.API_ROOT + "/discussions", params)["discussions"]

    def replaceSpecialChars(self, string):
        """Return ``string`` with spaces turned into ``-`` and the characters
        ``? / ( ) : , . ; ' " % #`` removed."""
        return string.translate(self._SLUG_TABLE)

    def discussions2urlID(self, discussions, export=False, exportPath=DEFAULT_EXPORT_PATH):
        """Build the URL identifier of every discussion.

        Args:
            discussions: iterable of mappings with ``"title"`` and ``"id"``
                keys (and ``"tags"`` when ``export`` is true).
            export: when true, also write the identifiers and tags to a CSV.
            exportPath: destination of that CSV; missing parent directories
                are created.

        Returns:
            list[str]: ``<lower-cased, slugified title>-<id>`` per discussion.
        """
        idsUrl = [
            self.replaceSpecialChars(x["title"].lower()) + "-" + str(x["id"])
            for x in discussions
        ]
        if export:
            tags = [x["tags"] for x in discussions]
            # to_csv does not create directories, so make sure the target exists.
            exportDir = os.path.dirname(exportPath)
            if exportDir:
                os.makedirs(exportDir, exist_ok=True)
            pd.DataFrame.from_dict({"kialoUrlId": idsUrl, "tags": tags}).to_csv(exportPath)

        return idsUrl

# Module-level KialoTool instance used by the calls further down.
k = KialoTool()

### Export the most active and highest-ranked Kialo discussions

In [None]:
# Fetch the discussions (filter "tag", sorted by rank and latest activity) and,
# because export=True, write their URL ids and tags to the CSV file.
k.discussions2urlID(k.getDiscussions(filter=KialoFilter.TAG, sort=KialoSort.RANK_ACTIVITY), export=True)

In [None]:
# Kialo login typed into the login form by downloadDiscussions. The values are
# read from the environment so real credentials never have to be written into
# the notebook; when the variables are unset the placeholder strings are kept.
kialoUsername = os.environ.get("KIALO_USERNAME", "PLACEHOLDER")
secret        = os.environ.get("KIALO_PASSWORD", "PLACEHOLDER")

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from tqdm import tqdm

def downloadDiscussions(disscusionUrlIds, delay=1.5, loginTimeout=3):
    """Log into Kialo with Chrome and download the text export of each discussion.

    The browser is configured to save downloads into ``../rawData/kialo/debates``
    (resolved to an absolute path). Credentials come from the module-level
    ``kialoUsername`` and ``secret`` variables.

    Args:
        disscusionUrlIds: iterable of ``<slug>-<id>`` strings; each one is
            requested as ``https://www.kialo.com/export/<urlId>.txt``.
        delay: seconds to pause after each export request.
        loginTimeout: seconds to wait for the home page after submitting the
            login form.

    Raises:
        selenium.common.exceptions.TimeoutException: if the home page header
            does not appear within ``loginTimeout`` seconds.
    """
    path = os.path.abspath("../rawData/kialo/debates")
    # Make sure the download target exists before the browser is pointed at it.
    os.makedirs(path, exist_ok=True)

    options = Options()
    options.add_experimental_option("prefs", {"download.default_directory": path})
    driver = webdriver.Chrome(options=options)

    # Always shut the browser down, even if login or a download request fails;
    # otherwise every call leaves a Chrome process behind.
    try:
        driver.get("https://www.kialo.com/login")

        # Login
        usernameField = driver.find_element(By.ID, "emailOrUsername")
        passwordField = driver.find_element(By.ID, "password")
        loginButton = driver.find_element(By.CLASS_NAME, "login-form__submit")
        usernameField.send_keys(kialoUsername)
        passwordField.send_keys(secret)
        loginButton.click()

        # Block until the home page header is present, i.e. the login went through.
        WebDriverWait(driver, loginTimeout).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'home-page-section__header'))
        )

        for urlId in tqdm(disscusionUrlIds):
            driver.get("https://www.kialo.com/export/" + urlId + ".txt")
            # Pause between requests; this is also the only time the last
            # download gets before the browser is closed.
            # NOTE(review): raise `delay` if the final file ends up truncated.
            time.sleep(delay)
    finally:
        driver.quit()


In [None]:
# Fetch the discussion list again, re-export the CSV (export=True) and download
# the text export of every discussion through the browser session.
downloadDiscussions(k.discussions2urlID(k.getDiscussions(filter=KialoFilter.TAG, sort=KialoSort.RANK_ACTIVITY), export=True))

In [None]:
def classify_files_by_language(base_folder_path):
    """Copy each debate file into a sub-folder named after its detected language.

    Every ``.txt`` entry directly inside ``base_folder_path`` is read as UTF-8
    and handed to ``detect``. The file is then copied (the original stays where
    it is) into ``<base_folder_path>/<detected language>``, which is created if
    it does not exist yet. Files whose language cannot be detected are reported
    and skipped.

    Args:
        base_folder_path (str): The path to the folder containing the debate files.
    """
    debate_files = [name for name in os.listdir(base_folder_path) if name.endswith(".txt")]

    for filename in debate_files:
        source_path = os.path.join(base_folder_path, filename)

        with open(source_path, 'r', encoding='utf-8') as handle:
            content = handle.read()

        # Only the detection itself can raise LangDetectException.
        try:
            lang = detect(content)
        except LangDetectException:
            print(f"Failed to detect language of '{filename}'.")
            continue

        print(f"File '{filename}' has been detected to be in language '{lang}'.")

        # Create the language folder on first use
        target_folder = os.path.join(base_folder_path, lang)
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)

        # Copy (not move) the file into its language folder
        shutil.copy(source_path, os.path.join(target_folder, filename))

# Path of the folder containing the debate files
base_folder_path = os.path.abspath("../rawData/kialo/debates")

# Run the function
classify_files_by_language(base_folder_path)

In [None]:
# Print number of debates in each language
# Every sub-directory of the debates folder is treated as a language folder.
language_folders = [name for name in os.listdir(base_folder_path) if os.path.isdir(os.path.join(base_folder_path, name))]

for lang in language_folders:
    lang_folder = os.path.join(base_folder_path, lang)
    # The count includes every entry in the folder, whatever its type or extension.
    print(f"Language: {lang} -- Number of debates: {len(os.listdir(lang_folder))}")