In [1]:
# let√∂lti a core list√°kat
import requests
import csv
from io import StringIO

core_versions = ["CORE2008", "ERA2010", "CORE2013", "CORE2014", "CORE2017", "CORE2018", "CORE2020", "CORE2021", "CORE2023"]
    
for rank in ["A*","A","B","C"]: 
    rank_name=rank.replace("*","star")

    url = "https://portal.core.edu.au/conf-ranks/"

    for core in core_versions:
        print("‚ñ∂Ô∏è Downloading version: {}".format(core))
        page = 1
        all_rows = []
        header_saved = False

        while True:
            params = {
                "search": rank,
                "by": "rank",
                "source": core,
                "sort": "asource",
                "page": page,
                "do": "Export"
            }

            response = requests.get(url, params=params)
            if response.status_code == 200:
                csv_data = response.content.decode("utf-8")
                csv_reader = csv.reader(StringIO(csv_data))
                rows = list(csv_reader)

                if not rows or len(rows) <= 1:
                    break  # No data

                if not header_saved:
                    all_rows.append(rows[0])
                    header_saved = True

                all_rows.extend(rows[1:])

                if len(rows) < 51:
                    break  # Last page
                page += 1
            else:
                print("Could not fetch {} page {}: response was {}".format(core, page, response.status_code))
                break
        if len(all_rows) >= 1:
            filename = "core_{}_{}_export.csv".format(rank_name,core)
            with open(filename, "w", newline='', encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerows(all_rows)
            print("‚úÖ Saved: {}".format(filename))
        else:
            print("No data found for {} in rank {}".format(core, rank))

‚ñ∂Ô∏è Downloading version: CORE2008
Could not fetch CORE2008 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2008_export.csv
‚ñ∂Ô∏è Downloading version: ERA2010
No data found for ERA2010 in rank A*
‚ñ∂Ô∏è Downloading version: CORE2013
Could not fetch CORE2013 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2013_export.csv
‚ñ∂Ô∏è Downloading version: CORE2014
Could not fetch CORE2014 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2014_export.csv
‚ñ∂Ô∏è Downloading version: CORE2017
Could not fetch CORE2017 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2017_export.csv
‚ñ∂Ô∏è Downloading version: CORE2018
Could not fetch CORE2018 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2018_export.csv
‚ñ∂Ô∏è Downloading version: CORE2020
Could not fetch CORE2020 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2020_export.csv
‚ñ∂Ô∏è Downloading version: CORE2021
Could not fetch CORE2021 page 3: response was 500
‚úÖ Saved: core_Astar_CORE2021_export.csv
‚ñ∂Ô∏è Downloading versi

In [1]:
# tov√°bbi oldalak let√∂lt√©se DBLP linekekkel
import requests
from bs4 import BeautifulSoup
import csv

BASE_URL = "https://portal.core.edu.au/conf-ranks/"
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

for rank in ["A*","A","B","C"]: 
    rank_name=rank.replace("*","star")
    all_rows = []
    page = 1
    while True:
        print(f"{rank_name} Feldolgoz√°s: {page}. oldal...")
        params = {
            "search": rank,
            "by": "rank",
            "source": "all",
            "sort": "asource",
            "page": page
        }

        response = requests.get(BASE_URL, params=params, headers=HEADERS)
        soup = BeautifulSoup(response.content, "html.parser")

        table = soup.find("table")
        if not table:
            print("üö´ Nem tal√°lhat√≥ t√°bl√°zat ezen az oldalon.")
            break

        rows = table.find_all("tr")[1:]  # Skip header
        if not rows or len(rows) < 50:
            print("‚úÖ Utols√≥ oldal el√©rve.")
            break

        for row in rows:
            cols = row.find_all("td")
            if len(cols) != 9:
                continue

            title = cols[0].text.strip()
            acronym = cols[1].text.strip()
            source = cols[2].text.strip()
            rank = cols[3].text.strip()
            note = cols[4].text.strip()
            dblp_tag = cols[5].find("a")
            dblp = dblp_tag["href"].strip() if dblp_tag else ""
            primary_for = cols[6].text.strip()
            comments = cols[7].text.strip()
            avg_rating = cols[8].text.strip()

            all_rows.append([
                title, acronym, source, rank, note,
                dblp, primary_for, comments, avg_rating
            ])

        page += 1

    # CSV ment√©s
    filename = "core_{}_with_dblp.csv".format(rank_name)
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "Title", "Acronym", "Source", "Rank", "Note",
            "DBLP", "Primary FoR", "Comments", "Average Rating"
        ])
        writer.writerows(all_rows)


    print("‚úÖ Ment√©s k√©sz: {}".format(filename))

Astar Feldolgoz√°s: 1. oldal...
Astar Feldolgoz√°s: 2. oldal...
‚úÖ Utols√≥ oldal el√©rve.
‚úÖ Ment√©s k√©sz: core_Astar_with_dblp.csv
A Feldolgoz√°s: 1. oldal...


KeyboardInterrupt: 

In [None]:
# a k√©t t√°bla √∂sszegy√∫r√°sa:
import pandas as pd
import glob
import os
import json
import re


def has_conflicting_ranks(year_rank_list):
    """
    Megn√©zi, hogy egy konferencia ugyanabban az √©vben t√∂bb rangot is kapott-e.
    P√©lda input: ["CORE2013_A*", "CORE2014_A", "CORE2014_B"]
    """
    year_to_ranks = {}

    for item in year_rank_list.split(', '):
        match = re.search(r"(\d{4})_?([A-C]\*?)$", item)
        if match:
            year = int(match.group(1))
            rank = match.group(2)
            year_to_ranks.setdefault(year, set()).add(rank)

    # ha b√°rmely √©vhez egyn√©l t√∂bb rank tartozik ‚Üí gond van
    for year, ranks in year_to_ranks.items():
        if len(ranks) > 1:
            return True
    return False

all_acronym_to_info={}
name_to_acronym={}
reproduced_acronym = 0
ignored=[]
for rank in ["A*","A","B","C"]: 
    rank_name=rank.replace("*","star")
    directory = "./"
    csv_files = glob.glob(os.path.join(directory, "core_{}_*_export.csv".format(rank_name)))
    dblp_file = "core_{}_with_dblp.csv".format(rank_name)

    # üîÑ √öj strukt√∫ra: Acronym alapj√°n
    acronym_to_info = {}

    # üîÑ CSV f√°jlok feldolgoz√°sa
    for file in csv_files:
        year = os.path.basename(file).split("_")[2]
        df = pd.read_csv(file)

        df.columns.values[0] = "ID"
        df.columns.values[1] = "Name"
        df.columns.values[2] = "Acronym"

        for _, row in df.iterrows():
            acronym = row["Acronym"]
            if pd.isna(acronym):
                if row["Name"] in name_to_acronym:
                    acronym = name_to_acronym[row["Name"]]
                    reproduced_acronym += 1
                else:
                    ignored.append(dict(row))
                    continue  
            name_to_acronym[row["Name"]]=acronym
            if acronym not in acronym_to_info:
                acronym_to_info[acronym] = {
                    "Acronym": acronym,
                    "Name": row["Name"],
                    "YearsListed": [year],
                    "DBLP ID": row["ID"]
                }                
            else:
                if year not in acronym_to_info[acronym]["YearsListed"]: 
                    acronym_to_info[acronym]["YearsListed"].append(year)
            if acronym not in all_acronym_to_info:
                all_acronym_to_info[acronym] = {
                    "Acronym": acronym,
                    "Name": row["Name"],
                    "YearsListed": [year+"_"+rank],
                    "DBLP ID": row["ID"]
                }                
            else:
                if year+"_"+rank not in all_acronym_to_info[acronym]["YearsListed"]: 
                    all_acronym_to_info[acronym]["YearsListed"].append(year+"_"+rank)

    # üì• DBLP f√°jl beolvas√°sa
    dblp_df = pd.read_csv(dblp_file)
    if "Acronym" not in dblp_df.columns or "DBLP" not in dblp_df.columns:
        raise KeyError("Acronym vagy DBLP oszlop hi√°nyzik a DBLP f√°jlb√≥l.")

    # üîó DBLP linkek hozz√°ad√°sa a dictionary-hez
    for _, row in dblp_df.iterrows():
        acronym = row["Acronym"]
        if acronym in acronym_to_info:
            acronym_to_info[acronym]["DBLP URL"] = row["DBLP"]
            acronym_to_info[acronym]["Average Rating"] = str(row.get("Average Rating", None))
        if acronym in all_acronym_to_info:
            all_acronym_to_info[acronym]["DBLP URL"] = row["DBLP"]
            all_acronym_to_info[acronym]["Average Rating"] = str(row.get("Average Rating", None))

    # üßæ T√°bl√°zat √©p√≠t√©se
    final_df = pd.DataFrame(acronym_to_info.values())
    final_df["YearsListed"] = final_df["YearsListed"].apply(lambda y: ", ".join(sorted(y)))

    # üíæ Ment√©s
    final_df.to_csv("core_{}_merged_5cols_by_acronym.csv".format(rank_name), index=False)
    print("‚úÖ Elmentve: core_{}_merged_5cols_by_acronym.csv".format(rank_name))

print(f"üîÑ √ñsszesen {reproduced_acronym} acronim azonos√≠t√≥ lett reproduk√°lva a nevek alapj√°n.")
print(f"üö´ √ñsszesen {len(ignored)} sor lett kihagyva hi√°nyz√≥ acronim azonos√≠t√≥ miatt. L√°sd: ignored_conferences.json")
with open("ignored_conferences.json", "w", encoding="utf-8") as f:
    json.dump(ignored, f, indent=2, ensure_ascii=False)
# üßæ T√°bl√°zat √©p√≠t√©se
all_final_df = pd.DataFrame(all_acronym_to_info.values())

def extract_year(s: str) -> int:
    """Kinyeri az √©vsz√°mot a stringb≈ël."""
    match = re.search(r"\d{4}", s)
    return int(match.group()) if match else 0

all_final_df["YearsListed"] = all_final_df["YearsListed"].apply(
    lambda y: ", ".join(sorted(y, key=extract_year))
)

for index, row in all_final_df.iterrows():
    if row['Acronym']=='DSN':
        pass
    if has_conflicting_ranks(row["YearsListed"]):
        print(f"‚ö†Ô∏è Figyelmeztet√©s: {row['Name']} ({row['Acronym']}) konferencia ugyanabban az √©vben t√∂bb rangot is kapott! ({row['YearsListed']})")

all_final_df.to_csv("core_table.csv", index=False)
print("‚úÖ Elmentve: core_table.csv")


‚úÖ Elmentve: core_Astar_merged_5cols_by_acronym.csv
‚úÖ Elmentve: core_A_merged_5cols_by_acronym.csv
‚úÖ Elmentve: core_B_merged_5cols_by_acronym.csv
‚úÖ Elmentve: core_C_merged_5cols_by_acronym.csv
üîÑ √ñsszesen 0 acronim azonos√≠t√≥ lett reproduk√°lva a nevek alapj√°n.
üö´ √ñsszesen 2128 sor lett kihagyva hi√°nyz√≥ acronim azonos√≠t√≥ miatt. L√°sd: ignored_conferences.json
‚ö†Ô∏è Figyelmeztet√©s: IEEE/IFIP International Conference on Dependable Systems and Networks (DSN) konferencia ugyanabban az √©vben t√∂bb rangot is kapott! (CORE2008_A*, ERA2010_A, ERA2010_C, CORE2013_A*, CORE2013_C, CORE2014_A, CORE2014_C, CORE2017_A, CORE2017_C, CORE2018_A, CORE2018_C, CORE2020_A, CORE2021_A, CORE2023_A)
‚ö†Ô∏è Figyelmeztet√©s: International Conference on Information Systems (ICIS) konferencia ugyanabban az √©vben t√∂bb rangot is kapott! (CORE2008_A*, ERA2010_A, ERA2010_C, CORE2013_A*, CORE2013_C, CORE2014_A*, CORE2014_C, CORE2017_A*, CORE2017_C, CORE2018_A*, CORE2018_C, CORE2020_C, CORE2021_

In [10]:
# improve the conference table with thery/applied clasificaitons, and also with typos in the acronyms

def fix_acronym(name):
    acronym_dic={
        'EuroPar': 'Euro-Par'
    }
    if name in acronym_dic:
        return acronym_dic[name]
    return name


ranks = ["Astar", "A"]

theory={}
applied={}
for rank in ranks:
    theory_filename = f"core_{rank}_theory_conferences.json"
    applied_filename = f"core_{rank}_applied_conferences.json"

    if os.path.exists(theory_filename):
        with open(theory_filename, "r", encoding="utf-8") as f:
            theory[rank] = json.load(f)

    if os.path.exists(applied_filename):
        with open(applied_filename, "r", encoding="utf-8") as f:
            applied[rank] = json.load(f)

# Hozz√°adjuk az mta_class oszlopot az all_final_df-hez
# El≈ësz√∂r √∂ssze√°ll√≠tjuk az √∂sszes theory √©s applied konferencia nev√©t
all_theory_names = set()
all_applied_names = set()

for rank in ranks:
    if rank in theory:
        all_theory_names.update(theory[rank])
    if rank in applied:
        all_applied_names.update(applied[rank])

print(f"üìö √ñsszes elm√©leti konferencia: {len(all_theory_names)}")
print(f"üîß √ñsszes alkalmazott konferencia: {len(all_applied_names)}")

# L√©trehozzuk az mta_class oszlopot
def assign_mta_class(name):
    if name in all_theory_names:
        return 3
    elif name in all_applied_names:
        return 6
    else:
        return None

all_final_df['mta_class'] = all_final_df['Name'].apply(assign_mta_class)

# Konvert√°ljuk integer t√≠pus√∫ra (Int64 t√°mogatja a None/NaN √©rt√©keket)
all_final_df['mta_class'] = all_final_df['mta_class'].astype('Int64')

all_final_df['Acronym'] = all_final_df['Acronym'].apply(fix_acronym)

# Statisztika
theory_count = (all_final_df['mta_class'] == 3).sum()
applied_count = (all_final_df['mta_class'] == 6).sum()
unclassified_count = all_final_df['mta_class'].isna().sum()

print(f"\n‚úÖ MTA oszt√°lyoz√°s hozz√°adva:")
print(f"   - Elm√©leti (3): {theory_count}")
print(f"   - Alkalmazott (6): {applied_count}")
print(f"   - Oszt√°lyozatlan: {unclassified_count}")

# Friss√≠tj√ºk a CSV f√°jlt
all_final_df.to_csv("core_table.csv", index=False)
print(f"‚úÖ Friss√≠tett CSV mentve: core_table.csv")

üìö √ñsszes elm√©leti konferencia: 71
üîß √ñsszes alkalmazott konferencia: 317

‚úÖ MTA oszt√°lyoz√°s hozz√°adva:
   - Elm√©leti (3): 70
   - Alkalmazott (6): 312
   - Oszt√°lyozatlan: 1524
‚úÖ Friss√≠tett CSV mentve: core_table.csv


In [None]:
# innent≈ël m√°r nem kell (a r√©gi JSON f√°jlok)
for rank in ["A*","A","B","C"]: 
    rank_name=rank.replace("*","star")
    all_years_int = set()
    for conf in acronym_to_info.values():
        all_years_int.update(int(y.replace("CORE", "").replace("ERA", "")) for y in conf["YearsListed"])
    year_min = min(all_years_int)
    year_max = max(all_years_int)

    for conf in acronym_to_info.values():
        years = sorted(int(y.replace("CORE", "").replace("ERA", "")) for y in conf["YearsListed"])
        start = min(years)
        end = max(years)

        if start == year_min and end == year_max:
            conf["YearsInterval"] = ""
        elif start == year_min:
            conf["YearsInterval"] = f"‚Äì{end}"
        elif end == year_max:
            conf["YearsInterval"] = f"{start}‚Äì"
        else:
            conf["YearsInterval"] = f"{start}‚Äì{end}"
        

    with open("core_{}_conferences.json".format(rank_name), "w", encoding="utf-8") as f:
        json.dump(acronym_to_info, f, indent=2, ensure_ascii=False)
    print("‚úÖ Elmentve: core_{}_conferences.json with {} records".format(rank_name, len(acronym_to_info)))

‚úÖ Elmentve: core_Astar_conferences.json with 1179 records
‚úÖ Elmentve: core_A_conferences.json with 1179 records
‚úÖ Elmentve: core_B_conferences.json with 1179 records
‚úÖ Elmentve: core_C_conferences.json with 1179 records


In [None]:
import json
import os

normalize=0
normalize_all=0
for rank in ["Astar","A","B","C"]:
    try:
        with open(f'core_{rank}_conferences_classified.json', 'r', encoding='utf-8') as f:
            conferences = json.load(f)

        in_last_core = 0
        for acronym, info in conferences.items():
            if "CORE2023" in info.get("YearsListed", []):
                in_last_core += 1
        if normalize==0:
            normalize=in_last_core
        if normalize_all==0:
            normalize_all=len(conferences)
        print(f"üè∑Ô∏è {rank} konferenci√°k sz√°ma: {len(conferences)} (normalized: {len(conferences)/normalize_all}) az utols√≥ban {in_last_core} (normalized: {in_last_core/normalize})")

    except Exception as e:
        print(f"üö´ Hiba a {rank} f√°jl feldolgoz√°sakor: {e}")

        old_name = f"core_{rank}_conferences.json"
        new_name = f"core_{rank}_conferences_classified.json"

        # use notebook-level flags if present
        force = globals().get("force", False)
        dry_run = globals().get("dry_run", False)

        if not os.path.exists(old_name):
            print(f"[skip] {old_name} not found")
            continue

        if os.path.exists(new_name) and not force:
            print(f"[exists] {new_name} already exists (set force=True to overwrite)")
            continue

        print(f"Renaming: {old_name} -> {new_name}")
        if not dry_run:
            if os.path.exists(new_name) and force:
                os.remove(new_name)
            os.rename(old_name, new_name)

üè∑Ô∏è Astar konferenci√°k sz√°ma: 80 (normalized: 1.0) az utols√≥ban 59 (nromalized: 1.0)
üè∑Ô∏è A konferenci√°k sz√°ma: 327 (normalized: 4.0875) az utols√≥ban 116 (nromalized: 1.9661016949152543)
üè∑Ô∏è B konferenci√°k sz√°ma: 621 (normalized: 7.7625) az utols√≥ban 220 (nromalized: 3.7288135593220337)
üè∑Ô∏è C konferenci√°k sz√°ma: 1004 (normalized: 12.55) az utols√≥ban 357 (nromalized: 6.0508474576271185)


In [1]:
import json
import os
from collections import defaultdict

# F√°jlnevek √©s rankek
ranks = ["Astar", "A", "B", "C"]
data = {}
acronym_to_ranks = defaultdict(list)

# üîÑ F√°jlok beolvas√°sa
for rank in ranks:
    filename = f"core_{rank}_conferences_classified.json"
    if not os.path.exists(filename):
        print(f"‚ö†Ô∏è  {filename} nem tal√°lhat√≥, kihagyva.")
        continue

    with open(filename, "r", encoding="utf-8") as f:
        data[rank] = json.load(f)

    for acronym in data[rank].keys():
        acronym_to_ranks[acronym].append(rank)

# üéØ T√∂bb rangban szerepl≈ë konferenci√°k keres√©se
multi_rank = {a: rs for a, rs in acronym_to_ranks.items() if len(rs) > 1}

print(f"üîç {len(multi_rank)} konferencia t√∂bb rangban is szerepel:")

for acronym, ranks_list in sorted(multi_rank.items()):
    print(f"  ‚Ä¢ {acronym}: {', '.join(ranks_list)}")

# üíæ (opcion√°lis) Ment√©s f√°jlba
out_name = "core_conferences_with_multiple_ranks.json"
with open(out_name, "w", encoding="utf-8") as f:
    json.dump(multi_rank, f, indent=2, ensure_ascii=False)

print(f"\n‚úÖ Eredm√©ny elmentve: {out_name}")


üîç 353 konferencia t√∂bb rangban is szerepel:
  ‚Ä¢ AAIM: B, C
  ‚Ä¢ ACE: B, C
  ‚Ä¢ ACSAC: A, B
  ‚Ä¢ ADBIS: B, C
  ‚Ä¢ ADHOC-NOW: B, C
  ‚Ä¢ ADMA: B, C
  ‚Ä¢ AIME: A, B
  ‚Ä¢ AISTATS: A, B
  ‚Ä¢ ALIFE: A, C
  ‚Ä¢ ALT: A, B
  ‚Ä¢ AMCIS: A, B
  ‚Ä¢ AMOC: B, C
  ‚Ä¢ ANSS: B, C
  ‚Ä¢ AOIR: B, C
  ‚Ä¢ APCC: B, C
  ‚Ä¢ APCHI: B, C
  ‚Ä¢ APCOMin: A, C
  ‚Ä¢ APNOMS: B, C
  ‚Ä¢ APSEC: B, C
  ‚Ä¢ APWEB: B, C
  ‚Ä¢ ASE: Astar, A
  ‚Ä¢ ATVA: A, B
  ‚Ä¢ AVSS: A, B
  ‚Ä¢ Ada-Europe: A, B
  ‚Ä¢ AiML: A, B
  ‚Ä¢ AofA: B, C
  ‚Ä¢ AsiaCCS: A, B
  ‚Ä¢ Broadnets: B, C
  ‚Ä¢ CAADRIA: B, C
  ‚Ä¢ CAIP: A, B, C
  ‚Ä¢ CARDIS: B, C
  ‚Ä¢ CASES: A, B
  ‚Ä¢ CBSE: A, B
  ‚Ä¢ CC: A, B
  ‚Ä¢ CCA: B, C
  ‚Ä¢ CCC: A, B
  ‚Ä¢ CCGRID: A, B
  ‚Ä¢ CCSC: B, C
  ‚Ä¢ CEC: A, B, C
  ‚Ä¢ CGA: B, C
  ‚Ä¢ CGI: A, B, C
  ‚Ä¢ CHES: A, B, C
  ‚Ä¢ CIAA: B, C
  ‚Ä¢ CIAC: B, C
  ‚Ä¢ CICLING: B, C
  ‚Ä¢ CISIS: B, C
  ‚Ä¢ CLUSTER: A, B
  ‚Ä¢ COCOA: B, C
  ‚Ä¢ COINE: B, C
  ‚Ä¢ COLING: A, B
  ‚Ä¢ COPLAS: B, C
  ‚Ä¢ CPAIOR: A, B
  ‚Ä¢