In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import re # recognize phone number

In [3]:
from langdetect import detect, detect_langs
from deep_translator import GoogleTranslator

### Clean data for archived side events

In [4]:
df_events_all = pd.read_csv("COP_archive_side_events_compiled_raw_2024.csv")

In [5]:
# split up detailed titles, desciprions, detailed organizer information
df_events_all[['Title/Description','Speakers']] = df_events_all["Title/theme/speakers"].str.split('  ', expand=True)
df_events_all = df_events_all[df_events_all['Title/Description']!="test test"] ## REMOVE test events
df_events_all = df_events_all.set_index("SE_Code").reset_index()
df_events_all["Organizer_Affiliation"] = np.nan
df_events_all["Organizer_Affiliation"] = df_events_all["Organizer_Affiliation"].astype(object)
df_events_all["Organizer_Name"] = np.nan
df_events_all["Organizer_Name"] = df_events_all["Organizer_Name"].astype(object)

# set up edges dataframe
edges_side_events_archive = pd.DataFrame(data={"Title/Description":np.nan,"SE_Code":np.nan},index=[0])

edges_count = 0
for i in df_events_all.index:
    orgnzr = df_events_all["Organizer"][i]
    org_list = []
    organizer_list = []
    for unit in orgnzr.split("  "):
        if "@" in unit:
            org_name = unit.rsplit("@",1)[0]
            org_name = org_name.rsplit(" ",1)[0]
            org_list.append(org_name)
        elif "+" not in unit:
            organizer_list.append(unit)
    df_events_all.at[i,"Organizer_Affiliation"] = list(set(org_list)) # delete duplicates
    df_events_all.at[i,"Organizer_Name"] = organizer_list
    df_events_all.loc[i,"Organizer_Count"] = len(org_list)
    
    org_list_unique = list(set(org_list))
    for n in np.arange(len(org_list_unique))[:-1]:
        source = org_list_unique[n]
        for m in np.arange(len(org_list_unique))[n+1:]:
            edges_side_events_archive.loc[edges_count,"Source_Name"] = source
            edges_side_events_archive.loc[edges_count,"Target_Name"] = org_list_unique[m]
            edges_side_events_archive.loc[edges_count,"Title/Description"] = df_events_all.loc[i,"Title/Description"]
            edges_side_events_archive.loc[edges_count,"SE_Code"] = df_events_all.loc[i,"SE_Code"]
            edges_side_events_archive.loc[edges_count,"Weight"] = 1/(np.arange(len(org_list_unique)).sum())
            edges_count = edges_count + 1
            
df_events_all["Year"] = df_events_all["Date"].str[-4:].astype(int)
df_events_all.set_index("SE_Code").to_csv("all_side_events.csv")
edges_side_events_archive.set_index("SE_Code").to_csv("Edges_All.csv")
df_events_all

  edges_side_events_archive.loc[edges_count,"Title/Description"] = df_events_all.loc[i,"Title/Description"]
  edges_side_events_archive.loc[edges_count,"SE_Code"] = df_events_all.loc[i,"SE_Code"]


Unnamed: 0,SE_Code,Code,Title/theme/speakers,Organizer,Date,Title/Description,Speakers,Organizer_Affiliation,Organizer_Name,Organizer_Count,Year
0,COP28-000,0,COP 28: Facing the challenges for developing c...,Yvonne Miller Berlie Third World Network (TWN...,"Fri, 01 Dec 2023",COP 28: Facing the challenges for developing c...,Speakers: to be decided,"[Bolivia (Plurinational State of), Third World...","[Yvonne Miller Berlie, Diego Pacheco]",2.0,2023
1,COP28-001,1,The Global Stocktake: How to strengthen climat...,Amir Khouzam International Committee of the R...,"Fri, 01 Dec 2023",The Global Stocktake: How to strengthen climat...,Speakers: World Bank Group UNHCR ICRC Somalia ...,"[Somalia, International Committee of the Red C...","[Amir Khouzam, Emily Wilkinson, Hafsa Abdilahi]",3.0,2023
2,COP28-002,2,Change through participation: Open Dialogues a...,Zuzanna Borowska Polish Ecological Club (PEC)...,"Fri, 01 Dec 2023",Change through participation: Open Dialogues a...,Speakers: A diverse group of speakers from the...,"[REScoop.eu vzw, India Water Foundation, Polis...","[Zuzanna Borowska, Olumide Idowu, FELI ESAU, S...",7.0,2023
3,COP28-003,3,Partnerships for Sustainable Finance: Opportun...,Samuel Confidence Dotse HATOF Foundation samu...,"Fri, 01 Dec 2023",Partnerships for Sustainable Finance: Opportun...,Speakers: 1. Minister of Lands and Natural Res...,[Faith Association of the Rehabilitation of St...,"[Samuel Confidence Dotse, Samuel Baechie]",2.0,2023
4,COP28-004,4,Proven climate solutions: following IP&LC lead...,Ashley Emerson Health In Harmony (HIH) ashley...,"Fri, 01 Dec 2023",Proven climate solutions: following IP&LC lead...,"Speakers: Myrna Cunningham, Pawanka Fund Nonet...","[Meridian Institute, Rights and Resources Inst...","[Ashley Emerson, Liz Duxbury, Lucy Mulenkei, A...",4.0,2023
...,...,...,...,...,...,...,...,...,...,...,...
4722,COP09-109,109,Russian Business Forum: Corporations and Regio...,Evgeniy Sokolov National Carbon Sequestration...,"Thu, 11 Dec 2003",Russian Business Forum: Corporations and Regio...,,[National Carbon Sequestration Foundation (NCSF)],[Evgeniy Sokolov],1.0,2003
4723,COP09-110,110,Standards to ensure high quality LULUCF projec...,Olivia Tanujaya Yayasan Pelangi (PELANGI) oli...,"Thu, 11 Dec 2003",Standards to ensure high quality LULUCF projec...,,[Yayasan Pelangi (PELANGI)],[Olivia Tanujaya],1.0,2003
4724,COP09-111,111,Linking Article 2 and Article 6: Experiments i...,Jean-Pascal van Ypersele Belgium vanypersele@...,"Fri, 12 Dec 2003",Linking Article 2 and Article 6: Experiments i...,,[Belgium],[Jean-Pascal van Ypersele],1.0,2003
4725,COP09-112,112,South-North Dialogue - Equity in the Greenhous...,"Bernd Brouns Wuppertal Institute for Climate,...","Fri, 12 Dec 2003",South-North Dialogue - Equity in the Greenhous...,,"[Wuppertal Institute for Climate, Environment ...",[Bernd Brouns],1.0,2003


### clean the dataset
- remove incomplete descriptions
- remove the word Cancelled 

In [10]:
# filter those with "TBC" in title/description, excluding "(TBC)" which only indicates unknown speakers in the description
TBC_df = df_events_all[df_events_all["Title/Description"].str.contains("TBC")].copy()
TBC_df = TBC_df[TBC_df["Title/Description"].str.contains("\(TBC\)")==False]
df_events_complete = df_events_all[df_events_all.index.isin(TBC_df.index)==False]
#(* tbc)
tbc_df2 = df_events_all[df_events_all["Title/Description"].str.contains("tbc")].copy()
tbc_df2 = tbc_df2[tbc_df2["Title/Description"].str.contains("\(* tbc\)")==False]
df_events_complete = df_events_complete[df_events_complete.index.isin(tbc_df2.index)==False]

# remove those with "TBA", "tbc"
strings_to_remove = ["TBA", "Tbc", "WILPF Italy WILPF Italy", "President Evo Morales meets IPOs",
                    "Coming soon ..", "COP Presidency Gender Event Presidency Gender Event"]
for special_string in strings_to_remove:
    df_events_complete = df_events_complete[df_events_complete["Title/Description"].str.contains(special_string)==False] 
    
# remove descriptions shorter than 10 words
df_events_complete["word_count"] = df_events_complete['Title/Description'].str.split().str.len()
df_events_complete = df_events_complete[df_events_complete["word_count"]>10]

df_events_all[df_events_all.index.isin(df_events_complete.index)==False].set_index("SE_Code").to_excel("note_side_events_incomplete.xlsx")
len(df_events_complete)

4688

In [11]:
# cancelled events
df_events_complete["Title/Description"] = df_events_complete["Title/Description"].str.replace("Cancelled","")

In [12]:
df_events_complete.to_csv("all_side_events_complete.csv",index=False)

### identify languages

In [13]:
df_events_complete_lang = df_events_complete.copy()
for i in df_events_complete_lang.index:
    text = df_events_complete_lang.loc[i,"Title/Description"]
    lang = detect(text)
    df_events_complete_lang.loc[i,"lang_most"] = lang
    if lang == "en":
        df_events_complete_lang.loc[i,"Title/Description_new"] = text
    else:
        df_events_complete_lang.loc[i,"Title/Description_new"] = GoogleTranslator(source=lang, target='en').translate(text)
    if i%1000==0:
        print("Completed detection on for "+str(i)+ " entries")
        print(detect(text))
df_events_complete_lang[df_events_complete_lang["lang_most"]!="en"].set_index("SE_Code").to_excel("note_side_events_language.xlsx")
df_events_complete_lang.to_csv("all_side_events_complete_lang.csv",index=False)
df_events_complete_lang

Completed detection on for 0 entries
en
Completed detection on for 1000 entries
en
Completed detection on for 2000 entries
en
Completed detection on for 3000 entries
en
Completed detection on for 4000 entries
en


Unnamed: 0,SE_Code,Code,Title/theme/speakers,Organizer,Date,Title/Description,Speakers,Organizer_Affiliation,Organizer_Name,Organizer_Count,Year,word_count,lang_most,Title/Description_new
0,COP28-000,0,COP 28: Facing the challenges for developing c...,Yvonne Miller Berlie Third World Network (TWN...,"Fri, 01 Dec 2023",COP 28: Facing the challenges for developing c...,Speakers: to be decided,"[Bolivia (Plurinational State of), Third World...","[Yvonne Miller Berlie, Diego Pacheco]",2.0,2023,20,en,COP 28: Facing the challenges for developing c...
1,COP28-001,1,The Global Stocktake: How to strengthen climat...,Amir Khouzam International Committee of the R...,"Fri, 01 Dec 2023",The Global Stocktake: How to strengthen climat...,Speakers: World Bank Group UNHCR ICRC Somalia ...,"[Somalia, International Committee of the Red C...","[Amir Khouzam, Emily Wilkinson, Hafsa Abdilahi]",3.0,2023,59,en,The Global Stocktake: How to strengthen climat...
2,COP28-002,2,Change through participation: Open Dialogues a...,Zuzanna Borowska Polish Ecological Club (PEC)...,"Fri, 01 Dec 2023",Change through participation: Open Dialogues a...,Speakers: A diverse group of speakers from the...,"[REScoop.eu vzw, India Water Foundation, Polis...","[Zuzanna Borowska, Olumide Idowu, FELI ESAU, S...",7.0,2023,57,en,Change through participation: Open Dialogues a...
3,COP28-003,3,Partnerships for Sustainable Finance: Opportun...,Samuel Confidence Dotse HATOF Foundation samu...,"Fri, 01 Dec 2023",Partnerships for Sustainable Finance: Opportun...,Speakers: 1. Minister of Lands and Natural Res...,[Faith Association of the Rehabilitation of St...,"[Samuel Confidence Dotse, Samuel Baechie]",2.0,2023,42,en,Partnerships for Sustainable Finance: Opportun...
4,COP28-004,4,Proven climate solutions: following IP&LC lead...,Ashley Emerson Health In Harmony (HIH) ashley...,"Fri, 01 Dec 2023",Proven climate solutions: following IP&LC lead...,"Speakers: Myrna Cunningham, Pawanka Fund Nonet...","[Meridian Institute, Rights and Resources Inst...","[Ashley Emerson, Liz Duxbury, Lucy Mulenkei, A...",4.0,2023,50,en,Proven climate solutions: following IP&LC lead...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4722,COP09-109,109,Russian Business Forum: Corporations and Regio...,Evgeniy Sokolov National Carbon Sequestration...,"Thu, 11 Dec 2003",Russian Business Forum: Corporations and Regio...,,[National Carbon Sequestration Foundation (NCSF)],[Evgeniy Sokolov],1.0,2003,41,en,Russian Business Forum: Corporations and Regio...
4723,COP09-110,110,Standards to ensure high quality LULUCF projec...,Olivia Tanujaya Yayasan Pelangi (PELANGI) oli...,"Thu, 11 Dec 2003",Standards to ensure high quality LULUCF projec...,,[Yayasan Pelangi (PELANGI)],[Olivia Tanujaya],1.0,2003,51,en,Standards to ensure high quality LULUCF projec...
4724,COP09-111,111,Linking Article 2 and Article 6: Experiments i...,Jean-Pascal van Ypersele Belgium vanypersele@...,"Fri, 12 Dec 2003",Linking Article 2 and Article 6: Experiments i...,,[Belgium],[Jean-Pascal van Ypersele],1.0,2003,49,en,Linking Article 2 and Article 6: Experiments i...
4725,COP09-112,112,South-North Dialogue - Equity in the Greenhous...,"Bernd Brouns Wuppertal Institute for Climate,...","Fri, 12 Dec 2003",South-North Dialogue - Equity in the Greenhous...,,"[Wuppertal Institute for Climate, Environment ...",[Bernd Brouns],1.0,2003,38,en,South-North Dialogue - Equity in the Greenhous...


In [None]:
# split up detailed titles, desciprions, detailed organizer information
df_events_all[['Title/Description','Speakers']] = df_events_all["Title/theme/speakers"].str.split('  ', expand=True)
df_events_all = df_events_all.set_index("SE_Code").reset_index()
df_events_all["Organizer_Affiliation"] = np.nan
df_events_all["Organizer_Affiliation"] = df_events_all["Organizer_Affiliation"].astype(object)
df_events_all["Organizer_Name"] = np.nan
df_events_all["Organizer_Name"] = df_events_all["Organizer_Name"].astype(object)

# set up edges dataframe
edges_side_events_archive = pd.DataFrame(data={"Title/Description":np.nan,"SE_Code":np.nan},index=[0])

edges_count = 0
for i in df_events_all.index:
    orgnzr = df_events_all["Organizer"][i]
    org_list = []
    organizer_list = []
    for unit in orgnzr.split("  "):
        if "@" in unit:
            org_name = unit.rsplit("@",1)[0]
            org_name = org_name.rsplit(" ",1)[0]
            org_list.append(org_name)
        elif "+" not in unit:
            organizer_list.append(unit)
    df_events_all.at[i,"Organizer_Affiliation"] = list(set(org_list)) # delete duplicates
    df_events_all.at[i,"Organizer_Name"] = organizer_list
    df_events_all.loc[i,"Organizer_Count"] = len(org_list)
    
    org_list_unique = list(set(org_list))
    for n in np.arange(len(org_list_unique))[:-1]:
        source = org_list_unique[n]
        for m in np.arange(len(org_list_unique))[n+1:]:
            edges_side_events_archive.loc[edges_count,"Source_Name"] = source
            edges_side_events_archive.loc[edges_count,"Target_Name"] = org_list_unique[m]
            edges_side_events_archive.loc[edges_count,"Title/Description"] = df_events_all.loc[i,"Title/Description"]
            edges_side_events_archive.loc[edges_count,"SE_Code"] = df_events_all.loc[i,"SE_Code"]
            edges_side_events_archive.loc[edges_count,"Weight"] = 1/(np.arange(len(org_list_unique)).sum())
            edges_count = edges_count + 1
    
df_events_all.set_index("SE_Code").to_csv("COParchive_side_events_compiled.csv")
edges_side_events_archive.set_index("SE_Code").to_csv("Edges_Archive.csv")