In [138]:
import csv
import pandas as pd
import vobject
import re

In [142]:
FILE_NAME = 'pse-notion-contacts-dir-export-240130'

patterns = {
    'Contact Info': {
        'discord': r'(?:discord:)\s*(@?\w+)(?:#\d+)?',
        'github': r'(?:github:)\s*(?:https?://github.com/)?(@?\w+)',
        'email': r'(?:email:)\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4})',
        'telegram': r'(?:telegram:|tg:)\s*(@?\w+)',
    },
    'Availability': {'timezone': r'((UTC|GMT)\s*([+-]?\d{1,2})?)'}
    # Add more patterns as needed
}

In [143]:
def clean_projects_teams_col(input_str):
    if isinstance(input_str, str):
        # Use regular expression to remove content within brackets
        cleaned_str = re.sub(r'\(.*?\)', '', input_str)

        # Trim the resulting string
        cleaned_str = cleaned_str.strip()
        return f"PSE ({cleaned_str})"
    else:
        return "PSE"

def parse_csv():
    df = pd.read_csv(f"data/{FILE_NAME}.csv")

    for category, pattern_dict in patterns.items():
        for key, pattern in pattern_dict.items():
            # treat category column as string
            extracted = df[category].str.extract(pattern, flags=re.IGNORECASE)
            # keep only the first match if there are multiple matches
            df[key] = extracted.iloc[:, 0]
            # remove any whitespace in the values of the key column
            df[key] = df[key].str.strip()

    # replace in timezone string "GMT" with "UTC", convert to timezone data type
    df['timezone'] = df['timezone'].str.replace('GMT', 'UTC')

    # in a new column 'nickname', join with comma the values of 'Notion', "Discord", "Telegram", "Github" columns
    df['nickname'] = df[['Notion Handle', 'discord', 'telegram', 'github', "ENS Address"]].apply(lambda x: ','.join(x.dropna()), axis=1)

    # rename the column 'Name', to 'nf'
    df.rename(columns={'Name': 'nf'}, inplace=True)

    # from the column "Project & Teams", extract everything before the string ' (http', append it to the string "PSE: ", name the column "org"
    df['org'] = df['Projects & Teams'].apply(clean_projects_teams_col)
    
    return df

In [144]:
df = parse_csv()
df.loc[df['nf'] == 'Brechy'] #.iloc[0]
df


Unnamed: 0,nf,Notion Handle,Projects & Teams,Status,Contact Info,Availability,Ask me about,Interests,ENS Address,FTE,🎍 Projects,Created time,discord,github,email,telegram,timezone,nickname,org
0,Yaroslav (Yar) Rebenko,Yar,Folding - Schemes (https://www.notion.so/Foldi...,,,UTC+3,"mpc, external-memory algo, python, YTsaurus🙂","mpc, zkp, distributed computing",,,,"November 3, 2023 9:27 PM",,,,,UTC+3,Yar,PSE (Folding - Schemes)
1,Ryan,Hodlon,Summa (https://www.notion.so/Summa-bb99d064d49...,Explorer / Full time,,UTC-7 9am-6pm (flexible),"Product Management, Quality Assurance, Communi...",,Hodlon.art,1.0,,"June 22, 2023 10:52 PM",,,,,UTC-7,"Hodlon,Hodlon.art",PSE (Summa)
2,Jay,Jay,,"Explorer / Full time, Navigator / Team Lead - ...",discord : nightmare(jay)#1041\ngithub https://...,UTC +5:30 (10 am to 6pm) flexible,,"motorbikes, football, workout, hiking, games",,1.0,,"February 10, 2023 9:42 PM",,,,,UTC +5,Jay,PSE
3,blockdev,block dev,Audit (https://www.notion.so/Audit-3c704eb9820...,Explorer / Full time,Discord: blockdev#0246\nGithub: http://github....,UTC+5\n11 am to 7 pm (I’m flexible),"Solidity, Smart contract auditing",,,,Audits (https://www.notion.so/Audits-5d1f90e67...,"February 10, 2023 9:42 PM",blockdev,0xbok,blockdev3@gmail.com,bl0ckdev,UTC+5,"block dev,blockdev,bl0ckdev,0xbok",PSE (Audit)
4,Nico Serrano,Nicolas Serrano,Trusted Setup / P0tion / Definitely Setup (htt...,Explorer / Full time,,ECT (Ecuador Time UTC -5)\nFull time 9am - 5pm,"Solidity, NodeJS, Homomorphic Encryption, Maps...",,,1.0,Trusted setup UI (https://www.notion.so/Truste...,"February 10, 2023 9:42 PM",,,,,UTC -5,Nicolas Serrano,PSE (Trusted Setup / P0tion / Definitely Setup)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,Wenkang Chen (Grantee - Proposal: ZKML - AIGC ...,,,,Discord: \nEmail: mailto:cwk1998@hotmail.com\n...,,,,,,,"December 22, 2023 1:30 PM",Email,,,@cwkang,,"Email,@cwkang",PSE
126,Ali Atia (Grantee - zkEVM Technical Coordinator),,,,Discord: aliatiia\nEmail: atiia@cs.mcgill.ca\n...,,,,,,,"January 9, 2024 5:29 PM",aliatiia,,atiia@cs.mcgill.ca,,,aliatiia,PSE
127,Yugo Fujii (Grantee - Folding by hand (Nova by...,,,,Discord: yugokoral\nEmail: mailto:yuu5fujii@gm...,,,,,,,"January 10, 2024 9:46 AM",yugokoral,,,,,yugokoral,PSE
128,Ben Tseng (Grantee - fhe-http ),,,,Discord: \nEmail: mailto:abcdefg35216874@gmail...,,,,,,,"January 19, 2024 2:37 PM",Email,,,,,Email,PSE
