# Imports

In [1]:
import re

import gspread as gs
import pandas as pd

In [2]:
# !sudo /bin/bash -c "(source /venv/bin/activate; pip install gspread)"

# Configs

In [3]:
# File name of the Google service-account key used for authentication.
json_key = "steady-computer-354216-eb3e67b30a7b.json"
# Authorized gspread client; every spreadsheet below is opened through it.
gc = gs.service_account(
    filename=json_key,
)

# Load the data

## MIG

In [4]:
# MIG spreadsheet: its URL and the handle opened through the shared client.
mig_link = (
    "https://docs.google.com/spreadsheets/d/1gxOVAtjk_oEz7WsNVfdST67SupISZ2U-ePUzuep5IEo/edit#gid=0"
)
mig_env = gc.open_by_url(
    mig_link,
)

In [5]:
# Load every record of the "Firms" worksheet into a DataFrame.
mig_ws = mig_env.worksheet("Firms")
firm_records = mig_ws.get_all_records()
mig = pd.DataFrame(firm_records)
# Unify the absence of e-mails: an empty cell becomes the explicit
# "not available" marker that later cells filter on.
mig["Submit by email"] = mig["Submit by email"].replace("", "not available")
mig.head(3)

Unnamed: 0,Name,Priority,Status,Website,Crypto-specific,Link to submission page,Submit by email,Submit by form,LinkedIn URL,LinkedIn Person 1,LinkedIn Person 2,LinkedIn Person 3,Invested in,Comments,When added,Source
0,a16z crypto,1.0,,https://a16z.com/\nhttps://a16zcrypto.com/,,https://a16z.com/about/contact/,not available,,https://www.linkedin.com/company/andreessen-ho...,https://www.linkedin.com/in/anthony-albanese-9...,https://www.linkedin.com/in/janelippencott/,https://www.linkedin.com/in/mkhsu/,Talos,,2022-05-04,Look at investors that co-invested with Coinba...
1,a16z fintech,,,,,,not available,,,,,,,,,
2,Avon Ventures,1.0,,,,,not available,,,https://www.linkedin.com/in/balaji-varadhan/,,,BlockFi,"affiliated with FMR LLC, the parent company of...",2022-05-04,Look at investors that co-invested with Coinba...


## Mail_merge

In [6]:
# `Mail_merge` spreadsheet: its URL, the opened handle and all of its
# worksheets (displayed as the cell output).
mm_link = (
    "https://docs.google.com/spreadsheets/d/11AXt9Yzwmk1is_wprFDuE3vbS67gpwOO8gRfB4teC34/edit#gid=348677750"
)
mm_env = gc.open_by_url(
    mm_link,
)
worksheet_list = mm_env.worksheets()
worksheet_list

[<Worksheet '2022-06-22' id:1169365049>,
 <Worksheet '06-21-bis-VC@General' id:348677750>,
 <Worksheet '06-21-VC@Consensus' id:1499066966>,
 <Worksheet '06-20-VC@Consensus' id:1684806412>,
 <Worksheet 'Лист19' id:2051560781>,
 <Worksheet '06-20-Inv@Consensus' id:1879161339>,
 <Worksheet '06-18-Inv@Consensus' id:994429349>]

In [7]:
# Names given to the first four columns of every `Mail_merge` worksheet.
mail_merge_columns = ["Email", "Name", "Company", "Consensus"]
# Per-worksheet frames are collected under their own name so that
# `mail_merge` is only ever a DataFrame.
mail_merge_frames = []
# Iterate over the worksheets that were already fetched instead of
# re-requesting each one by position with `get_worksheet(i)`.
for worksheet in worksheet_list:
    df_tmp = pd.DataFrame(worksheet.get_all_records()).iloc[
        :, : len(mail_merge_columns)
    ]
    # A worksheet without records yields a frame with no columns, for which
    # the column assignment below would raise; it has no contacts to add.
    if df_tmp.shape[1] == 0:
        continue
    df_tmp.columns = mail_merge_columns
    mail_merge_frames.append(df_tmp)
# `pd.concat` refuses an empty list, so fall back to an empty frame with the
# expected columns when no worksheet contained any record.
if mail_merge_frames:
    mail_merge = pd.concat(mail_merge_frames)
else:
    mail_merge = pd.DataFrame(columns=mail_merge_columns)
mail_merge.tail(3)

Unnamed: 0,Email,Name,Company,Consensus
8,jason.wu@definer.org,Jason,GmailMerge,Read
9,katrinawhq@gmail.com,Katrina,GmailMerge,Read
10,richard@iterativeventure.com,Richard,GmailMerge,Read


# Select the contacts that haven't been reached yet

## Drop the contacts that are already in the pipeline

In [8]:
# Convert MIG to the e-mail reach format: keep only the rows that have an
# e-mail address, and only the address and firm-name columns.
has_email = mig["Submit by email"] != "not available"
mig_emails = mig.loc[has_email, ["Submit by email", "Name"]]
mig_emails.shape

(451, 2)

In [9]:
# Extract e-mails that were previously used in campaign.
mail_merge_emails = list(mail_merge["Email"])
# Compare addresses in a normalized form (trimmed, lower-cased) so that the
# same mailbox written with different case or stray whitespace is not
# contacted twice. The values kept in `new_iteration` are left untouched.
contacted_emails = set(mail_merge["Email"].astype(str).str.strip().str.lower())
candidate_emails = mig_emails["Submit by email"].astype(str).str.strip().str.lower()
# Keep only the MIG rows whose e-mail has not been used yet.
new_iteration = mig_emails[~candidate_emails.isin(contacted_emails)]
display(new_iteration)

Unnamed: 0,Submit by email,Name
172,info@vetamercap.com,Vetamer Capital Management
197,https://andromedavc.io/#!/contact,Andromeda Capital
226,hello@icodrops.com,Drop Ventures
235,frank@fermioncapitalcorp.com,Fermion
236,adri@fermioncapitalcorp.com,Fermion
...,...,...
699,press@trueventures.com,True Ventures
718,ir@visa.com,Visa
729,hello@tallycapital.com,Tally capital
730,qwang@puzzle.venture,Puzzle ventures


## Sanity check

In [10]:
def check_email_format(email):
    """
    Validate an e-mail address and return it in a clean form.

    :param email: candidate value to validate
    :return: the address with surrounding whitespace removed when it is
        well-formed, `None` otherwise (including for non-string input)
    """
    # Non-string values can never be valid addresses and would make
    # `re.fullmatch` raise a `TypeError`.
    if not isinstance(email, str):
        return None
    # Surrounding spaces / newlines should not turn an otherwise valid
    # address into a "bad" one.
    email = email.strip()
    # Regular expression for validating an Email. The TLD class is `[A-Za-z]`:
    # inside a character class `|` is a literal pipe, not an alternation, so
    # `[A-Z|a-z]` would accept addresses such as `user@example.c|m`.
    regex_email = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
    # Sanity check.
    if not re.fullmatch(regex_email, email):
        return None
    return email

In [11]:
new_iteration_cleaned = new_iteration.copy()
# Replace "bad" emails with NaN.
new_iteration_cleaned["Submit by email"] = new_iteration_cleaned[
    "Submit by email"
].apply(check_email_format)
# Rows whose e-mail did not pass the format check.
is_bad_email = new_iteration_cleaned["Submit by email"].isna()
# Names of the firms that have at least one "bad" e-mail.
bad_emails_list = list(new_iteration_cleaned.loc[is_bad_email, "Name"])
# Save "bad" emails in a separate DataFrame for manual check. The rows are
# selected by the row mask rather than by firm name, so valid addresses of a
# firm that also has a bad one are not reported as bad.
bad_emails = new_iteration.loc[is_bad_email]
bad_emails

Unnamed: 0,Submit by email,Name
197,https://andromedavc.io/#!/contact,Andromeda Capital
718,ir@visa.com,Visa


In [12]:
# Keep only the "clean" contacts: drop every row whose e-mail is NaN.
new_iteration_cleaned = new_iteration_cleaned.dropna(subset=["Submit by email"])
new_iteration_cleaned

Unnamed: 0,Submit by email,Name
172,info@vetamercap.com,Vetamer Capital Management
226,hello@icodrops.com,Drop Ventures
235,frank@fermioncapitalcorp.com,Fermion
236,adri@fermioncapitalcorp.com,Fermion
237,contact@figment.io,Figment
...,...,...
691,press@thresholdvc.com,Threshold
699,press@trueventures.com,True Ventures
729,hello@tallycapital.com,Tally capital
730,qwang@puzzle.venture,Puzzle ventures


## Save the file

In [13]:
# new_iteration_cleaned.to_csv("new_iteration.csv")