In [None]:
import pandas as pd 
from datetime import datetime as dt 
import os
import glob 
from envoke import *
from email_content import html_content 
import time

In [None]:
# Load environment variables before any credentials are read; a later cell reads
# USERNAME and PASSWORD through os.getenv().
# NOTE(review): load_dotenv is not imported explicitly in this notebook - presumably
# it is python-dotenv's load_dotenv re-exported by `from envoke import *`; confirm.
load_dotenv()

In [None]:
# Root folder of the survey project. "FILEPATH" is a placeholder to be replaced
# with the real location before running.
filepath = "FILEPATH"
# Make the project root the working directory: the relative "Returns/*.xlsx" glob
# used later resolves against it (the Logs paths are built from `filepath` directly).
os.chdir(filepath)

In [None]:
# Load the NEWEST returns workbook from the "Returns" folder (relative to the
# working directory). One file is added each week on Wednesday and the script is
# run by Friday, so the most recent file is the one to process.
file_list = glob.glob("Returns/*.xlsx")
if not file_list:
    # max() on an empty list raises an opaque ValueError; fail with a clear message instead.
    raise FileNotFoundError(
        f"No .xlsx files found in {os.path.join(os.getcwd(), 'Returns')}"
    )

# getctime is platform-dependent: creation time on Windows, last metadata change on Unix.
latest_file = max(file_list, key=os.path.getctime)
email_list = pd.read_excel(latest_file)

In [4]:
# Give the returns sheet stable snake_case column names (positional: the sheet
# must contain exactly these 16 columns, in this order).
email_list.columns = [
    "return_number",
    "row_check",
    "modified_on",
    "name",
    "corp_name",
    "return_type",
    "fiscal_year",
    "cao_revision",
    "contact_filing_return",
    "contact_filing_noc",
    "created_on",
    "created_by",
    "email_contact",
    "email_registration",
    "first_name",
    "last_name",
]

# Index rows by the registration email so it can be pulled out as a list later.
email_list.index = email_list["email_registration"]

# "less" = one row per registration email (first occurrence kept).
email_list_less = email_list.drop_duplicates(subset="email_registration")

In [None]:
# Work out who still needs a survey: take the emails from the current returns
# file and drop everyone who already appears in a previous send log.
file_list = glob.glob(f"{filepath}/Logs/*.csv")                 # all previous survey logs

email_list = email_list_less.index.to_list()                    # index holds email_registration values

if file_list:
    # Emails recorded in every previous log (column "email_contact").
    past_emails = (
        pd.concat((pd.read_csv(file) for file in file_list), ignore_index=True)["email_contact"]
        .unique()
        .tolist()
    )
else:
    # First run / empty Logs folder: pd.concat raises ValueError on an empty
    # sequence, so treat "no logs" as "nobody has been emailed yet".
    past_emails = []

# Set lookup keeps the exclusion linear instead of scanning the list per email.
_already_sent = set(past_emails)
to_send = [email for email in email_list if email not in _already_sent]   # NEW emails only

# NOTE(review): `to_send` is built from the email_registration index, but the
# filter below matches on the email_contact column. Rows where the two addresses
# differ are dropped even if never emailed - confirm which column is intended.
email_list_less = email_list_less[email_list_less["email_contact"].isin(to_send)]

contact_dicts = create_contact_dicts(email_list_less)           # per-contact dicts built from the filtered frame

In [None]:
# Upsert every contact, send the survey to those that upsert successfully, and
# record each successful send in a dated CSV under Logs/ so later runs skip them.
# TODO: speed up with parallel processing (limited to 3 concurrent requests).
with requests.sessions.Session() as session:
    # NOTE(review): on Windows USERNAME is a built-in environment variable (the
    # logged-in user) and dotenv does not override existing variables by default,
    # so the value from .env may be ignored - confirm or use a distinct variable name.
    session.auth = (os.getenv("USERNAME"), os.getenv("PASSWORD"))

    pause_seconds = 0.4             # pause between API calls to stay under rate limits (limits unconfirmed)
    value_list = []                 # one {email: response text} entry per failed contact
    log_records = []                # one dict per successful send
    total_emails = len(contact_dicts)
    count = 0

    for contact in contact_dicts:
        email = contact["email"]

        # If the contact exists its info is returned; otherwise it is created.
        response = upsert_contact(contact, session.auth)
        time.sleep(pause_seconds)

        if response.json()["success"]:
            # Actual survey distribution, using the prepared HTML email content.
            # NOTE(review): the result of send_survey is not checked, so a failed
            # send is still logged as delivered - confirm what it returns.
            response = send_survey(contact, html_content, session.auth)

            # A fresh record per contact; a single shared dict would be
            # overwritten on every iteration and keep only the last recipient.
            log_records.append({
                "first_name": contact["first_name"],
                "last_name": contact["last_name"],
                "email_contact": email,
                # `dt` is the datetime class (from datetime import datetime as dt).
                "survey_sent_at": dt.now().strftime("%Y-%m-%d %H:%M:%S"),
            })
            count += 1
            time.sleep(pause_seconds)

            print(f"{count}/{total_emails} Successful delivery to {email}. Details saved to log")
        else:
            value_list.append({email: response.text})
            print(f"Error processing {email}, {response.text}")

    contact_log = pd.DataFrame(
        log_records,
        columns=["first_name", "last_name", "email_contact", "survey_sent_at"],
    )
    log_name = f"{dt.now().strftime('%Y-%m-%d')}_SurveyLog.csv"
    log_path = f"{filepath}/Logs/{log_name}"

    if log_records:
        # Always persist successful sends, even when other contacts failed;
        # otherwise the next run would email the same people again. Append when a
        # log for today already exists so a same-day re-run does not erase it.
        append = os.path.isfile(log_path)
        contact_log.to_csv(log_path, mode="a" if append else "w", header=not append, index=False)

    if value_list:
        print(f"The following emails have not been sent: {value_list}")
    elif log_records:
        print(f"All emails have been sent successfully, detailes saved to {log_name}")
    else:
        print("No new contacts to email; no log written.")


