In [None]:
# !pip install mail-parser
# !pip install tabulate
# !pip install pyyaml
# !pip install pandas

In [1]:
import yaml

def prompt_loader(path: str) -> str:
    """
    The function `prompt_loader` reads and returns the content of a file specified by the input path.
    
    :param path: The `path` parameter in the `prompt_loader` function is a string that represents the
    file path to the prompt file that you want to load. This function reads the content of the file
    located at the specified path and returns it as a string
    :type path: str
    :return: The function `prompt_loader` returns a string containing the content loaded from the file
    located at the specified path.
    """
    prompt = ""
    with open(path, 'r') as file:
        prompt = yaml.safe_load(file)

    return prompt

In [2]:
import subprocess

def run_command(command, cwd=None):
    """Run a shell command."""
    result = subprocess.run(command, shell=True, text=True, cwd=cwd, capture_output=True)
    if result.returncode == 0:
        print(result.stdout)
    else:
        print(f"Error: {result.stderr}")
        raise Exception("Command failed")
    return result

def git_add_commit_push(commit_message, branch_name="main"):
    """Automate git add, commit, and push."""
    try:
        # Navigate to the repository
        print("Navigating to the repository...")
        
        # Run git add
        print("Adding files...")
        run_command("git add .")

        # Run git commit
        print("Committing changes...")
        run_command(f'git commit -m "{commit_message}"')

        # Run git push
        print("Pushing to remote...")
        run_command(f"git push origin {branch_name}")

        print("All changes have been pushed successfully!")

    except Exception as e:
        print(f"An error occurred: {e}")


In [4]:
import sys
import enum
from os import listdir
from os.path import isfile, join
import pandas
import mailparser
from datetime import datetime
import typing_extensions as typing
from llm_solution import LLMSolution


class ContentType(enum.Enum):
    COURSE = "Course"
    TUTORIAL = "Tutorial"
    BOOK = "Book"
    PRODUCTorTOOL = "Product/Tool"
    CODE = "Code"
    NEW_MODEL = "NewModel"
    FUNDING = "Funding"

class ContentEntry(typing.TypedDict):
    ContentType: ContentType
    Description: str
    link: str
    tag: list[str]

client_provider = "gemini"
model_name = "gemini-2.0-flash-exp"
llm_client = LLMSolution(llm_client=client_provider, model_name=model_name)

summarize_prompt = prompt_loader("summarize.yaml")

email_path = "data/"
done_file_name = 'done.txt'
onlyfiles = [join(email_path, f) for f in listdir(email_path) if isfile(join(email_path, f))]

with open(done_file_name) as file:
    already_done_files = [line.rstrip() for line in file] 

done_file = open(done_file_name, 'a')

try:
    
    for idx, filename in enumerate(onlyfiles):
        if filename in already_done_files:
            continue

        try:
            email = mailparser.parse_from_file(filename)
        except Exception as e:
            print("Error parsing email: ", filename)
            print(e)
            continue

        print("Processing email: ", filename)
        system_prompt = summarize_prompt['system_prompt']
        args = {'args': email.text_plain} 
        user_prompt = llm_client.enrich_prompt(prompt=summarize_prompt['user_prompt'], args=args)
        current_table = llm_client.create_response(system_prompt=system_prompt, user_prompt=user_prompt, response_format=ContentEntry)

        df = pandas.DataFrame.from_dict(current_table)
        df_split = pandas.DataFrame(df['tag'].to_list(), columns=['Tag1', 'Tag2', 'Tag3', 'Tag4', 'Tag5'])
        df = pandas.concat([df, df_split], axis=1)
        df = df.drop(columns=['tag'])
        df.to_csv("data.csv", mode='a', sep=',', encoding='utf-8', header=True, index=False)

        done_file.write(filename + "\n")
        # if idx > 5:
        #     break
except Exception as e: 
    print("An error occurred: ", e)
finally:
    done_file.close()




Initializing gemini client with model: gemini-2.0-flash-exp ... 
Processing email:  data/Scale AI raises $1B _ First European AI rules to take effect in weeks _ Google to show ads in AI-generated search summaries.eml
Processing email:  data/🧐 OpenAI further increases Microsoft tensions_.eml
Processing email:  data/GenAI with Python_ Build Agents from Scratch (Complete Tutorial) _ Mauro Di Pietro in TDS Archive.eml
Processing email:  data/😺 AI work habits REVEALED.eml
Processing email:  data/Amazon Alexa+ 🤖, Instagram Reels app 📱, Nvidia squeezes partners 💰.eml
Processing email:  data/Apple's robot lamp 💡, Google AI Mode 🤖, TikTok's dev exodus 👨‍💻.eml
Processing email:  data/😺 Spot the AI Katy Perry.eml
Processing email:  data/😺 GPT5 will be FREE_!.eml
Processing email:  data/😺 Satya slams AGI hype.eml
Processing email:  data/Siri all-hands leaks 📱, YC's historic cohort 📈, running major projects 👨‍💻.eml
Processing email:  data/Action Required_ Recurring buys failed - please add funds to

In [None]:
# df = pandas.DataFrame.from_dict(output_table)
# df_split = pandas.DataFrame(df['tag'].to_list(), columns=['Tag1', 'Tag2', 'Tag3', 'Tag4', 'Tag5'])
# df = pandas.concat([df, df_split], axis=1)
# df = df.drop(columns=['tag'])
# df.to_csv("data.csv", mode='a', sep=',', encoding='utf-8', header=True, index=False)

# today_date = datetime.today().strftime('%Y-%m-%d')
# with open("README.md", 'a') as file:
#     markdown_content = "\n\n## Added on " + str(today_date) + "\n" + markdown_content
#     file.write(markdown_content + "\n\n")

# commit_message = "Added new course content"
# branch_name = "main"  # Replace with the branch name if different
# git_add_commit_push(commit_message, branch_name)