In [1]:
import re
from datetime import datetime
import pandas as pd

def _parse_prompt_date(date_text):
    """Convert a date such as 'February 9th, 2016' to 'yyyy-mm-dd'.

    ``strptime`` has no directive for English ordinal suffixes, so the
    suffix (st/nd/rd/th) directly following the day number is removed
    before parsing. Dates written without a suffix are accepted too.

    Raises:
        ValueError: if the text is not of the form '<Month> <day>, <year>'.
    """
    # The look-behind restricts the match to a suffix that follows a digit,
    # so month names containing "st" (e.g. "August") are left untouched.
    without_suffix = re.sub(r"(?<=\d)(?:st|nd|rd|th)\b", "", date_text)
    return datetime.strptime(without_suffix, '%B %d, %Y').strftime('%Y-%m-%d')


def interpret_investment_prompt(prompt, categories):
    """Extract the investment constraints described in a free-text prompt.

    The prompt is expected to contain sentences of the form
    "avoids X and Y.", "investment start date was <date>.",
    optionally "investment end date was <date>.", and
    "investment budget is $<digits>.".

    Args:
        prompt: Text describing the investor.
        categories: Collection of known sector names; avoided sectors that
            are not in this collection (exact match) are dropped.

    Returns:
        dict with the keys "Avoids Investment In" (list of str),
        "Investment Start Date" and "Investment End Date" (both
        'yyyy-mm-dd' strings) and "Investment Budget" (digits as a str).
        When the prompt has no end date, today's date is used.

    Raises:
        AttributeError: if the avoid, start-date or budget sentence is
            missing from the prompt (``re.search`` returns ``None``).
        ValueError: if a date cannot be parsed.
    """
    # Extracting the information using regular expressions
    avoid_investment = re.search(r"avoids (.+?)\.", prompt).group(1).split(" and ")
    start_date = re.search(r"investment start date was (.+?)\.", prompt).group(1)
    end_date_match = re.search(r"investment end date was (.+?)\.", prompt)
    budget = re.search(r"investment budget is \$(\d+)\.", prompt).group(1)

    # Keep only the avoided sectors that are known categories
    filtered_avoid_investment = [
        category for category in avoid_investment if category in categories
    ]

    # Format dates to yyyy-mm-dd; an open-ended investment ends "today"
    start_date = _parse_prompt_date(start_date)
    if end_date_match:
        end_date = _parse_prompt_date(end_date_match.group(1))
    else:
        end_date = datetime.now().strftime('%Y-%m-%d')

    return {
        "Avoids Investment In": filtered_avoid_investment,
        "Investment Start Date": start_date,
        "Investment End Date": end_date,
        "Investment Budget": budget
    }

# Sample investor description used to exercise the parser
generated_context = (
    "Andrew Vega is 77 years old and his investment start date was "
    "February 9th, 2016. His investment end date was July 14th, 2016. "
    "He enjoys knitting and avoids Real Estate and Construction. "
    "Their total investment budget is $49434."
)
prompt = f"What does this person not want to invest in? {generated_context}"

# Sector names recognised when filtering the avoided investments
categories = [
    'Unknown',
    'Energy',
    'Consumer Cyclical',
    'Basic Materials',
    'Real Estate',
    'Industrials',
    'Communication Services',
    'Healthcare',
    'Technology',
    'Financial Services',
    'Utilities',
    'Consumer Defensive',
]

# Parse the prompt and show the extracted constraints
investment_info = interpret_investment_prompt(prompt, categories)
print(investment_info)

{'Avoids Investment In': ['Real Estate'], 'Investment Start Date': '2016-02-09', 'Investment End Date': '2016-07-14', 'Investment Budget': '49434'}


In [None]:

# Read the ticker/sector table from disk
tickers_df = pd.read_csv(r'C:\Users\Aneurin\python\Hackathon\tickers_with_sectors.csv')

# Lower-case both sides so the sector comparison is case-insensitive
avoided_sectors = {name.lower() for name in investment_info['Avoids Investment In']}
in_avoided_sector = tickers_df['Sector'].str.lower().isin(avoided_sectors)

# Keep the tickers whose sector is not avoided, renumbered from zero
remaining_tickers = tickers_df.loc[~in_avoided_sector, 'Ticker']
remaining_tickers = remaining_tickers.reset_index(drop=True)

# Write the surviving tickers out without the index column
remaining_tickers.to_csv(r'C:\Users\Aneurin\python\remainingtickers.csv', index=False)