# In [None]:
# Import necessary libraries
import re
import time
import os
import chardet
import pandas as pd
import numpy as np
from tqdm import tqdm
import openai

# Configure the OpenAI client with the key held in the OPENAI_KEY
# environment variable (the key is None when the variable is not set).
openai.api_key = os.environ.get('OPENAI_KEY')

# Define function to detect encoding
def detect_encoding(filename):
    """Guess the character encoding of the file at ``filename``.

    The whole file is read as bytes and handed to ``chardet.detect``; the
    ``"encoding"`` entry of the detection result is returned.
    """
    with open(filename, "rb") as raw_file:
        raw_bytes = raw_file.read()
    detection = chardet.detect(raw_bytes)
    return detection["encoding"]

# Define function to extract address data
def extract_address_data(df, start_row=0, request_delay=2):
    """Populate the mailing-address columns of ``df`` from OpenAI completions.

    For every row with a non-blank ``Mailing_Address``, the address is sent
    to the completion endpoint and the reply is searched for ``Street:``,
    ``Postcode:``, ``City:`` and ``State:`` labels. The values found are
    written in place to ``Mailing_Street``, ``Mailing_Zip``,
    ``Mailing_City`` and ``Mailing_State``; a label that is missing, or
    whose value is empty, stores ``None``.

    A row is skipped (its columns are left untouched) when its index label
    is below ``start_row``, when its ``Account_Name`` equals that of the
    preceding row, or when its address is null or blank.

    Args:
        df: DataFrame providing ``Account_Name`` and ``Mailing_Address``
            columns. It is modified in place.
        start_row: Rows whose index label is less than this value are
            skipped, e.g. to resume an interrupted run. The comparison is
            against the index label, which matches the row position only
            for a default integer index.
        request_delay: Seconds to sleep after each API request.

    Returns:
        None.
    """
    # Each value runs to the end of its line. The earlier ``(.*)\W`` form
    # needed a non-word character after the value, so a final line without
    # a trailing newline was dropped or cut at its last space.
    field_patterns = {
        "Mailing_Street": re.compile(r"Street:[ \t]*(.*)"),
        "Mailing_Zip": re.compile(r"Postcode:[ \t]*(.*)"),
        "Mailing_City": re.compile(r"City:[ \t]*(.*)"),
        "Mailing_State": re.compile(r"State:[ \t]*(.*)"),
    }

    previous_name = None
    # Wrapping the row iterator makes tqdm advance once per row, so skipped
    # rows are counted and the bar reaches its total.
    progress = tqdm(df.iterrows(), total=len(df), desc='Processing rows')
    for index, row in progress:
        if index < start_row:
            continue

        company_name = row['Account_Name']
        if company_name == previous_name:
            continue
        previous_name = company_name

        address = row["Mailing_Address"]
        # str() keeps a non-string (e.g. numeric) address from raising on
        # .strip(); the null check comes first so NaN/NA never reach it.
        if pd.isnull(address) or not str(address).strip():
            continue

        # NOTE(review): legacy Completions endpoint and model name; confirm
        # both are still available for the installed openai version.
        # NOTE: the prompt does not ask for the "Label: value" layout that
        # the patterns above look for.
        response = openai.Completion.create(
            engine="text-davinci-003",
            prompt=f"Extract street name, postcode, city, state, and country from address: {address}...",
            max_tokens=1024,
            temperature=0.5,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )

        extracted_data = response.choices[0].text

        # Write each field, trimming stray whitespace (such as "\r") and
        # storing None when the label is absent or its value is empty.
        for column, pattern in field_patterns.items():
            match = pattern.search(extracted_data)
            value = match.group(1).strip() if match else ""
            df.at[index, column] = value or None

        # Pause between requests.
        time.sleep(request_delay)

# Load and process CSV file
# NOTE: "path_to_your_file" is a placeholder and must be replaced with the
# path of the CSV to process before this cell is run.
filename = "path_to_your_file" # change to your actual file path
# Detect the encoding first so that read_csv decodes the file with it.
encoding = detect_encoding(filename)
df = pd.read_csv(filename, encoding=encoding)
# The call's return value is not used; the results are read back from df.
extract_address_data(df)

# Save updated dataframe to new CSV file
# index=False leaves the row index out of the written file.
df.to_csv("updated_file.csv", index=False)
