In [None]:
import sys
import os
from openai import OpenAI

# Resolve the directory two levels above the current working directory; the
# `config` module imported below is expected to live there.
grandparent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if grandparent_dir not in sys.path:
    sys.path.append(grandparent_dir)

# This import only resolves after the sys.path change above, which is why it
# is not grouped with the other imports.
from config import api_key

# OpenAI client authenticated with the key taken from config.
client = OpenAI(api_key=api_key)

In [None]:
import openai
from langchain.llms import OpenAI
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from langchain.document_loaders import CSVLoader

import pandas as pd
from tabulate import tabulate
import os

In [None]:
import os

# Publish the key as an environment variable - presumably so that the
# ChatOpenAI instance created later without an explicit api_key argument can
# pick it up (TODO confirm).
os.environ["OPENAI_API_KEY"] = api_key

In [None]:
# Read dataset.csv (path relative to the working directory) through
# LangChain's CSVLoader. The cells that follow index and iterate `data` as a
# sequence of documents and read each one's `.page_content` text.
loader = CSVLoader(file_path="dataset.csv")
data = loader.load()

In [None]:
# Peek at the first document: its page_content text, one list item per line.
data[0].page_content.split("\n")

In [None]:
def parse_page_content(content):
    """Turn newline-separated ``"key: value"`` text into a dict of strings.

    Each non-empty line is split on the FIRST ``": "`` only, so a value that
    itself contains ``": "`` (for example ``"Note: time: 5pm"``) is kept whole
    instead of being cut at the second separator.

    A non-empty line without any ``": "`` is treated as the continuation of the
    previous field's value (a value that contained a line break) and is
    appended to it with a newline. Such a line appearing before any field has
    been seen is ignored.

    Args:
        content: The text to parse, e.g. a document's ``page_content``.

    Returns:
        dict mapping each key to its value as a string. A field with nothing
        after the separator maps to ``""``. If a key repeats, the last
        occurrence wins.
    """
    parsed = {}
    last_key = None
    for field in content.split("\n"):
        if not field:
            # Blank lines carry no field.
            continue
        key, separator, value = field.partition(": ")
        if separator:
            parsed[key] = value
            last_key = key
        elif last_key is not None:
            # No separator: this line belongs to the preceding value.
            parsed[last_key] += "\n" + field
    return parsed

In [None]:
# One dict per loaded document, built from that document's page_content text.
parsed_data = [
    parse_page_content(doc.page_content)
    for doc in data
]

In [None]:
# Assemble the parsed records into a DataFrame. Every cell is a string here,
# because parse_page_content builds its values with str.split; numeric columns
# have to be converted explicitly before doing arithmetic on them.
df = pd.DataFrame(parsed_data)
df.head()

In [None]:
# Keep only the rows that have a non-empty Age string, then convert that
# column to float to take its mean.
df = df.loc[df["Age"].ne("")]
ages = df["Age"].astype(float)
average_age = ages.mean()

# Product category that occurs most often.
category_counts = df["Product Category"].value_counts()
popular_category = category_counts.idxmax()

# Number of rows per gender, printed as a grid table.
gender_distribution = df["Gender"].value_counts()
gender_table = tabulate(
    gender_distribution.items(),
    headers=["Gender","Count"],
    tablefmt="grid",
)
print(gender_table)

In [None]:
# Two-message chat prompt: a fixed system role plus a human message whose
# placeholders are filled with the statistics computed in the previous cell.
# The wording typos were fixed and the most popular product category, which
# was computed but never sent, is now part of the prompt.
chat_template = ChatPromptTemplate.from_messages(
        [SystemMessage(content=("You are an expert data analysis assistant")),
         HumanMessagePromptTemplate.from_template(
             """
             I have a dataset of customer purchases with the following characteristics:
             - Average age of customers {average_age}
             - Gender distribution: {gender_distribution}
             - Most popular product category: {popular_category}

             Based on this information, can you provide insights into the potential marketing strategies and product recommendation
             """
         )
        ])

# Render the value counts as "label: count" pairs. Formatting the Series
# object itself would also put its name/dtype footer into the prompt.
gender_summary = ", ".join(
    f"{gender}: {count}" for gender, count in gender_distribution.items()
)

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
output = llm.invoke(
    chat_template.format_messages(
        average_age=average_age,
        gender_distribution=gender_summary,
        popular_category=popular_category,
    )
)
print(output.content)

In [None]:
import openai
import pandas as pd
from langchain_openai import OpenAI
from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate
from IPython.display import display, Markdown
import os

In [None]:
# Set the key in the environment again; this repeats the assignment made
# earlier in the notebook, so the cell also works if only this section is run
# (provided `api_key` is defined).
os.environ["OPENAI_API_KEY"] = api_key

In [None]:
# Load dataset.csv into a pandas DataFrame. Note that this rebinds `data`,
# which earlier in the notebook held the documents returned by CSVLoader.
# A failure is reported on stdout and then re-raised, so the cell still stops
# with the original exception.
try:
    data = pd.read_csv("dataset.csv")
except Exception as e:
    print(f"Error loading CSV file: {e}")
    raise # Re-raise for further handling

In [None]:
# Take the first 100 rows of the frame that was just read with pd.read_csv.
# The slice is taken from `data`, not from `df`: `df` is a leftover of the
# earlier CSVLoader section (string-typed and already filtered on Age) and is
# undefined when only this section is run on a fresh kernel.
# NOTE(review): the chains below are invoked with `data`, not with this `df` -
# confirm which frame is meant to reach the model.
df = data[:100]
df.head()


In [None]:
from langchain.schema.output_parser import StrOutputParser

# Create an OpenAI chat LLM
llm = ChatOpenAI(model="gpt-4o-mini", api_key=api_key)

# Prompt asking the model for key insights about the text bound to {content_data}.
analysis_template = """
Analyze the following data and extract key insight
Data:
{content_data}

Key insight:
"""

analysis_prompt_template = PromptTemplate(input_variables=["content_data"], template=analysis_template)

# Pipeline: fill the prompt -> call the chat model -> reduce the reply to a plain string.
lmm_chain = analysis_prompt_template | llm | StrOutputParser()

# The template stringifies whatever is bound to {content_data}. Passing the
# DataFrame itself would insert pandas' display repr, which elides rows (and
# possibly columns) with "..." on larger frames, so the model would only see a
# fragment of the data. Send the first 100 rows as CSV text instead; 100 is the
# same row cap used by the slicing cell above.
content_for_prompt = data.head(100).to_csv(index=False)

response = lmm_chain.invoke({"content_data": content_for_prompt})
print(response)

In [None]:
# Prompt that turns the extracted insights into a report.
summary_template = """
Generate a report based on these key insights

Key Insights: {insights} 

Summary: report"""
summary_prompt_template = PromptTemplate(
    input_variables=['insights'],
    template=summary_template)

# Two-stage pipeline: the analysis prompt/model/parser produce the text bound
# to "insights", which is then fed through the summary prompt, the model and
# the string parser.
seq_chain = ({"insights": analysis_prompt_template | llm | StrOutputParser()}
             | summary_prompt_template
             | llm
             | StrOutputParser())

# Bind {content_data} to CSV text of the first 100 rows rather than to the
# DataFrame object: formatting the frame directly would insert pandas'
# abbreviated display repr (rows elided with "...") into the prompt.
report_input = data.head(100).to_csv(index=False)

print(seq_chain.invoke({"content_data": report_input}))