In [2]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
import json
import os

import tiktoken
from openai.embeddings_utils import get_embedding
from sklearn.cluster import AgglomerativeClustering

import openai
from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
if os.getenv("OPENAI_API_KEY") is not None:
    print ("OPENAI_API_KEY is ready")
else:
    print ("OPENAI_API_KEY environment variable not found")

# Create an SQLAlchemy engine to connect to the database
engine = create_engine('postgresql://postgres:mysecretpassword@localhost/postgres')

# Read the ASIN values from the CSV file
asin_list = pd.read_csv('asin_list.csv')['asin'].tolist()

OPENAI_API_KEY is ready


In [3]:
query = f"""
    SELECT DISTINCT asin, cluster_label, type, solutions
    FROM weighted_trait_graph 
    WHERE asin IN ({','.join(['%s']*len(asin_list))});
"""

# convert the list to a tuple and put it inside a dictionary
params = {'asin_list': tuple(asin_list)}

weighted_trait_df_graph =  pd.read_sql_query(query, engine, params=asin_list)

In [4]:
for i in weighted_trait_df_graph.index:
    try:
        json_string = weighted_trait_df_graph.solutions.iloc[i]
        if json_string:
            data = json.loads(json_string)
            if isinstance(data, dict):
                # Access 'Problem Statement' key
                problem_statement = data.get('Problem Statement')
                if problem_statement:
                    weighted_trait_df_graph.at[i, 'cluster_problem_statement'] = problem_statement
                
                # Access 'Solution 1' key
                solution_1 = data.get('Solution 1')
                if solution_1 and isinstance(solution_1, dict):
                    solution_1_title = solution_1.get('Title')
                    solution_1_description = solution_1.get('Description')
                    weighted_trait_df_graph.at[i, 'cluster_solution_1_title'] = solution_1_title
                    weighted_trait_df_graph.at[i, 'cluster_solution_1_description'] = solution_1_description
                
                # Access 'Solution 2' key
                solution_2 = data.get('Solution 2')
                if solution_2 and isinstance(solution_2, dict):
                    solution_2_title = solution_2.get('Title')
                    solution_2_description = solution_2.get('Description')
                    weighted_trait_df_graph.at[i, 'cluster_solution_2_title'] = solution_2_title
                    weighted_trait_df_graph.at[i, 'cluster_solution_2_description'] = solution_2_description
                
                # Access 'Solution 3' key
                solution_3 = data.get('Solution 3')
                if solution_3 and isinstance(solution_3, dict):
                    solution_3_title = solution_3.get('Title')
                    solution_3_description = solution_3.get('Description')
                    weighted_trait_df_graph.at[i, 'cluster_solution_3_title'] = solution_3_title
                    weighted_trait_df_graph.at[i, 'cluster_solution_3_description'] = solution_3_description

    except (json.JSONDecodeError, ValueError) as e:
        # Handle JSONDecodeError and ValueError
        # Perform necessary actions or set default values
        print(f"Error processing row {i}: {e}")


Error processing row 2: Expecting ',' delimiter: line 3 column 1 (char 512)
Error processing row 20: Expecting ',' delimiter: line 3 column 1 (char 512)
Error processing row 40: Expecting ',' delimiter: line 3 column 1 (char 512)
Error processing row 51: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Error processing row 64: Expecting ',' delimiter: line 3 column 1 (char 512)
Error processing row 81: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Error processing row 103: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Error processing row 108: Expecting ',' delimiter: line 3 column 1 (char 512)
Error processing row 115: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Error processing row 119: Expecting ',' delimiter: line 3 column 1 (char 512)
Error processing row 124: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Error processing row 133: Exp

In [5]:
#for column in ['cluster_problem_statement', 'cluster_solution_1_title', 'cluster_solution_1_description', 'cluster_solution_2_title', 'cluster_solution_2_description', 'cluster_solution_3_title', 'cluster_solution_3_description']:
for column in ['cluster_problem_statement']:
    with engine.connect() as con:
        # Add cluster_label column if it doesn't exist
        con.execute(f"ALTER TABLE weighted_trait_graph ADD COLUMN IF NOT EXISTS {column} VARCHAR;")

        for index, row in weighted_trait_df_graph.iterrows():
            asin_val = row['asin']
            cluster_label_val = row['cluster_label']
            type_val = row['type']
            column_val = row[column]

            query = f"""
                UPDATE weighted_trait_graph
                SET {column} = '{str(column_val).replace("'", "''")}'
                WHERE type = '{type_val}'
                    AND cluster_label = '{cluster_label_val}'
                    AND asin = '{asin_val}';
                """
            
            try:
                con.execute(query)
            except Exception as e:
                print(f"An error occurred during the execution of the query:\n{query}\nError message: {str(e)}")


In [6]:
## Generates a expanded dataframe dedicated for the solutions
cluster_solution_df = weighted_trait_df_graph.copy()

# Perform first melt
melted_df_title = pd.melt(cluster_solution_df,
                          id_vars=['asin', 'cluster_label', 'type', 'cluster_problem_statement'],
                          value_vars=['cluster_solution_1_title', 'cluster_solution_2_title', 'cluster_solution_3_title'],
                          var_name='solution_number', 
                          value_name='cluster_solution_title')

melted_df_title.drop(columns = 'solution_number', inplace = True)
melted_df_title.drop_duplicates(inplace = True)

# Perform second melt
melted_df_description = pd.melt(cluster_solution_df,
                                id_vars=['asin', 'cluster_label', 'type', 'cluster_problem_statement'],
                                value_vars=['cluster_solution_1_description', 'cluster_solution_2_description', 'cluster_solution_3_description'],
                                var_name='solution_number', 
                                value_name='cluster_solution_description')

melted_df_description.drop(columns = 'solution_number', inplace = True)
melted_df_description.drop_duplicates(inplace = True)
melted_df_description

# Merge the two dataframes
cluster_solutions_df = pd.merge(melted_df_title, melted_df_description, 
                     on=['asin', 'cluster_label', 'type', 'cluster_problem_statement'], how='inner')

del melted_df_title, melted_df_description

cluster_solutions_df.drop_duplicates(inplace = True)

In [7]:

embedding_model = "text-embedding-ada-002"
embedding_encoding = "cl100k_base"  # this the encoding for text-embedding-ada-002
max_tokens = 8000  # the maximum for text-embedding-ada-002 is 8191
encoding = tiktoken.get_encoding(embedding_encoding)
    

def get_text_from_embedding(embedding):
    return openai.Embedding.retrieve(embedding, model="text-embedding-ada-002")["data"][0]["text"]

In [8]:
n_clusters = 10

df = cluster_solutions_df.copy()
df.drop(columns = ['asin'], inplace = True)

df['cluster_solution_title']= df['cluster_solution_title'].map(str)
df['cluster_solution_description']= df['cluster_solution_description'].map(str)
# omit observations that are too long to embed
df["n_tokens"] = df['cluster_solution_description'].apply(lambda x: len(encoding.encode(x)))
df = df[df.n_tokens <= max_tokens]

In [9]:
df.drop_duplicates(subset='cluster_solution_description', inplace=True, keep='first')


In [10]:
# Get embeddings
df["embedding"] = df['cluster_solution_description'].apply(lambda x: get_embedding(x, engine=embedding_model))
df["embedding"] = df["embedding"].apply(np.array)  # convert string to numpy array
matrix = np.vstack(df.embedding.values)

In [11]:
# Fit clusters
n_clusters = n_clusters  # Adjust as needed
clustering = AgglomerativeClustering(n_clusters=n_clusters)
labels = clustering.fit_predict(matrix)

# Add cluster labels to dataframe and create clusters dictionary
df["cluster"] = labels
clusters_dict = {}
for i in range(n_clusters):
    clusters_dict[i] = df[df.cluster == i]['cluster_solution_title'].values.tolist()

In [12]:
df.sort_values(by=['cluster'], inplace=True)

In [13]:
df[df.cluster == 0]['cluster_solution_title'].values.tolist()

['Implementing Noise Reduction Technology',
 'Implementing Noise Reduction Technology',
 'Improving the Magnet Design for Better Stability',
 'Improving the Quality and Durability of the Balls',
 'Reducing the Noise Level of the Product',
 'Reducing the Clicking Sound Produced by the Beads and Pen',
 'Reducing the Noise Level of the Magnetic Beads and Balls',
 'Implementing Noise Reduction Technology',
 'Reducing the Noise Level of the Toy',
 'Reducing the Noise Level of the Toy']

In [14]:
df[df.cluster == 0]['cluster_solution_description'].values.tolist()

["To provide users with more control over the noise level, a volume control feature could be added to the product. This feature would allow users to adjust the volume to their desired level, reducing the noise level to a more comfortable level. The volume control could be designed to be easily accessible and user-friendly, with clear markings to indicate the volume level. Additionally, the product's packaging and marketing materials should clearly communicate this feature to potential buyers to inform them of the added value they will receive. However, a thorough analysis should be conducted to determine the feasibility of adding this feature, taking into account the product's design and manufacturing costs.",
 "To address the issue of noise, the product design should be reviewed to identify the source of the noise. Once identified, noise reduction technology can be implemented to reduce the noise level. For example, adding sound-absorbing materials to the product's interior or using v

In [15]:
df[df.cluster == 1]['cluster_solution_title'].values.tolist()

['Reducing the Noise Level of the Product',
 'Expanding the Product Line to Include Larger Size Options',
 'Expanding the Product Line to Include Larger Size Options',
 'Improving the Bead Design and Hole Pattern',
 'Expanding the Product Line to Include Larger Size Options',
 'Redesigning the Stylus Pen with Improved Attachment Mechanism']

In [16]:
df[df.cluster == 1]['cluster_solution_description'].values.tolist()

["The product's hole pattern limitations have been reported as a significant issue by some users, particularly those with specific sensory needs. To address this, the product's design should be reviewed to identify the feasibility of expanding the hole pattern options. This could involve creating multiple hole pattern options or a customizable hole pattern design. Additionally, the product's packaging and marketing materials should be updated to highlight the expanded hole pattern options as a key feature, enhancing the product's perceived value and marketability. However, a thorough analysis should be conducted to determine the feasibility of this solution based on user needs, manufacturing feasibility, and cost implications.",
 "To make the product more engaging for both kids and adults, the manufacturer should consider incorporating more engaging features. For kids, this could include adding more colors, shapes, or textures to the drawing surface, while for adults, this could includ

In [17]:
df[df.cluster == 2]['cluster_solution_description'].values.tolist()

["To address the issues related to the pen's quality and durability, the manufacturer should explore new materials and design modifications. The use of high-quality materials like polycarbonate or HDPE could provide a long-lasting solution. Additionally, a comprehensive review of the pen design should be conducted to ensure it can withstand regular use without significant wear or tear. This upgraded design should undergo rigorous testing to ensure it meets the required safety standards and enhances customer satisfaction.",
 "To address the issue of the pen and magnet being easily breakable, a redesign of the product should be considered. The materials used in the product's construction should be reviewed to ensure that they are durable and long-lasting. Additionally, the product should undergo rigorous testing to ensure that it can withstand regular use without significant wear or tear. A thorough analysis of the current design and user feedback should be conducted to determine the mos

In [18]:
df[df.cluster == 2]['cluster_solution_title'].values.tolist()

['Improving the Quality and Durability of the Stylus Pen',
 'Improving the Bead Design and Hole Pattern',
 'Improving Durability and Quality Control',
 'Redesigning the Stylus Pen with Improved Attachment Mechanism']

In [19]:
# Read data about the product

with open('/Users/vladbordei/Documents/Development/oaie2/summarised_simplified_product_information.json') as file:
    json_string = file.read()
    general_product_data = json.loads(json_string)

In [33]:
User_Prompt_1 = f"""\
You are a highly experienced industrial product design engineer.\
You are asked to review a series of solutions proposed by a team of junior engineers.\
The solutions are for improving an existing product.\
Expected Output means two changes that can be made to the product and is technical. \
It will be implemented by an engineer so it has to be as precise as posible. No bla bla.\
Simple solutions are better than complex ones. Consider the cost of implementing the solution and opt for cheaper solutions.\
\
{general_product_data}
\
Observations Type: 'Fact',\
Cluster Label: 'Drawing Board with Pen',\
\
Output will be a JSON file with the following structure:
```{{\
"Product Improvement 1":{{\
"Title": ...\
"Implementation Details for the engineer": ...[700 words]\
}},\
"Product Improvement 2":{{\
"Title": ...\
"Implementation Details for the engineer": ...[700 words]\
}}}}\
```PROBLEMS OBSERVED BY THE JUNIOR ENGINEERS: ```['The product, a car toy for kids, has been reported to be noisy, \
which has led to user dissatisfaction, negatively impacting the overall perceived value and marketability of the product.', \
"The product, a magnetic drawing board set, has received mixed reviews from customers. While some appreciate the sensory features, \
others have reported issues with the product's loudness, magnetic functionality, and hole pattern limitations. \
These problems have led to user dissatisfaction, negatively impacting the overall perceived value and marketability of the product."]\
\
```PROPOSED SOLUTIONS: ```["Adding a volume control feature could let users adjust noise levels to their preference. \
This feature needs to be user-friendly and easily accessible. The product's packaging and marketing materials should \
highlight this feature, but a feasibility analysis considering costs is necessary.",\
"The product's noise level could be lessened by reviewing and altering its internal components. \
Noise-dampening materials like foam or rubber could be used. Highlighting this improvement in \
packaging and marketing materials could increase perceived value and marketability.",\
"To address noise and ball mechanism efficiency, a quieter ball like foam, stronger magnets, \
and a warning label about noise could be implemented. Including a noise reduction rating on the \
packaging could also inform users of the noise level, enhancing product value."]
"""

In [21]:
User_Prompt_1



In [34]:
AI_Prompt_1 = """\
{\
  "Product Improvement 1": {\
    "Title": "Volume Control Mechanism for Noise Reduction",\
    "Implementation Details for the engineer": {\
      "Objective": "Reduce the noise produced by the movement of the magnetic beads.",\
      "Solution": [\
        "Incorporate a volume control feature that allows users to adjust the noise levels.",\
        "Use noise-dampening materials like foam or rubber around the inside surface of the board."\
      ],\
      "Steps": [\
        "Volume Control: Add a control knob on the side of the board that modifies the interaction between the stylus and the magnetized beads, thus controlling the noise level.",\
        "Noise-Dampening Materials: Include foam or rubber materials inside the board to lessen the impact noise made by the beads."\
      ],\
      "Considerations": [\
        "Conduct a feasibility analysis for cost-effective implementation.",\
        "Ensure the feature is user-friendly and highlighted in packaging and marketing materials."\
      ]\
    }\
  },\
  "Product Improvement 2": {\
    "Title": "Enhanced Magnetic Functionality and Hole Pattern Variety",\
    "Implementation Details for the engineer": {\
      "Objective": "Improve the drawing capabilities of the board by enhancing magnetic functionality and diversifying the hole pattern.",\
      "Solution": [\
        "Redesign the hole pattern on the board to allow for more creative drawings.",\
        "Use stronger magnets in the stylus pen and the board for a smoother drawing experience."\
      ],\
      "Steps": [\
        "Hole Pattern: Employ CAD software to create a variety of hole patterns. Use CNC machining or injection molding for manufacturing.",\
        "Stronger Magnets: Source and test different types of magnets to determine the most effective and affordable options."\
      ],\
      "Considerations": [\
        "Add a warning label to inform users of the enhanced magnetic strength.",\
        "Ensure the product meets safety standards and regulations regarding the use of magnets in children's toys.",\
        "Balance cost implications with product functionality and safety."\
      ]\
    }\
  }\
}\
"""

In [35]:
AI_Prompt_1



In [36]:
for cluster in df.cluster.unique():
    print(cluster)

    User_Prompt_2 = f"""\
        ```PROBLEMS OBSERVED BY THE JUNIOR ENGINEERS: ```{df[df.cluster == cluster].cluster_problem_statement.to_list()}\
        ```PROPOSED SOLUTIONS: ```{df[df.cluster == cluster].cluster_solution_description.to_list()}\
        """

    try:
        response = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "user", "content": User_Prompt_1},
                        {"role": "assistant", "content": AI_Prompt_1},
                        {"role": "user", "content": User_Prompt_2} ],
                    temperature=0.2,
                    api_key=OPENAI_API_KEY
        )
        chatbot_response = response["choices"][0]["message"]["content"]
        df.loc[df.cluster == cluster, "senior_engineer_solution"] = chatbot_response
        print(chatbot_response)
    except Exception as e:
        print(f"An error occurred during the OpenAI ChatCompletion API call: {e}")

0
{
  "Product Improvement 1": {
    "Title": "Noise Reduction Solutions for Car Toy",
    "Implementation Details for the engineer": {
      "Objective": "Reduce the noise produced by the car toy, which has led to user dissatisfaction and negatively impacted the product's perceived value and marketability.",
      "Solution": [
        "Add a volume control feature that allows users to adjust the noise levels to their preference.",
        "Review the internal components of the toy to identify the source of the noise and add noise-reducing materials like foam or rubber to the design.",
        "Modify the design of the toy to reduce the sound produced when the toy is in use."
      ],
      "Steps": [
        "Volume Control: Add a control knob on the side of the toy that modifies the interaction between the toy's components, thus controlling the noise level.",
        "Noise-Reducing Materials: Include foam or rubber materials inside the toy to lessen the impact noise made by the com

In [37]:
df.to_csv("output.csv", index=False)


In [4]:
df = pd.read_csv("output.csv")

In [38]:
df.columns

Index(['cluster_label', 'type', 'cluster_problem_statement',
       'cluster_solution_title', 'cluster_solution_description', 'n_tokens',
       'embedding', 'cluster', 'senior_engineer_solution'],
      dtype='object')

In [39]:
set(df["senior_engineer_solution"].to_list())

{'As there are no problems observed or proposed solutions provided by the junior engineers, it is not possible to provide any product improvement suggestions. Can you please provide more information or context?',
 '{\n  "Product Improvement 1": {\n    "Title": "Expanded Hole Pattern Options",\n    "Implementation Details for the engineer": {\n      "Objective": "To address the issue of limited hole pattern options and cater to users with specific sensory needs.",\n      "Solution": [\n        "Review the product\'s design to identify the feasibility of expanding the hole pattern options.",\n        "Create multiple hole pattern options or a customizable hole pattern design.",\n        "Update the product\'s packaging and marketing materials to highlight the expanded hole pattern options as a key feature."\n      ],\n      "Steps": [\n        "Design Review: Conduct a thorough analysis of user needs, manufacturing feasibility, and cost implications to determine the most effective approa

In [54]:
import json

for index, row in df.iterrows():
    json_string = row["senior_engineer_solution"]
    if json_string:
        try:
            data = json.loads(json_string)
            for improvement, details in data.items():
                print(f"Product Improvement: {improvement}")
                print(f"Title: {details['Title']}")
                implementation_details = details["Implementation Details for the engineer"]
                print(f"Objective: {implementation_details['Objective']}")
                print("Solution:")
                for solution in implementation_details["Solution"]:
                    print(f"- {solution}")
                print("Steps:")
                for step in implementation_details["Steps"]:
                    print(f"- {step}")
                print("Considerations:")
                for consideration in implementation_details["Considerations"]:
                    print(f"- {consideration}")
                print("-------------------")
        except json.JSONDecodeError as e:
            print(f"Invalid JSON string: {json_string}")
            continue


Product Improvement: Product Improvement 1
Title: Noise Reduction Solutions for Car Toy
Objective: Reduce the noise produced by the car toy, which has led to user dissatisfaction and negatively impacted the product's perceived value and marketability.
Solution:
- Add a volume control feature that allows users to adjust the noise levels to their preference.
- Review the internal components of the toy to identify the source of the noise and add noise-reducing materials like foam or rubber to the design.
- Modify the design of the toy to reduce the sound produced when the toy is in use.
Steps:
- Volume Control: Add a control knob on the side of the toy that modifies the interaction between the toy's components, thus controlling the noise level.
- Noise-Reducing Materials: Include foam or rubber materials inside the toy to lessen the impact noise made by the components.
- Design Modification: Redesign the toy's components to reduce the sound produced when the toy is in use.
Considerations:

In [69]:
import pandas as pd
import json

# Assuming df is your DataFrame and 'senior_engineer_solution' is the column with the JSON data
for index, row in df.iterrows():
    try:
        json_string = row['senior_engineer_solution']
        if json_string:
            try:
                data = json.loads(json_string)
            except json.JSONDecodeError:
                # Attempt to complete the JSON data by closing all possible opened structures
                json_string += '}' * (json_string.count('{') - json_string.count('}'))
                json_string += ']' * (json_string.count('[') - json_string.count(']'))
                json_string += '"' * (json_string.count('"') % 2)
                try:
                    data = json.loads(json_string)
                except json.JSONDecodeError as e:
                    print(f"Error processing row {index} for improvement with JSON data error after attempting fix: {e}")
                    print(f"JSON data after attempted fix: {json_string}")
                    continue

            for improvement, details in data.items():
                try:
                    implementation_details = details['Implementation Details for the engineer']
                    if implementation_details:
                        if improvement == 'Product Improvement 1':
                            df.at[index, 'improvement_1_title'] = details['Title']
                            df.at[index, 'improvement_1_objective'] = implementation_details['Objective']
                            df.at[index, 'improvement_1_solution'] = ', '.join(implementation_details['Solution'])
                            df.at[index, 'improvement_1_steps'] = ', '.join(implementation_details['Steps'])
                            df.at[index, 'improvement_1_considerations'] = ', '.join(implementation_details['Considerations'])
                        elif improvement == 'Product Improvement 2':
                            df.at[index, 'improvement_2_title'] = details['Title']
                            df.at[index, 'improvement_2_objective'] = implementation_details['Objective']
                            df.at[index, 'improvement_2_solution'] = ', '.join(implementation_details['Solution'])
                            df.at[index, 'improvement_2_steps'] = ', '.join(implementation_details['Steps'])
                            df.at[index, 'improvement_2_considerations'] = ', '.join(implementation_details['Considerations'])
                        elif improvement == 'Product Improvement 3':
                            df.at[index, 'improvement_3_title'] = details['Title']
                            df.at[index, 'improvement_3_objective'] = implementation_details['Objective']
                            df.at[index, 'improvement_3_solution'] = ', '.join(implementation_details['Solution'])
                            df.at[index, 'improvement_3_steps'] = ', '.join(implementation_details['Steps'])
                            df.at[index, 'improvement_3_considerations'] = ', '.join(implementation_details['Considerations'])
                        elif improvement == 'Product Improvement 4':
                            df.at[index, 'improvement_4_title'] = details['Title']
                            df.at[index, 'improvement_4_objective'] = implementation_details['Objective']
                            df.at[index, 'improvement_4_solution'] = ', '.join(implementation_details['Solution'])
                            df.at[index, 'improvement_4_steps'] = ', '.join(implementation_details['Steps'])
                            df.at[index, 'improvement_4_considerations'] = ', '.join(implementation_details['Considerations'])
                except KeyError:
                    print(f"KeyError in row {index}, improvement {improvement}")

    except Exception as e:
        print(f"Unexpected error processing row {index}: {e}")


Error processing row 185 for improvement with JSON data error after attempting fix: Expecting ',' delimiter: line 72 column 62 (char 5054)
JSON data after attempted fix: {
  "Product Improvement 1": {
    "Title": "Pen Attachment and Storage Mechanism",
    "Implementation Details for the engineer": {
      "Objective": "Address concerns related to the pen's quality, durability, and functionality, as well as the need for modifications to improve user experience.",
      "Solution": [
        "Incorporate a mechanism into the product design to secure the pen to the board.",
        "Redesign the pen storage to prevent the loss of pens."
      ],
      "Steps": [
        "Pen Attachment: Consider various approaches, such as a designated slot, magnetic attachment, or retractable cord system. Conduct a feasibility analysis to determine the most suitable option based on user needs, manufacturing feasibility, and cost implications.",
        "Pen Storage: Review the board's design to include

In [70]:
df

Unnamed: 0,cluster_label,type,cluster_problem_statement,cluster_solution_title,cluster_solution_description,n_tokens,embedding,cluster,senior_engineer_solution,improvement_1_title,...,improvement_4_title,improvement_4_objective,improvement_4_solution,improvement_4_steps,improvement_4_considerations,improvement_5_title,improvement_5_objective,improvement_5_solution,improvement_5_steps,improvement_5_considerations
333,Noisy Operation,Issue,"The product, a car toy for kids, has been repo...",Implementing Noise Reduction Technology,To provide users with more control over the no...,125,"[0.012593695893883705, 0.0010760125005617738, ...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
331,Noisy Operation,Issue,"The product, a car toy for kids, has been repo...",Implementing Noise Reduction Technology,"To address the issue of noise, the product des...",109,"[0.007600738201290369, 0.016248049214482307, 0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
285,Magnetic Drawing Tools and Toys,Fact,"The product, a magnetic drawing board set, has...",Improving the Magnet Design for Better Stability,"To address the issue of noise levels, the prod...",103,"[0.01147362869232893, 0.007993975654244423, -0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
221,Board Game Malfunctions,Issue,"The product, a magnetic drawing board set, has...",Improving the Quality and Durability of the Balls,The noise levels of the product can be address...,103,"[0.01975761167705059, 0.01129576563835144, 0.0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
202,Sensory Features and Technical Specifications,Fact,"The product, a magnetic drawing board set, has...",Reducing the Noise Level of the Product,The loudness of the product has been a signifi...,105,"[0.012011406011879444, 0.01515285111963749, -0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
128,Beads and Pen Functionality,Fact,"The product has received mixed reviews, with s...",Reducing the Clicking Sound Produced by the Be...,The clicking sound produced by the beads and p...,112,"[0.0011809485731646419, 0.015025080181658268, ...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
101,Sensory Toys with Balls and Beads,Fact,The product has received multiple complaints r...,Reducing the Noise Level of the Magnetic Beads...,"To address the issue of noise, the magnetic be...",109,"[-0.011232763528823853, 0.004871760495007038, ...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
332,Noisy Operation,Issue,"The product, a car toy for kids, has been repo...",Implementing Noise Reduction Technology,"To mitigate the noise issue, noise-cancelling ...",113,"[0.010962863452732563, 0.023807764053344727, -...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
66,Reduced Noise Levels,Improvement,"The product, a magnetic drawing board set, has...",Reducing the Noise Level of the Toy,To address both the noise level and the ball m...,121,"[-0.012581954710185528, 0.01657983474433422, -...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
64,Reduced Noise Levels,Improvement,"The product, a magnetic drawing board set, has...",Reducing the Noise Level of the Toy,The noise level of the toy can be reduced by e...,139,"[0.00288968812674284, 0.013838870450854301, -0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,


In [57]:
df

Unnamed: 0,cluster_label,type,cluster_problem_statement,cluster_solution_title,cluster_solution_description,n_tokens,embedding,cluster,senior_engineer_solution,improvement_1_title,...,improvement_4_title,improvement_4_objective,improvement_4_solution,improvement_4_steps,improvement_4_considerations,improvement_5_title,improvement_5_objective,improvement_5_solution,improvement_5_steps,improvement_5_considerations
333,Noisy Operation,Issue,"The product, a car toy for kids, has been repo...",Implementing Noise Reduction Technology,To provide users with more control over the no...,125,"[0.012593695893883705, 0.0010760125005617738, ...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
331,Noisy Operation,Issue,"The product, a car toy for kids, has been repo...",Implementing Noise Reduction Technology,"To address the issue of noise, the product des...",109,"[0.007600738201290369, 0.016248049214482307, 0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
285,Magnetic Drawing Tools and Toys,Fact,"The product, a magnetic drawing board set, has...",Improving the Magnet Design for Better Stability,"To address the issue of noise levels, the prod...",103,"[0.01147362869232893, 0.007993975654244423, -0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
221,Board Game Malfunctions,Issue,"The product, a magnetic drawing board set, has...",Improving the Quality and Durability of the Balls,The noise levels of the product can be address...,103,"[0.01975761167705059, 0.01129576563835144, 0.0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
202,Sensory Features and Technical Specifications,Fact,"The product, a magnetic drawing board set, has...",Reducing the Noise Level of the Product,The loudness of the product has been a signifi...,105,"[0.012011406011879444, 0.01515285111963749, -0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
128,Beads and Pen Functionality,Fact,"The product has received mixed reviews, with s...",Reducing the Clicking Sound Produced by the Be...,The clicking sound produced by the beads and p...,112,"[0.0011809485731646419, 0.015025080181658268, ...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
101,Sensory Toys with Balls and Beads,Fact,The product has received multiple complaints r...,Reducing the Noise Level of the Magnetic Beads...,"To address the issue of noise, the magnetic be...",109,"[-0.011232763528823853, 0.004871760495007038, ...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
332,Noisy Operation,Issue,"The product, a car toy for kids, has been repo...",Implementing Noise Reduction Technology,"To mitigate the noise issue, noise-cancelling ...",113,"[0.010962863452732563, 0.023807764053344727, -...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
66,Reduced Noise Levels,Improvement,"The product, a magnetic drawing board set, has...",Reducing the Noise Level of the Toy,To address both the noise level and the ball m...,121,"[-0.012581954710185528, 0.01657983474433422, -...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,
64,Reduced Noise Levels,Improvement,"The product, a magnetic drawing board set, has...",Reducing the Noise Level of the Toy,The noise level of the toy can be reduced by e...,139,"[0.00288968812674284, 0.013838870450854301, -0...",0,"{\n ""Product Improvement 1"": {\n ""Title"": ...",Noise Reduction Solutions for Car Toy,...,,,,,,,,,,


In [30]:
# Generates an expanded dataframe dedicated for the solutions
cluster_solution_df = df.copy()

# Perform first melt
melted_df_title = pd.melt(cluster_solution_df,
                          id_vars=[ 'cluster_label', 'type', 'problem_statement'],
                          value_vars=['improvement_1_title', 'improvement_2_title', 'improvement_3_title'],
                          var_name='solution_number', 
                          value_name='solution_title')

melted_df_title.drop(columns = 'solution_number', inplace = True)
melted_df_title.drop_duplicates(inplace = True)

# Perform second melt
melted_df_details = pd.melt(cluster_solution_df,
                            id_vars=[ 'cluster_label', 'type', 'problem_statement'],
                            value_vars=['improvement_1_details', 'improvement_2_details', 'improvement_3_details'],
                            var_name='solution_number', 
                            value_name='solution_details')

melted_df_details.drop(columns = 'solution_number', inplace = True)
melted_df_details.drop_duplicates(inplace = True)

# Merge the two dataframes
cluster_solutions_df = pd.merge(melted_df_title, melted_df_details, 
                     on=['cluster_label', 'type', 'problem_statement'], how='inner')

del melted_df_title, melted_df_details

cluster_solutions_df.drop_duplicates(inplace = True)


In [31]:
cluster_solutions_df.describe()

Unnamed: 0,cluster_label,type,problem_statement,solution_title,solution_details
count,370,370,369,294,294
unique,18,3,9,22,22
top,Pen Attachment and Loss,Fact,"The product, a magnetic drawing board set, has...",Board and Magnet Improvements,To address the issues related to drawing capab...
freq,27,163,72,24,24


In [32]:
cluster_solutions_df.dropna(inplace = True)

In [33]:
cluster_solutions_df.drop_duplicates(inplace = True)

In [34]:
cluster_solutions_df.describe()

Unnamed: 0,cluster_label,type,problem_statement,solution_title,solution_details
count,244,244,244,244,244
unique,17,3,9,22,22
top,Pen Attachment and Loss,Fact,"The product, a magnetic drawing board set, has...",Noise Reduction and Magnet Strength,To address the issue of noise and magnet stren...
freq,27,97,54,18,18


In [35]:
# I want to identify the top 3 solutions for each cluster
cluster_solutions_df.to_csv("cluster_solutions.csv", index=False)

In [36]:
cluster_solutions_df.columns

Index(['cluster_label', 'type', 'problem_statement', 'solution_title',
       'solution_details'],
      dtype='object')

### DE DECIS ULTERIOR IN CE FORMA INCARCAM IN SQL: SOLUTUIONS SAU ALTCEVA?