In [3]:
import pandas as pd

# Load the previously clustered ideas from CSV (this cell only reads; nothing is saved here)
df = pd.read_csv('output/clustered_gmm_gemini_api_app_ideas.csv')

In [26]:
# Preview the first rows to check which columns were loaded
df.head()

Unnamed: 0,Title,YouTube Link,brief_desc,Built With,By,Location,cluster
0,Davv AI,https://www.youtube.com/watch?v=6ONuSa48fb4,"The application is an AI-powered assistant that simplifies tasks across various operating systems through voice and text interactions, offering features such as natural language command interpretation and executable code generation using the Gemini AI API.","Web/Chrome, google speech recognition","Ali Debbache, abdou guerguer",Algeria,1
1,DDMD,https://www.youtube.com/watch?v=BxzJcNyZNEw,"The application uses a Random Forest-based model to classify digital drug music from audio files, leveraging 34 features extracted from these files and trained on 3,176 non-copyrighted MP3 files. The model is used in a web application that provides an intuitive interface for users to upload or enter YouTube URLs of audio files to be classified.","Web/Chrome, Google Colab, Google Slides, Google Sppech-to-Text AI",,Algeria,0
2,Elfelah,https://www.youtube.com/watch?v=FCaPwgGfqe4,"The application provides real-time disease diagnosis, crop management strategies, and actionable insights to farmers through an AI-powered system that uses external data and machine learning models.",Web/Chrome,Elfelah,Algeria,0
3,FormBlueprint,https://www.youtube.com/watch?v=xUN-rOcofdE,"The application uses FormBlueprint to create a conversational experience for businesses, educators, and researchers by generating form fields and sections with validation and branching logic, and then analyzing data through conversation to gain insights.","Web/Chrome, Google Drive, Google Auth","Mohamed Boudelaa, Amina Daoud",Algeria,0
4,GeoGemini,https://www.youtube.com/watch?v=lfnIxOIv0T4,"The application uses satellite imagery to analyze environmental changes, comparing images to detect trends and identify key features. It leverages advanced AI capabilities from the Gemini API to provide detailed insights and high-level analysis for research, education, and sustainability efforts.","Web/Chrome, Firebase",Ahmane Oussama,Algeria,1


In [6]:
def count_tokens_per_cluster(df, column='brief_desc', cluster_column='cluster'):
    """
    Concatenate the text of each cluster and count its whitespace-separated tokens.

    "Tokens" here are the pieces produced by ``str.split()`` (whitespace-delimited
    words), not model/tokenizer tokens. Clusters are reported in order of first
    appearance in ``df``. Missing values (NaN/None) in ``column`` are skipped and
    the remaining values are converted to ``str`` before joining, so a blank cell
    does not raise ``TypeError``.

    Parameters:
    - df: DataFrame containing the data.
    - column: The column name to concatenate and count tokens from.
    - cluster_column: The column name representing clusters.

    Returns:
    - A DataFrame with one row per cluster and the columns ``cluster_column``,
      'concat_ideas' (space-joined text) and 'token_counts' (number of
      whitespace-separated tokens). The columns are present even when ``df``
      has no rows.
    """
    results = []

    # unique() keeps first-appearance order, which fixes the row order of the result
    for cluster in df[cluster_column].unique():
        # Select this cluster's texts; drop missing cells and coerce the rest to str
        # so that str.join never receives a float NaN or other non-string value.
        texts = df.loc[df[cluster_column] == cluster, column].dropna().astype(str)

        # Concatenate all the content from the specified column into a single string
        concatenated_string = " ".join(texts)

        results.append({
            cluster_column: cluster,
            'concat_ideas': concatenated_string,
            'token_counts': len(concatenated_string.split()),
        })

    # Passing columns explicitly keeps the schema stable for an empty input
    return pd.DataFrame(results, columns=[cluster_column, 'concat_ideas', 'token_counts'])

# Build the per-cluster table (concatenated descriptions and token counts) from the loaded ideas
result_df = count_tokens_per_cluster(df)

In [9]:
import getpass
import os

# Ask for the key interactively only when the environment does not already supply one,
# so the secret is never hardcoded in the notebook.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

In [14]:
from langchain_ollama import ChatOllama
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate

# Initialize the Ollama model
# NOTE(review): local_model is not referenced by any chain in the visible cells —
# confirm whether it is still needed.
local_model = ChatOllama(
    model="llama3.2:3b-instruct-q8_0",
    temperature=0.2,
)

# Gemini chat model; this is the model actually piped into `chain` below.
# NOTE(review): max_tokens=50 looks low for a multi-sentence paragraph summary —
# confirm responses are not being truncated.
gemini_model = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.2,
    max_tokens=50,
    timeout=None,
    max_retries=2,
    # other params...
)

# System prompt template; {concat_ideas} is filled in at invoke time.
# The leading whitespace inside the triple-quoted string is part of the prompt text.
system_prompt = """
                Gemini API Application Ideas: {concat_ideas}
                You are a helpful assistant that captures the main theme found in description of the provided Gemini API Application Ideas and describe those themes in a single paragraph in less than 3 sentences.
                """

# Human-turn text passed as the {input} template variable.
# NOTE(review): the name `input` shadows Python's builtin input() for the rest of the
# notebook; later cells read this variable, so a rename must be done everywhere at once.
input = "Tell me the theme you found from the provided ideas in a single paragraph."

# Two-message chat prompt: the system template above plus the human {input} slot
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            system_prompt,
        ),
        ("human", "{input}"),
    ]
)

# Compose prompt -> model; invoke with a dict providing "concat_ideas" and "input"
chain = prompt | gemini_model

In [16]:
import time

# Pause between consecutive requests (simple client-side throttling)
REQUEST_DELAY_SECONDS = 2

# Initialize the summary column
result_df['summary'] = None

cluster_num = len(result_df)
row_labels = list(result_df.index)

# Summarize every cluster row. Rows are addressed by their index label via .at,
# and the progress message reports the real cluster label: row order follows
# first appearance in the data, so row 0 is not necessarily cluster 0.
# Assumes result_df has the 'cluster' and 'concat_ideas' columns produced by
# count_tokens_per_cluster with its default cluster_column.
for position, row_label in enumerate(row_labels):
    cluster_label = result_df.at[row_label, 'cluster']
    concat_ideas = result_df.at[row_label, 'concat_ideas']

    # Invoke the model with the current cluster's concatenated ideas
    output = chain.invoke(
        {
            "concat_ideas": concat_ideas,
            "input": input,  # human-turn text defined in the prompt-setup cell
        }
    )

    # Store the output in the summary column
    result_df.at[row_label, 'summary'] = output.content

    print(f"Summarization for cluster {cluster_label} is successful.")

    # Throttle between requests only; no need to wait after the final one
    if position < cluster_num - 1:
        time.sleep(REQUEST_DELAY_SECONDS)

Summarization for cluster 0 is successful.
Summarization for cluster 1 is successful.


In [17]:
# Persist the per-cluster summaries. index=False keeps the positional index out of the
# file, so reloading it does not create a spurious "Unnamed: 0" column.
result_df.to_csv("output/clusters_summary.csv", index=False)

In [19]:
# Widen the column display so the full summary text is shown instead of being elided
pd.set_option("display.max_colwidth", 1000)

# Reload the saved summaries from disk
result_df = pd.read_csv("output/clusters_summary.csv")

# Show only the cluster label next to its summary
result_df.loc[:, ['cluster', 'summary']]

Unnamed: 0,cluster,summary
0,1,"The Gemini API application ideas showcase a wide range of potential uses for this powerful AI technology, with a strong emphasis on personalization and user-centric experiences. Many of the ideas focus on enhancing productivity, learning, and communication through conversational interfaces, intelligent automation, and tailored recommendations. Whether it's simplifying complex tasks, providing personalized support, or creating engaging learning experiences, these applications demonstrate the transformative potential of Gemini AI across diverse fields. \n"
1,0,"The provided Gemini API application ideas showcase a strong theme of using AI to enhance and personalize various aspects of daily life. From education and healthcare to productivity and entertainment, these applications aim to leverage AI's capabilities to simplify tasks, provide personalized recommendations, and improve user experiences across a wide range of domains. The focus is on creating tools that are both innovative and user-friendly, making AI accessible and beneficial for individuals and businesses alike. \n"


In [20]:
# Build one "Cluster <label>, Description: <summary>" line per summary, grouped by
# cluster in order of first appearance, and join them into a single text block.
cluster_lines = [
    f"Cluster {label}, Description: {text}\n"
    for label in result_df['cluster'].unique()
    for text in result_df.loc[result_df['cluster'] == label, 'summary'].tolist()
]
concatenated_text = "".join(cluster_lines)

# Show the assembled text
print(concatenated_text)

Cluster 1, Description: The Gemini API application ideas showcase a wide range of potential uses for this powerful AI technology, with a strong emphasis on personalization and user-centric experiences.  Many of the ideas focus on enhancing productivity, learning, and communication through conversational interfaces, intelligent automation, and tailored recommendations.  Whether it's simplifying complex tasks, providing personalized support, or creating engaging learning experiences, these applications demonstrate the transformative potential of Gemini AI across diverse fields. 

Cluster 0, Description: The provided Gemini API application ideas showcase a strong theme of using AI to enhance and personalize various aspects of daily life. From education and healthcare to productivity and entertainment, these applications aim to leverage AI's capabilities to simplify tasks, provide personalized recommendations, and improve user experiences across a wide range of domains. The focus is on cre

In [21]:
# Write the per-cluster descriptions to disk. The encoding is pinned to UTF-8 so
# non-ASCII characters in the model output are written the same way on every platform;
# plain 'w' is enough because the file is only written, never read back here.
with open('output/cluster_individual_desc.txt', 'w', encoding='utf-8') as text_file:
    text_file.write(concatenated_text)

In [27]:
# System prompt for the second pass; {concatenated_text} is filled in at invoke time.
# This rebinds `system_prompt`, `input`, `prompt` and `chain` from the earlier prompt-setup
# cell, so re-running the per-cluster loop after this cell would use these new values.
system_prompt = """
                Clusters Description: {concatenated_text}
                You are a helpful assistant that summarizes the provided summary into a single paragraph in less than 3 sentences.
                """

# Human-turn text passed as the {input} template variable.
# NOTE(review): `input` shadows Python's builtin input().
input = "Provide me with an executive summary of the similarities and differences among each cluster's explanation in a single paragraph."

# Two-message chat prompt: the system template above plus the human {input} slot
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            system_prompt,
        ),
        ("human", "{input}"),
    ]
)

# Reuses the gemini_model created in the earlier setup cell
chain = prompt | gemini_model

# Invoke the model with the current concatenated_text
output = chain.invoke(
    {
        "concatenated_text": concatenated_text,  # per-cluster descriptions assembled in an earlier cell
        "input": input,
    }
)

# Show the model's reply text
print(output.content)

Both clusters highlight the potential of Gemini API applications to enhance and personalize various aspects of daily life.  Cluster 1 emphasizes the focus on user-centric experiences, particularly through conversational interfaces, intelligent automation, and tailored recommendations.  Cluster 0, while also acknowledging personalization, emphasizes the broader goal of making AI accessible and beneficial across diverse domains, including education, healthcare, productivity, and entertainment.  Both clusters ultimately showcase the transformative potential of Gemini AI to simplify tasks, improve user experiences, and create innovative tools for individuals and businesses alike. 



In [28]:
import pprint

# Pretty-print the reply wrapped at 120 columns, packing as much as fits on each line
pprint.pprint(output.content, width=120, compact=True)

('Both clusters highlight the potential of Gemini API applications to enhance and personalize various aspects of daily '
 'life.  Cluster 1 emphasizes the focus on user-centric experiences, particularly through conversational interfaces, '
 'intelligent automation, and tailored recommendations.  Cluster 0, while also acknowledging personalization, '
 'emphasizes the broader goal of making AI accessible and beneficial across diverse domains, including education, '
 'healthcare, productivity, and entertainment.  Both clusters ultimately showcase the transformative potential of '
 'Gemini AI to simplify tasks, improve user experiences, and create innovative tools for individuals and businesses '
 'alike. \n')


In [29]:
# Save the executive summary. The encoding is pinned to UTF-8 so non-ASCII characters
# in the model output are written the same way on every platform; plain 'w' is enough
# because the file is only written, never read back here.
with open('output/final_summary.txt', 'w', encoding='utf-8') as text_file:
    text_file.write(output.content)