In [20]:
from serpapi import GoogleSearch
import requests
from bs4 import BeautifulSoup
import os
from IPython.display import display, Markdown
from http.client import responses as http_responses

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.base import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain, ConversationalRetrievalChain

In [13]:
search_query = "Spotify market analysis 2022"

llm = ChatOpenAI(
    model="text-davinci-003",
    temperature=0,
    openai_api_key=os.environ["OPENAI_API_KEY"]
)

search = GoogleSearch({
    "q": search_query,
    "location": "Mumbai, Maharashtra, India",
    "api_key": os.environ["SERPAPI_API_KEY"]
})

results = search.get_dict()

if "error" in results:
    print(f"Error: {results['error']}")
else:
    print(f"Number of organic results: {len(results['organic_results'])}")

vectordb = Chroma(embedding_function=OpenAIEmbeddings())

Number of organic results: 10


In [14]:
results_condensed = [(result['title'], result['link']) for result in results['organic_results']]

content_p = ""
count_p = 0

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

for title, link in results_condensed:
    print(f"Title: {title}")
    print(f"Link: {link}")

    response = requests.get(link)
    print(f"Response code: {response.status_code}")
    print(f"Reponse Message: {http_responses[response.status_code]}")
    if response.status_code != 200:
        print()
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    webpage = ""
    webpage += f'## {title}' + "\n"

    content_p += f'## {title}' + "\n"
    for p in soup.find_all('p'):
        paragraph = p.get_text(separator=' ')

        if len(paragraph) > 100:
            webpage += paragraph
            content_p += paragraph
            content_p += "\n\n"
            count_p += 1

    doc = text_splitter.create_documents(texts=[content_p], metadatas=[{"source": link, "title": title}])
    ids = vectordb.add_documents(documents=[*doc])
    print(f"Added {len(ids)} documents to the database")
    print()

    content_p += "\n-------------------------------------------------------------------------------------\n"

Title: Spotify Revenue and Usage Statistics (2023)
Link: https://www.businessofapps.com/data/spotify-statistics/
Response code: 403
Reponse Message: Forbidden

Title: Highlights from Spotify's Q2 '22 earnings
Link: https://ads.spotify.com/en-US/news-and-insights/spotify-second-quarter-earnings-2022/
Response code: 200
Reponse Message: OK
Added 6 documents to the database

Title: Spotify stays strong through a rocky 2022, but podcasts ...
Link: https://www.insiderintelligence.com/content/spotify-stays-strong-through-rocky-2022-podcasts-have-rough-road-ahead
Response code: 200
Reponse Message: OK
Added 8 documents to the database

Title: Music subscriber market shares 2022
Link: https://midiaresearch.com/blog/music-subscriber-market-shares-2022
Response code: 200
Reponse Message: OK
Added 13 documents to the database

Title: Global music streaming subscribers 2022
Link: https://www.statista.com/statistics/653926/music-streaming-service-subscriber-share/
Response code: 200
Reponse Message

Created a chunk of size 1051, which is longer than the specified 1000


Response code: 403
Reponse Message: Forbidden

Title: Spotify Rebounds
Link: https://www.forbes.com/sites/forrester/2022/04/28/spotify-rebounds/
Response code: 200
Reponse Message: OK
Added 24 documents to the database

Title: Spotify Stock (NYSE: SPOT) Dropped By Almost 70% In 2022
Link: https://www.digitalmusicnews.com/2022/12/28/spotify-stock-december-2022-analysis/
Response code: 403
Reponse Message: Forbidden



In [5]:
# display(Markdown(content_p))

In [23]:
retriever = vectordb.as_retriever()

qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm=llm,
                                                       chain_type="stuff",
                                                       retriever=retriever,)

qa_chain_chat = ConversationalRetrievalChain.from_llm(llm=llm,
                                                      chain_type="stuff",
                                                      retriever=retriever,)

In [25]:
out = qa_chain_chat(
    {
        "chat_history": """User: First, list out current and potential competitors. Current competitors should include already established businesses/products. Potential competitors should include products and businesses that aren’t yet popular or are still under development/ beta version. Also include major or minor differences between our product and the competitor products you have identified. Analyze how aspects of our product or competitor products are better for that particular aspect. How do the target customers different? Does our product better cater to current trends and expectations of the users? How? What should the product include to meet those trends and expectations.
        
        AI: \
| Competitor | User Base (in millions) | User Region | Different Features Supported | Pricing Tiers | Unique Selling Point |
|------------|-------------------------|-------------|-----------------------------|---------------|----------------------|
| Spotify    | 345                     | Global      | Personalized playlists, Podcasts, Social sharing | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Extensive music library, personalized playlists |
| Apple Music| 72                      | Global      | Personalized playlists, Integration with Apple devices, Music videos | Individual ($9.99/month), Family ($14.99/month), Student ($4.99/month) | Seamless integration with Apple devices |
| Pandora    | 58                      | USA, Australia, New Zealand | Personalized radio stations, Podcasts | Free with ads, Plus ($4.99/month), Premium ($9.99/month) | Music genome project for personalized radio |
| YouTube Music | 30                  | Global      | Personalized playlists, Music videos, Integration with YouTube | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Integration with YouTube, music videos |
| Tidal      | 3                       | Global      | High-quality audio, Exclusive content, Music videos | Premium ($9.99/month), HiFi ($19.99/month), Family plans available | High-quality audio, exclusive content |
""",
        "question": "Re-generate the table with the new information about Spotify"
    }
)

ValueError: Unsupported chat history format: <class 'str'>. Full chat history: User: First, list out current and potential competitors. Current competitors should include already established businesses/products. Potential competitors should include products and businesses that aren’t yet popular or are still under development/ beta version. Also include major or minor differences between our product and the competitor products you have identified. Analyze how aspects of our product or competitor products are better for that particular aspect. How do the target customers different? Does our product better cater to current trends and expectations of the users? How? What should the product include to meet those trends and expectations.
        
        AI: | Competitor | User Base (in millions) | User Region | Different Features Supported | Pricing Tiers | Unique Selling Point |
|------------|-------------------------|-------------|-----------------------------|---------------|----------------------|
| Spotify    | 345                     | Global      | Personalized playlists, Podcasts, Social sharing | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Extensive music library, personalized playlists |
| Apple Music| 72                      | Global      | Personalized playlists, Integration with Apple devices, Music videos | Individual ($9.99/month), Family ($14.99/month), Student ($4.99/month) | Seamless integration with Apple devices |
| Pandora    | 58                      | USA, Australia, New Zealand | Personalized radio stations, Podcasts | Free with ads, Plus ($4.99/month), Premium ($9.99/month) | Music genome project for personalized radio |
| YouTube Music | 30                  | Global      | Personalized playlists, Music videos, Integration with YouTube | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Integration with YouTube, music videos |
| Tidal      | 3                       | Global      | High-quality audio, Exclusive content, Music videos | Premium ($9.99/month), HiFi ($19.99/month), Family plans available | High-quality audio, exclusive content |
 

In [11]:
query = """\
The following table was generated by you to show the competitive analysis in the music app market:

| Competitor | User Base (in millions) | User Region | Different Features Supported | Pricing Tiers | Unique Selling Point |
|------------|-------------------------|-------------|-----------------------------|---------------|----------------------|
| Spotify    | 345                     | Global      | Personalized playlists, Podcasts, Social sharing | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Extensive music library, personalized playlists |
| Apple Music| 72                      | Global      | Personalized playlists, Integration with Apple devices, Music videos | Individual ($9.99/month), Family ($14.99/month), Student ($4.99/month) | Seamless integration with Apple devices |
| Pandora    | 58                      | USA, Australia, New Zealand | Personalized radio stations, Podcasts | Free with ads, Plus ($4.99/month), Premium ($9.99/month) | Music genome project for personalized radio |
| YouTube Music | 30                  | Global      | Personalized playlists, Music videos, Integration with YouTube | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Integration with YouTube, music videos |
| Tidal      | 3                       | Global      | High-quality audio, Exclusive content, Music videos | Premium ($9.99/month), HiFi ($19.99/month), Family plans available | High-quality audio, exclusive content |

Re-generate the table with the new information about Spotify.
"""

llm_response = qa_chain(query)

print(llm_response['answer'])
print(llm_response['sources'])



| Competitor | User Base (in millions) | User Region | Different Features Supported | Pricing Tiers | Unique Selling Point |
|------------|-------------------------|-------------|-----------------------------|---------------|----------------------|
| Spotify    | 489                     | Global      | Personalized playlists, Podcasts, Social sharing | Free with ads, Premium ($9.99/month), Family ($14.99/month) | Extensive music library, personalized playlists |
| Apple Music| 72                      | Global      | Personalized playlists, Integration with Apple devices, Music videos | Individual ($9.99/month), Family ($14.99/month), Student ($4.99/month) | Seamless integration with Apple devices |
| Pandora    | 58                      | USA, Australia, New Zealand | Personalized radio stations, Podcasts | Free with ads, Plus ($4.99/month), Premium ($9.99/month) | Music genome project for personalized radio |
| YouTube Music | 30                  | Global      | Personalized playlis

In [8]:
query = "Name 3 devices that help you monitor your plant's health"

print(vectordb.similarity_search(query, k=5)[0].page_content)

Customized Research & Analysis projects:
                
                            


                                
                    Get quick analyses with our professional research service
                
                            


                                The best of the best: the portal for top lists & rankings:
                            


                        Strategy and business building for the data-driven economy:
                    

Industry-specific and extensively researched technical data (partially from exclusive partnerships).  A paid subscription is required for full access.


                                Show sources information
                             
                                Show publisher information
                             
                                Use Ask Statista Research Service
