In [38]:
import pandas as pd
import numpy as np
from duckduckgo_search import DDGS
import json
import html2text
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
import os

def ddg_search(input_text,max_results=None):
    """
    parameter: input_text is a keyword to search
    The function fetch all the url related to given text and saves in json format
    """
    file_path = "ddgs.json"
    if os.path.exists(file_path):
        # Delete the file
        os.remove(file_path)
        print("File deleted successfully.")
    else:
        print("File does not exist.")

    with DDGS() as ddgs:
        results = [r for r in ddgs.text(f"{input_text}", safesearch = "off",timelimit = "d",max_results = max_results)]
    
    # Write list of dictionaries to JSON file
    with open(file_path, "w") as json_file:
        json.dump(results, json_file)
        print("file created successfully")



def do_webscraping(json_file):
    """
    parameter: it takes json file
    returns a list of dictionaries including page content, title, metadata and clean text of the urls
    """
    with open(f"{json_file}", "r") as json_file:
        data_list = json.load(json_file)

    urls = [x['href'] for x in data_list]

    structured_response = []
    for url in urls:
        try:
            loader = AsyncHtmlLoader(url)
            docs = loader.load()

            html2text_transformer = Html2TextTransformer()
            docs_transformed = html2text_transformer.transform_documents(docs)

            if docs_transformed is not None and len(docs_transformed) > 0:
                metadata = docs_transformed[0].metadata
                title = metadata.get('title', '')

                structured_response.append({
                    'summary': docs_transformed[0].page_content,
                    'title': title,
                    'metadata': metadata,
                    'clean_content': html2text.html2text(docs_transformed[0].page_content)
                })
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            continue
    
    return structured_response


In [39]:
input_text = "tata technology stocks"

In [40]:
ddg_search(input_text,max_results=5)

File deleted successfully.
file created successfully


In [41]:

response = do_webscraping(json_file="ddgs.json")


Fetching pages:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.45it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  2.65it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.74it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  3.22it/s]
Fetching pages: 100%|##########| 1/1 [00:00<00:00,  4.78it/s]


In [56]:
text = response[0]['clean_content']

Create Embeddings

In [51]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain


In [75]:
from langchain_community.llms import CTransformers

llm = CTransformers(model="C:\\Users\\omkar\\Downloads\\LLama2-Model\\llama-2-7b-chat.ggmlv3.q8_0.bin",model_type='llama',
                    config={'max_new_tokens':256,
                    'temperature':0.01})

In [44]:
os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [77]:
texts = f"Sentiment analysis: {text}"
sentiment = llm(texts)

  warn_deprecated(
Number of tokens (7331) exceeded maximum context length (512).


In [76]:
text

'Benchmarks Nifty22,378.4039.66 FEATURED FUNDS ★★★★★ Canara Robeco ELSS Tax Saver Regular-IDCW 5Y Return 19.78 % Invest Now FEATURED FUNDS ★★★★★ Canara Robeco Small Cap Fund Regular - Growth 5Y Return 27.12 % Invest Now English EditionEnglish Editionहिन्दीગુજરાતીमराठीবাংলাಕನ್ನಡമലയാളംதமிழ்తెలుగు | Today\'s ePaper Subscribe Sign In Steal Deal on ETPrime Home ETPrime Markets News Industry Rise Politics Wealth Mutual Funds Tech Careers Opinion NRI Panache ET TV Spotlight More Menu Stocks Stock Liveblog News Live Blog Earnings Podcast Market Classroom Dons of Dalal Street Recos Stock Reports PlusNew My Screener Candlestick Screener Stock Screener Stock Watch Market Calendar Stock Price Quotes Options IPOs/FPOs Expert Views Markets Data Investment Ideas Commodities ViewsNews OthersMentha OilPrecious MetalsGold MGoldSilverGold PetalSilver MicroSilver MGold Guinea Oil & EnergyNatural GasCrude OilCrude Oil MiniBase MetalsAluminiumZinc MiniLead MiniCopperZincNickelAluminium MiniLeadPlantationKap