<a href="https://colab.research.google.com/github/srikanth-gedela/Langchain/blob/main/Langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 **Streamlit**: Streamlit is a Python library that makes it easy to create interactive web apps. It is very popular for data science and machine learning tasks, as it allows you to quickly and easily create visualizations and dashboards.

**Yahooquery**: Yahooquery is a Python library that makes it easy to access data from Yahoo Finance. It can be used to get stock prices, news, and other financial information.

**Edgar**: Edgar is a Python library that makes it easy to access data from the Securities and Exchange Commission (SEC). It can be used to get corporate filings, such as 10-Ks and 10-Qs.

**Langchain**: Langchain is a Python library that makes it easy to build and train language models. It is based on the PyTorch library and supports a variety of different model architectures.

**Transformers**: Transformers is a Python library that provides implementations of the Transformer architecture. The Transformer architecture is a powerful neural network architecture that is used for a variety of natural language processing tasks, such as machine translation, text summarization, and question answering.

**Einops**: Einops is a Python library that provides a set of functions for performing efficient tensor operations. It can be used to speed up the training and inference of machine learning models.

**Accelerate**: Accelerate is a Python library that provides a set of tools for improving the performance of machine learning models. It can be used to optimize the code for different hardware platforms, such as CPUs, GPUs, and TPUs.

**Bitsandbytes**: Bitsandbytes is a Python library that provides a set of tools for working with large datasets. It can be used to load, process, and store large datasets efficiently.




In [2]:
%%shell

# Install streamlit, yahooquery, edgar, and langchain
pip install -q streamlit yahooquery edgar langchain

# Install transformers, einops, accelerate, and bitsandbytes
pip install -q transformers einops accelerate bitsandbytes

nvidia-smi

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.8/49.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.0/990.0 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m104.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m103.5 MB/s[0m



In [4]:
## Using this code as is will incur OpenAI fees
# If you do not wish to incur these fees you will need to select an alternative LLM

import streamlit as st
import pandas as pd
from PIL import Image

import yfinance as yf
from yahooquery import Ticker
from datetime import datetime, timedelta
from edgar import Company, TXTML

#from dotenv import load_dotenv
import os

from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter, PythonCodeTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings




In [5]:
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline
import torch
model = "tiiuae/falcon-7b-instruct" #tiiuae/falcon-40b-instruct
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = pipeline(
"text-generation", #task
model=model,
tokenizer=tokenizer,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
device_map="auto",
max_length=200,
do_sample=True,
top_k=10,
num_return_sequences=1,
eos_token_id=tokenizer.eos_token_id
)
llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})


Downloading (…)okenizer_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

Downloading (…)/configuration_RW.py:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- configuration_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)main/modelling_RW.py:   0%|          | 0.00/47.5k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b-instruct:
- modelling_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)model.bin.index.json:   0%|          | 0.00/16.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusFo

In [6]:
### Use the code to call ChatGPT in case youOpen API Key 
#openai_api_key = os.getenv("OPENAI_API_KEY")
#load_dotenv()

def format_large_number(num):
    if abs(num) >= 1_000_000_000_000:
        return f"${num / 1_000_000_000_000:.2f}T"
    elif abs(num) >= 1_000_000_000:
        return f"${num / 1_000_000_000:.2f}B"
    elif abs(num) >= 1_000_000:
        return f"${num / 1_000_000:.2f}M"
    else:
        return str(num)

# Create a dictionary for stocks
stocks = {
    "Apple - 'AAPL'": {"name": "APPLE INC", "symbol": "AAPL", "cik": "0000320193"},
    "Alphabet - 'GOOG'": {"name": "Alphabet Inc.", "symbol": "GOOG", "cik": "0001652044"},
    "Facebook - 'META'": {"name": "META PLATFORMS INC", "symbol": "META", "cik": "0001326801"},
    "Amazon - 'AMZN'": {"name": "AMAZON COM INC", "symbol": "AMZN", "cik": "0001018724"},
    "Netflix - 'NFLX'": {"name": "NETFLIX INC", "symbol": "NFLX", "cik": "0001065280"},
    "Microsoft - 'MSFT'": {"name": "MICROSOFT CORP", "symbol": "MSFT", "cik": "0000789019"},
    "Tesla - 'TSLA'": {"name": "TESLA INC", "symbol": "TSLA", "cik": "0001318605"},
}
def get_recommendation(stock_cik, question, llm=llm, model_name=model):

    company = Company(stock_cik["name"], stock_cik["cik"])
    doc = company.get_10K()
    text = TXTML.parse_full_10K(doc)

    #llm = OpenAI(temperature=0.15, openai_api_key=openai_api_key)

    lts = int(len(text) / 3)
    lte = int(lts * 2)

    text_splitter = PythonCodeTextSplitter(chunk_size=3000, chunk_overlap=300)
    docs = text_splitter.create_documents([text[lts:lte]])

    # Get your embeddings engine ready
    #embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    # Embed your documents and combine with the raw text in a pseudo db. Note: This will make an API call to OpenAI
    docsearch = FAISS.from_documents(docs, embeddings)

    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())
    query = question
    analysis = qa.run(query)

    return analysis.translate(str.maketrans("", "", "_*"))




In [9]:
%%shell
pip install -q sentence_transformers



In [None]:
#st.set_page_config(page_title="Stock Information", layout="wide", initial_sidebar_state="collapsed", page_icon="Logo.png")
st.set_page_config(page_title="Stock Information", layout="wide", initial_sidebar_state="collapsed", page_icon="Logo.png")
col1, col2 = st.columns((1, 3))
icon = Image.open("Logo.png")
col1.image(icon, width=100)
selected_stock = col1.selectbox("Select a stock", options=list(stocks.keys()), index=0)

# Get stock data from yfinance
ticker = yf.Ticker(stocks[selected_stock]["symbol"])

# Calculate the date range for the last 30 days
end_date = datetime.now()
start_date = end_date - timedelta(days=360)

# Get the closing prices for the selected stock in the last 30 days
data = ticker.history(start=start_date, end=end_date)
closing_prices = data["Close"]

# Plot the line chart in the first column
col1.line_chart(closing_prices, use_container_width=True)

# Get the company long description
long_description = ticker.info["longBusinessSummary"]

# Display the long description in a text box in the second column
col2.title("Company Overview")
col2.write(long_description)

# Use yahooquery to get earnings and revenue
ticker_yq = Ticker(stocks[selected_stock]["symbol"])
earnings = ticker_yq.earnings

financials_data = earnings[stocks[selected_stock]["symbol"]]['financialsChart']['yearly']


df_financials = pd.DataFrame(financials_data)
df_financials['revenue'] = df_financials['revenue']
df_financials['earnings'] = df_financials['earnings']
df_financials = df_financials.rename(columns={'earnings': 'yearly earnings', 'revenue': 'yearly revenue'})

numeric_cols = ['yearly earnings', 'yearly revenue']
df_financials[numeric_cols] = df_financials[numeric_cols].applymap(format_large_number)
df_financials['date'] = df_financials['date'].astype(str)
df_financials.set_index('date', inplace=True)

# Display earnings and revenue in the first column
col1.write(df_financials)

summary_detail = ticker_yq.summary_detail[stocks[selected_stock]["symbol"]]

obj = yf.Ticker(stocks[selected_stock]["symbol"])

pe_ratio = '{0:.2f}'.format(summary_detail["trailingPE"])
price_to_sales = summary_detail["fiftyTwoWeekLow"]
target_price = summary_detail["fiftyTwoWeekHigh"]
market_cap = summary_detail["marketCap"]
ebitda = ticker.info["ebitda"]
tar = ticker.info["targetHighPrice"]
rec = ticker.info["recommendationKey"].upper()

# Format large numbers
market_cap = format_large_number(market_cap)
ebitda = format_large_number(ebitda)

# Create a dictionary for additional stock data
additional_data = {
    "P/E Ratio": pe_ratio,
    "52 Week Low": price_to_sales,
    "52 Week High": target_price,
    "Market Capitalisation": market_cap,
    "EBITDA": ebitda,
    "Price Target": tar,
    "Recommendation": rec
}

# Display additional stock data in the first column
for key, value in additional_data.items():
    col1.write(f"{key}: {value}")

st.title("Stock Analysis Demo. Powered by LangChain 🦜🔗 ")
col2.title("Opportunities for investors")
print(f"**********\nstocks[selected_stock]\n*************\n{stocks[selected_stock]}\n\n**********\n")
col2.write(get_recommendation(stocks[selected_stock], "What are this firm's key products and services?"))
col2.write(get_recommendation(stocks[selected_stock], "What are the new products and growth opportunities for this firm. What are its unique strengths?"))
col2.write(get_recommendation(stocks[selected_stock], "Who are this firms key competitors? What are the principal threats?"))



**********
stocks[selected_stock]
*************
{'name': 'APPLE INC', 'symbol': 'AAPL', 'cik': '0000320193'}

**********



Downloading (…)316c2/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)683d8316c2/README.md:   0%|          | 0.00/8.67k [00:00<?, ?B/s]

Downloading (…)3d8316c2/config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

Downloading (…)/configuration_RW.py:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

Downloading model.mlmodel:   0%|          | 0.00/397k [00:00<?, ?B/s]

Downloading weight.bin:   0%|          | 0.00/27.7G [00:00<?, ?B/s]