<a href="https://colab.research.google.com/github/saikumar7952/AI/blob/main/AI_projects.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Topic modelling on inline sample documents**

In [7]:
pip install gensim nltk



In [8]:
import nltk
from gensim import corpora
from gensim.models import LdaModel
from nltk.corpus import stopwords
import re  # used only by the tokeniser in this cell

# Fetch the English stop-word list used to filter the sample documents.
nltk.download('stopwords')

# Five short sentences that serve as the toy corpus for the LDA demo.
documents = [
    'The stock market is showing a downward trend in tech companies.',
    'A new political debate is scheduled for next week.',
    'Football players are preparing for the upcoming season.',
    'Technology innovations are changing how we live.',
    'The government will discuss new policies next month.'
]

# A set gives constant-time membership tests inside the comprehension below.
stop_words = set(stopwords.words('english'))

# Tokenise on runs of letters so punctuation is dropped; a plain str.split()
# would keep tokens such as "companies." and "week." with the trailing period.
texts = [
    [word for word in re.findall(r'[a-z]+', doc.lower()) if word not in stop_words]
    for doc in documents
]

# Map every token to an integer id, then encode each document as bag-of-words.
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# random_state pins the model's random initialisation so that re-running the
# cell reproduces the same topics.
lda = LdaModel(corpus, num_topics=2, id2word=dictionary, passes=15, random_state=42)

# Show the four highest-weighted words of each topic.
topics = lda.print_topics(num_words=4)
for topic in topics:
    print(topic)


(0, '0.083*"new" + 0.083*"next" + 0.050*"discuss" + 0.050*"government"')
(1, '0.062*"stock" + 0.062*"trend" + 0.062*"market" + 0.062*"showing"')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


***USING DATASET***


Extract the text from the uploaded document so that it can be analyzed:

- For PDF: use a library such as PyPDF2 or pdfplumber.
- For DOCX: use python-docx.
- For PPT (.pptx): use python-pptx.


In [9]:
pip install pdfplumber


Collecting pdfplumber
  Downloading pdfplumber-0.11.5-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.5/42.5 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.5-py3-none-any.whl (59 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.5/59.5 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

To extract text from a PDF:

In [11]:
import pdfplumber

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``pdfplumber.open``.

    Returns:
        str: The text of all pages, joined with a newline between pages.
        A page whose ``extract_text()`` result is empty or None contributes
        an empty string. A PDF without pages yields "".
    """
    with pdfplumber.open(pdf_path) as pdf:
        # ``or ""`` guards against a page for which extract_text() returns
        # None instead of a string; concatenating None would raise TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Separate pages with a newline so the last line of one page does not
    # fuse with the first line of the next.
    return "\n".join(page_texts)
# Read the pitch deck; the relative path is resolved against the kernel's
# current working directory.
text = extract_text_from_pdf("pitchdeck.pdf")
# Bare last expression so the notebook displays the extracted string.
text


'GreenTech Solutions - Pitch Deck\nIntroduction\nGreenTech Solutions is an innovative startup dedicated to providing affordable and sustainable solar energy solutions for urban and rural households. Our mission is to reduce dependence on fossil fuels and make clean energy accessible to everyone.\nProblem\nTraditional energy sources are expensive, environmentally harmful, and unreliable. Millions of homes and businesses struggle with high electricity costs, frequent power outages, and lack of access to renewable energy alternatives.\nSolution\nWe offer a solar-powered energy storage system that reduces electricity costs by 30%, provides backup power during outages, and integrates with smart home technology for energy efficiency. Our modular design allows for scalability based on energy needs.\nMarket\n- The global renewable energy market is expected to grow at 20% annually.\n- The solar energy storage industry is projected to reach $200 billion by 2030.\n- Our initial target market incl

**Text Preprocessing**

In [16]:
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Download each NLTK resource once. word_tokenize needs the Punkt tokenizer
# data; both 'punkt' and 'punkt_tab' are fetched, as in the original cell
# (presumably to cover NLTK releases that look for either name).
nltk.download('punkt')
nltk.download('punkt_tab')
# English stop-word list used by preprocess_text.
nltk.download('stopwords')
def preprocess_text(text):
    """Turn raw text into lowercase, stop-word-free, stemmed tokens.

    Args:
        text: The raw string to clean.

    Returns:
        list[str]: The stemmed tokens, in their original order.
    """
    # Replace (rather than delete) every non-letter with a space so that words
    # joined by punctuation, e.g. "solar-powered", remain separate tokens
    # instead of fusing into "solarpowered".
    text = re.sub(r'[^A-Za-z\s]', ' ', text)
    text = text.lower()
    tokens = word_tokenize(text)
    # Set membership keeps the stop-word filter cheap for long documents.
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    return [stemmer.stem(word) for word in tokens if word not in stop_words]
# Token list for the whole deck: lowercased, stop words removed, stemmed.
cleaned_text = preprocess_text(text)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


**Pitch Deck Analysis**

In [19]:
def detect_sections(text):
    """Split pitch-deck text into its known sections.

    A line counts as a heading when, ignoring case, surrounding whitespace and
    a trailing colon, it equals one of the known section names. Every
    following non-blank line up to the next heading belongs to that section.
    Matching whole lines avoids false hits inside other words (for example
    "Solution" inside "Solutions").

    Args:
        text: The deck text as one string with its line breaks intact. An
            iterable of strings is still accepted and is joined with single
            spaces, but headings can only be found if line breaks survive.

    Returns:
        dict[str, str]: One entry per known section name, always in the same
        order. The value is the section body with its lines joined by newline
        characters, or "" when the heading is absent or has no body.
    """
    section_names = (
        "Introduction",
        "Problem",
        "Solution",
        "Market",
        "Business Model",
        "Financials",
        "Team",
    )
    if not text:
        return {name: "" for name in section_names}

    # A str must be used as-is: " ".join() on a str would insert a space
    # between every single character.
    text_string = text if isinstance(text, str) else " ".join(text)

    by_lowercase = {name.lower(): name for name in section_names}
    bodies = {name: [] for name in section_names}
    current = None  # section whose body lines are currently being collected
    for raw_line in text_string.splitlines():
        line = raw_line.strip()
        heading = by_lowercase.get(line.rstrip(":").strip().lower())
        if heading is not None:
            current = heading
        elif current is not None and line:
            bodies[current].append(line)
    return {name: "\n".join(lines) for name, lines in bodies.items()}
# Pass the raw extracted text: the preprocessed token list (cleaned_text) is
# lowercased, stemmed and has no line breaks, so capitalised section names
# such as "Introduction" cannot be located in it.
sections = detect_sections(text)


**Section Evaluation:**
Evaluate the quality of each section using keyword analysis or sentiment analysis.
For example, check if the Problem section clearly describes a pain point.


In [20]:
def evaluate_section(section_text):
    """Return one line of feedback on whether a section mentions a problem.

    Args:
        section_text: Text of the section; may be empty or None.

    Returns:
        str: A praise message when the word "problem" occurs in the text in
        any letter case, otherwise a prompt to elaborate.
    """
    # Lowercase first so "Problem" / "PROBLEM" are recognised; ``or ""`` lets
    # a missing (None) section fall through to the "elaborate" message.
    if "problem" in (section_text or "").lower():
        return "Good job defining the problem!"
    return "Consider elaborating more on the problem you're solving."
# Feedback string for the text stored under the "Problem" key of `sections`.
problem_feedback = evaluate_section(sections["Problem"])


**Automated Feedback Generation**

In [21]:
def generate_feedback(sections):
    """Collect a feedback message for each reviewed pitch-deck section.

    Args:
        sections: Mapping from section name to section text. It must contain
            the keys "Problem", "Solution", "Market", "Financials" and "Team";
            a missing key raises KeyError.

    Returns:
        dict: Section name -> the value ``evaluate_section`` returns for that
        section's text, in the order listed above.
    """
    reviewed = ("Problem", "Solution", "Market", "Financials", "Team")
    return {name: evaluate_section(sections[name]) for name in reviewed}
# Evaluate the reviewed sections and print the resulting name -> message dict.
feedback = generate_feedback(sections)
print(feedback)


{'Problem': "Consider elaborating more on the problem you're solving.", 'Solution': "Consider elaborating more on the problem you're solving.", 'Market': "Consider elaborating more on the problem you're solving.", 'Financials': "Consider elaborating more on the problem you're solving.", 'Team': "Consider elaborating more on the problem you're solving."}
