# Projet Ya MR

In [1]:
pip install pyqt5 beautifulsoup4 nltk requests

Note: you may need to restart the kernel to use updated packages.


In [2]:
import html
import sys
from collections import defaultdict

import nltk
import requests
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QFont
from PyQt5.QtWidgets import QApplication, QMainWindow, QVBoxLayout, QWidget, QLabel, QLineEdit, QPushButton, QTextEdit

# Download the NLTK data used below: the Punkt tokenizer models for
# word_tokenize and the stop-word lists.
# NOTE(review): recent NLTK releases load the tokenizer from 'punkt_tab'
# instead of 'punkt'; both are requested so that either version finds its
# data. On a release that does not know 'punkt_tab' the call only reports
# a failed download and execution continues.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Shared stemmer and English stop-word set used when processing text and
# when stemming search keywords.
stemmer = PorterStemmer()
stop_words = set(stopwords.words("english"))

def get_html_source(url, timeout=10):
    """Fetch a page and return its HTML source.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on an unresponsive host.

    Returns:
        The response body as text, or None when the request fails (network
        error, timeout, or an HTTP error status); the error is printed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Erreur lors de la récupération de l'URL: {e}")
        return None
    return response.text

def extract_text_from_html(html_source):
    """Return the text content of an HTML document.

    Args:
        html_source: HTML markup as a string.

    Returns:
        The document text, with the strings of separate elements joined by
        a single space.
    """
    soup = BeautifulSoup(html_source, "html.parser")
    # A separator keeps words from adjacent tags apart; without it markup
    # such as "<p>foo</p><p>bar</p>" would yield the single word "foobar".
    return soup.get_text(separator=" ")

def process_text(text):
    """Tokenize *text* and return the list of stemmed keywords.

    Tokens are lower-cased; anything that is not purely alphabetic or that
    appears in the module-level ``stop_words`` set is discarded, and the
    remaining words are reduced to their stem with the shared ``stemmer``.
    """
    lowered = (token.lower() for token in word_tokenize(text))
    return [
        stemmer.stem(candidate)
        for candidate in lowered
        if candidate.isalpha() and candidate not in stop_words
    ]

def build_inverted_index(urls):
    """Build an inverted index over the pages at *urls*.

    Each page is downloaded, converted to text and processed into stems.
    Pages that could not be fetched (or came back empty) are skipped.

    Returns:
        A dict mapping each stem to a list of ``(url, position)`` tuples,
        where position is the index of the stem in that page's token list.
    """
    postings = {}
    for page_url in urls:
        page_html = get_html_source(page_url)
        if not page_html:
            continue
        stems = process_text(extract_text_from_html(page_html))
        for offset, stem in enumerate(stems):
            postings.setdefault(stem, []).append((page_url, offset))
    return postings

# List of URLs to index. The commented-out entries are alternative pages
# that are currently disabled.
urls = [
    # "https://www.musicca.com/note-finder",
    # "https://www.musicca.com/interval-finder",
    # "https://www.musicca.com/chord-finder",
    # "https://www.musicca.com/scale-finder"
    'https://smartways.com.tn',
    'https://smartways.com.tn/index.php/contact-us/',
    'https://satisfyinsight.com/'
]

# Build the inverted index. This downloads every URL above as soon as the
# statement runs, so it needs network access.
inverted_index = build_inverted_index(urls)

class IndexSearchApp(QMainWindow):
    """Main window for searching the module-level ``inverted_index``.

    The window holds a keyword field, a search button and a read-only
    result area listing every (URL, position) where the keyword's stem
    occurs.
    """

    def __init__(self):
        """Create the widgets, lay them out and wire the search button."""
        super().__init__()
        self.setWindowTitle("Recherche par Indexation Stylisée")
        self.setGeometry(200, 200, 600, 500)

        # Create the widgets, stacked vertically from the top.
        layout = QVBoxLayout()
        layout.setAlignment(Qt.AlignTop)

        title_label = QLabel("Moteur de Recherche par Indexation")
        title_label.setFont(QFont("Arial", 16, QFont.Bold))
        title_label.setAlignment(Qt.AlignCenter)
        layout.addWidget(title_label)

        self.label = QLabel("Entrez le mot-clé à rechercher :")
        self.label.setFont(QFont("Arial", 12))
        layout.addWidget(self.label)

        self.keyword_input = QLineEdit()
        self.keyword_input.setFont(QFont("Arial", 12))
        self.keyword_input.setStyleSheet("padding: 6px; border: 1px solid #555; border-radius: 8px;")
        layout.addWidget(self.keyword_input)

        self.search_button = QPushButton("Rechercher")
        self.search_button.setFont(QFont("Arial", 12))
        self.search_button.setStyleSheet("padding: 8px; border-radius: 8px; background-color: #3498db; color: white;")
        self.search_button.clicked.connect(self.search_keyword)
        layout.addWidget(self.search_button)

        self.result_display = QTextEdit()
        self.result_display.setReadOnly(True)
        self.result_display.setFont(QFont("Arial", 10))
        self.result_display.setStyleSheet("border: 1px solid #555; border-radius: 8px; padding: 10px;")
        layout.addWidget(self.result_display)

        # Set up the central widget.
        container = QWidget()
        container.setLayout(layout)
        self.setCentralWidget(container)

        # Apply a global style to the window.
        self.setStyleSheet("""
            QMainWindow { background-color: #f3f4f6; }
            QLabel { color: #333; }
            QPushButton:hover { background-color: #2980b9; }
            QTextEdit { background-color: #ffffff; color: #333; }
        """)

    def search_keyword(self):
        """Look up the typed keyword in the index and display the matches.

        The keyword is trimmed, lower-cased and stemmed with the shared
        ``stemmer`` so that it matches the stems stored in
        ``inverted_index``. The result area shows one line per
        (URL, position) occurrence, or a message when nothing matches or
        the field is empty.
        """
        keyword = self.keyword_input.text().strip().lower()
        if not keyword:
            self.result_display.setHtml("<b>Veuillez entrer un mot-clé.</b>")
            return

        # The keyword is user input rendered as HTML: escape it so markup
        # characters are shown literally instead of being interpreted.
        safe_keyword = html.escape(keyword)
        results = inverted_index.get(stemmer.stem(keyword))
        if not results:
            self.result_display.setHtml(f"<b>Aucun résultat trouvé pour le mot-clé '{safe_keyword}'.</b>")
            return

        lines = [f"<b>Résultats pour le mot-clé '{safe_keyword}':</b>", ""]
        for url, position in results:
            safe_url = html.escape(url)
            lines.append(f"URL: <a href='{safe_url}'>{safe_url}</a> - Position: {position}")
        # Rich text ignores "\n"; explicit <br> tags are needed for each
        # result to appear on its own line.
        self.result_display.setHtml("<br>".join(lines))

# Create the PyQt5 application, show the main window and run the event
# loop; the interpreter exits with the event loop's return code.
app = QApplication(sys.argv)
window = IndexSearchApp()
window.show()
sys.exit(app.exec_())


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\MSI\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\MSI\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
