In [None]:
# ========== STEP 1: Install required packages ==========
!pip install requests beautifulsoup4 nltk matplotlib

In [None]:
# ========== STEP 2: Imports ==========
import requests
from bs4 import BeautifulSoup
import re
import nltk
from nltk.stem import PorterStemmer
import matplotlib.pyplot as plt

In [None]:
# ========== STEP 3: Setup ==========
# URL the later cells PUT the word index to and GET it back from.
FIREBASE_URL = "https://tergol-6-cloud-default-rtdb.firebaseio.com/project_words_index.json"

# Single Porter stemmer instance used by index_words().
stemmer = PorterStemmer()

# Download the NLTK stop-word corpus, then keep the English list as a set
# so membership checks during indexing are cheap.
nltk.download('stopwords')
STOP_WORDS = {*nltk.corpus.stopwords.words('english')}

In [None]:
# ========== STEP 4: Web Scraping and Indexing ==========
def fetch_page(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive host.

    Returns:
        A ``BeautifulSoup`` tree built with the ``'html.parser'`` backend when
        the server answers with status 200; ``None`` when the status is anything
        else or when the request itself fails (connection error, timeout, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Report the cause, then fall back to the same "no page" result that a
        # non-200 status produces so callers only have to check for None.
        print(f"Request to {url} failed: {exc}")
        return None
    if response.status_code == 200:
        return BeautifulSoup(response.text, 'html.parser')
    return None

def index_words(soup):
    """Build a frequency table of stemmed words found in a parsed page.

    The text returned by ``soup.get_text()`` is split into runs of word
    characters and lower-cased. Tokens present in ``STOP_WORDS`` are dropped;
    the rest are reduced with ``stemmer.stem`` and tallied.

    Args:
        soup: Object exposing ``get_text()`` (the parsed page).

    Returns:
        dict mapping each stem to the number of times it occurred, with keys
        in order of first appearance.
    """
    counts = {}
    page_text = soup.get_text()
    lowered_tokens = (token.lower() for token in re.findall(r'\w+', page_text))
    for token in lowered_tokens:
        # The stop-word check runs on the lower-cased token, before stemming.
        if token in STOP_WORDS:
            continue
        stem = stemmer.stem(token)
        if stem in counts:
            counts[stem] += 1
        else:
            counts[stem] = 1
    return counts


In [None]:
# ========== STEP 5: Use Your Site ==========
url = 'https://cloud.google.com/customers/spotify'
soup = fetch_page(url)
if soup is None:
    # Stop the run here. Merely printing would leave `top_10` undefined (or
    # stale from an earlier run), and the upload cell would then fail with a
    # NameError or push outdated data.
    raise RuntimeError("❌ Failed to fetch the page")

index = index_words(soup)
# Highest counts first; keep only the ten most frequent stems.
sorted_index = sorted(index.items(), key=lambda x: x[1], reverse=True)
top_10 = dict(sorted_index[:10])
print("Top 10 Words:", top_10)

In [None]:
# ========== STEP 6: Upload to Firebase ==========
# Send the top-10 mapping as the JSON body of a PUT to FIREBASE_URL.
# The timeout keeps the cell from hanging forever if the server never answers.
upload_response = requests.put(FIREBASE_URL, json=top_10, timeout=10)
if upload_response.status_code == 200:
    print("✅ Uploaded to Firebase successfully")
else:
    print("❌ Upload failed:", upload_response.text)

In [None]:
# ========== STEP 7: Read from Firebase ==========
# The timeout keeps the cell from hanging forever if the server never answers.
read_response = requests.get(FIREBASE_URL, timeout=10)
if read_response.status_code == 200:
    # The body can decode to None (JSON null); fall back to an empty mapping
    # so the loop below and the chart cell still have a dict to work with.
    firebase_data = read_response.json() or {}
else:
    # Do not treat an error payload as word counts.
    print("❌ Read failed:", read_response.text)
    firebase_data = {}

print("\n📦 Words from Firebase:")
for word, count in firebase_data.items():
    print(f"{word}: {count}")

In [None]:
# ========== STEP 8: Chart ==========
# Split the mapping read back from Firebase into parallel label/height lists.
words = list(firebase_data)
counts = list(firebase_data.values())

# Bar chart of word frequencies, drawn through the explicit Figure/Axes API.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(words, counts, color='mediumseagreen')
ax.set_xlabel('Words')
ax.set_ylabel('Frequency')
ax.set_title('Top 10 Word Frequencies from Spotify Page')
ax.tick_params(axis='x', labelrotation=45)  # slant the word labels
ax.grid(axis='y')
fig.tight_layout()
plt.show()