# 🧠 Exercises XP – Web Scraping & JavaScript Simulations
This notebook covers Exercises 1–5 using Python and conceptual examples for JavaScript.

In [None]:
# 📦 Install required packages
!pip install selenium webdriver-manager beautifulsoup4


## 🌟 Exercise 1: JavaScript Variables and Data Types (Conceptual)

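Save this HTML to a file (for example `data_types.html`), open it in your browser, and open the developer console (F12 → Console) to see the output. Note that `typeof null` prints `"object"` — a long-standing JavaScript quirk, not a mistake in the snippet.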
```html
<!DOCTYPE html>
<html>
<head><title>JS Data Types</title></head>
<body>
<script>
    let myString = "Hello, JavaScript!";
    let myNumber = 42;
    let myBoolean = true;
    let myUndefined;
    let myNull = null;

    console.log(myString, typeof myString);
    console.log(myNumber, typeof myNumber);
    console.log(myBoolean, typeof myBoolean);
    console.log(myUndefined, typeof myUndefined);
    console.log(myNull, typeof myNull);
</script>
</body>
</html>
```

---

## 🌟 Exercise 2: Static HTML vs JavaScript-Enhanced Page

**File 1: static.html**
```html
<h1>Welcome</h1>
<p>This is a static page</p>
<ul><li>HTML only</li><li>No interaction</li></ul>
```

**File 2: dynamic.html**
```html
<h1 id="title">Original Heading</h1>
<script>
    document.getElementById("title").innerText = "Changed by JavaScript!";
</script>
```


In [None]:
# 🌟 Exercises 3, 4 and 5: Web Scraping with Selenium and BeautifulSoup
import re
import statistics
import time
from collections import Counter

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Setup headless browser
# Chrome is started without a visible window; the sandbox and /dev/shm flags
# are presumably here for containerised hosts (Colab, Docker) — confirm if
# running elsewhere.
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
# Selenium 4 expects the chromedriver path wrapped in a Service object.
# Passing the path as the first positional argument collides with `options`
# on current releases and raises TypeError.
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=chrome_options,
)

# ---------- Exercise 3: Rotten Tomatoes ----------
# Open the listing, pause a fixed five seconds (presumably so client-side
# rendering can finish), then parse a snapshot of the rendered DOM.
driver.get("https://www.rottentomatoes.com/browse/movies_at_home/sort:popular")
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Only the first ten matching tiles are kept. Each field is read from a
# data-* attribute on the tile, with 'N/A' standing in when it is absent.
movies = soup.select('div.js-tile-link')[:10]
movie_data = [
    {
        "title": tile.get('data-title', 'N/A'),
        "score": tile.get('data-score', 'N/A'),
        "release_date": tile.get('data-release-date', 'N/A'),
    }
    for tile in movies
]

# One row per tile, columns in the order of the dict keys above.
df_movies = pd.DataFrame(movie_data)
print("🎬 Top 10 Rotten Tomatoes Movies:")
print(df_movies)

# ---------- Exercise 4: BBC News ----------
# Collect headline <h2> elements that sit inside links whose href contains
# '/news/', then bucket the headlines by the month found in a YYYY/MM/DD date.
driver.get("https://www.bbc.com/innovation/technology")
time.sleep(5)
soup = BeautifulSoup(driver.page_source, "html.parser")
articles = soup.select("a[href*='/news/'] h2")
categorized_articles = {}
date_pattern = r'\d{4}/\d{2}/\d{2}'
for tag in articles:
    title = tag.text.strip()
    # The <h2> markup does not include the enclosing link's URL, so look at
    # the parent anchor's href first; fall back to the headline markup itself.
    link = tag.find_parent("a")
    href = (link.get("href") or "") if link is not None else ""
    match = re.search(date_pattern, href) or re.search(date_pattern, str(tag))
    if match:
        date = match.group(0)
        try:
            # Turn the two-digit month ("03") into its full name ("March").
            month = time.strftime('%B', time.strptime(date.split('/')[1], "%m"))
        except ValueError:
            # The pattern also matches digit runs that are not real months
            # (e.g. "13"); skip those instead of aborting the whole scrape.
            continue
        categorized_articles.setdefault(month, []).append(title)

print("\n📰 BBC Articles by Month:")
for month, titles in categorized_articles.items():
    print(f"\n{month}")
    for title in titles:
        print(f"- {title}")

# ---------- Exercise 5: Accuweather ----------
# Scrape the Los Angeles forecast page and summarise it: the mean of the
# "high" temperatures and the most frequent condition phrase.
driver.get("https://www.accuweather.com/en/us/los-angeles-ca/90012/weather-forecast/348108")
time.sleep(6)
soup = BeautifulSoup(driver.page_source, "html.parser")

# Keep readings that are whole numbers once the degree sign is stripped.
# An optional leading minus is allowed so sub-zero values are not silently
# dropped (str.isdigit() rejects "-3"); anything else is ignored.
temps = []
for s in soup.select(".high span"):
    reading = s.text.strip().replace("°", "")
    if re.fullmatch(r"-?\d+", reading):
        temps.append(int(reading))

# Non-empty text of every element carrying the "phrase" class.
conditions = [s.text.strip() for s in soup.select(".phrase") if s.text.strip()]

# Both summaries are None when nothing was scraped.
avg_temp = statistics.mean(temps) if temps else None
most_common_condition = Counter(conditions).most_common(1)[0][0] if conditions else None
# NOTE(review): the label below says °C, but the values are printed exactly as
# scraped with no unit conversion — confirm which unit the page serves.
print(f"\n🌤️ LA Weather Analysis:\n- Average Temp: {avg_temp}°C\n- Most Common Condition: {most_common_condition}")

# Shut down the browser session used by the three scrapes above.
# NOTE(review): this line is only reached if every statement before it
# succeeded; an exception earlier in the cell skips it.
driver.quit()
