In [None]:
import requests

# Simple example of fetching a weather page
url = "https://example.com/weather"
# Always pass a timeout: without one, requests may wait indefinitely on a
# server that accepts the connection but never answers, freezing the kernel.
response = requests.get(url, timeout=10)
print(f"Status code: {response.status_code}")
print(f"Content: {response.text[:100]}...")  # Print first 100 characters

In [None]:
import requests

def check_robots_txt(url, timeout=10):
    """Fetch and print the robots.txt file of the site that serves ``url``.

    robots.txt is only meaningful at the root of a host, so the path, query
    string and fragment of ``url`` are discarded before the request is made
    (``https://host/some/page?x=1`` -> ``https://host/robots.txt``).

    Args:
        url: Any URL on the site of interest.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. The outcome is printed.
    """
    from urllib.parse import urlsplit, urlunsplit

    parts = urlsplit(url)
    if parts.scheme and parts.netloc:
        # Rebuild from scheme + host only; a trailing slash or a page path
        # in `url` would otherwise produce a wrong robots.txt location.
        robots_url = urlunsplit((parts.scheme, parts.netloc, "/robots.txt", "", ""))
    else:
        # Not an absolute URL: fall back to simple concatenation and let
        # requests report the malformed address.
        robots_url = f"{url.rstrip('/')}/robots.txt"

    # A timeout keeps an unresponsive server from hanging the notebook.
    response = requests.get(robots_url, timeout=timeout)
    if response.status_code == 200:
        print(f"Robots.txt content:\n{response.text}")
    else:
        print(f"No robots.txt found at {robots_url}")

# Demo: print the crawling rules published by the Curtin University site
check_robots_txt(url="https://curtin.edu.au")

In [None]:
from urllib.parse import urlparse, parse_qs

# Break a URL into its components and decode its query string
url = "https://example.com/weather?city=New York&units=metric"
parsed_url = urlparse(url)
# parse_qs maps every parameter name to a *list* of values
query_params = parse_qs(parsed_url.query)

# One print call, one component per line
print(
    f"Scheme: {parsed_url.scheme}",
    f"Domain: {parsed_url.netloc}",
    f"Path: {parsed_url.path}",
    f"Query parameters: {query_params}",
    sep="\n",
)

In [None]:
import requests

# Fetch the weather report for Perth and report whether the request succeeded
url = "https://wttr.in/perth"
# Bound the wait: without a timeout a stalled server blocks the cell forever.
response = requests.get(url, timeout=10)

if response.status_code == 200:
    print("Successfully fetched the weather page")
    print(f"Content: {response.text[:200]}...")  # Print first 200 characters
else:
    print(f"Failed to fetch the page. Status code: {response.status_code}")

In [None]:
from bs4 import BeautifulSoup

# A tiny hand-written page used to demonstrate element lookup
html_content = """
<html>
  <body>
    <h1>Weather Forecast</h1>
    <p class="temperature">25°C</p>
    <p class="condition">Sunny</p>
  </body>
</html>
"""

soup = BeautifulSoup(html_content, features="html.parser")

# Locate each paragraph by its CSS class and keep only the text inside it
temperature = soup.find("p", attrs={"class": "temperature"}).get_text()
condition = soup.find("p", attrs={"class": "condition"}).get_text()

print(f"Temperature: {temperature}")
print(f"Condition: {condition}")

In [None]:
import requests
from bs4 import BeautifulSoup

# Download a page and list every hyperlink it contains
url = "https://curtin.edu.au"
# Bound the wait: without a timeout a stalled server blocks the cell forever.
response = requests.get(url, timeout=10)
soup = BeautifulSoup(response.text, 'html.parser')

for link in soup.find_all('a'):
    href = link.get('href')  # None when the <a> tag has no href attribute
    text = link.text
    print(f"Link: {text} -> {href}")

In [None]:
# Sample forecast table: one header row followed by one row per day
html_content = """
<table id="weather-forecast">
  <tr>
    <th>Day</th>
    <th>Temperature</th>
    <th>Condition</th>
  </tr>
  <tr>
    <td>Monday</td>
    <td>25°C</td>
    <td>Sunny</td>
  </tr>
  <tr>
    <td>Tuesday</td>
    <td>22°C</td>
    <td>Cloudy</td>
  </tr>
</table>
"""

soup = BeautifulSoup(html_content, features="html.parser")
table = soup.find("table", attrs={"id": "weather-forecast"})

# The first <tr> holds the column headings, so start from the second one
for row in table.find_all("tr")[1:]:
    columns = row.find_all("td")
    # Cells appear in the order: day, temperature, condition
    day, temp, condition = (columns[i].text for i in range(3))
    print(f"{day}: {temp}, {condition}")

In [None]:
import pandas as pd

# Let pandas download the page and parse its HTML tables
url = "https://en.wikipedia.org/wiki/Global_surface_temperature#Global_temperature_record"
tables = pd.read_html(io=url)
# Assumption: the table of interest is the first one on the page
weather_df = tables[0]
print(weather_df.head(5))

In [None]:
# Write the DataFrame 'weather_df' (assumed to be cleaned and already in the
# kernel) to disk, leaving out the row index column
weather_df.to_csv(path_or_buf='weather_data.csv', index=False)
print("Data saved to weather_data.csv")

In [None]:
import time
import requests

def fetch_with_delay(url, delay=1, timeout=10):
    """GET ``url``, report the status code, then pause before returning.

    The pause spaces out consecutive calls so that a loop over many URLs
    does not send requests back to back.

    Args:
        url: Address to fetch.
        delay: Seconds to sleep after the request completes.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response object returned by ``requests.get``.
    """
    # A timeout keeps one unresponsive server from stalling the whole loop.
    response = requests.get(url, timeout=timeout)
    print(f"Fetched {url}: Status {response.status_code}")
    time.sleep(delay)  # Pause for `delay` seconds before the caller's next request
    return response

# Usage: fetch the pages one at a time; fetch_with_delay sleeps after each request
urls = [
    "https://example.com/weather/day1",
    "https://example.com/weather/day2",
]
for url in urls:
    fetch_with_delay(url)