## Discussion and Answers for Questions from Previous Class

### Question 1: How can I save a DataFrame as a CSV file using Pandas?

In [None]:
# Install pandas with pip into the environment of the current Jupyter kernel.
# `sys.executable` is the interpreter running this kernel, so invoking pip
# through it targets the same environment the notebook imports from.
import sys
!{sys.executable} -m pip install pandas

In [None]:
# Importing libraries
import pandas as pd

In [None]:
# Build a small example DataFrame from a dict that maps column names to
# column values (swap in your own DataFrame here).
data = {
    'Column1': [1, 2, 3, 4],
    'Column2': ['A', 'B', 'C', 'D'],
}
df = pd.DataFrame(data=data)

In [None]:
# Write the DataFrame to disk as CSV; index=False keeps the row index
# out of the file so only the data columns are saved.
df.to_csv(path_or_buf='your_file_name.csv', index=False)

### Question 2: How can I scrape a webpage using Python?

In [None]:
# Install requests and beautifulsoup4 with pip into the environment of the
# current Jupyter kernel. `sys.executable` is the interpreter running this
# kernel, so invoking pip through it targets the same environment the
# notebook imports from.
import sys
!{sys.executable} -m pip install requests beautifulsoup4

In [None]:
# Importing libraries
import csv
import requests
from bs4 import BeautifulSoup

In [None]:
# URL of the Goodreads shelf page to scrape (the "machinelearning" shelf here);
# replace it with another shelf URL to scrape a different list.
url = 'https://www.goodreads.com/shelf/show/machinelearning'

In [None]:
# Send an HTTP GET request to fetch the webpage content.
# requests applies no timeout by default, so an unresponsive server would
# hang this cell forever; with a timeout set, requests.exceptions.Timeout
# is raised instead when the server takes longer than 10 seconds.
response = requests.get(url, timeout=10)

In [None]:
# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract and print the title of the webpage. soup.title is None when
    # the page has no <title> tag, so guard the attribute access instead of
    # letting it raise AttributeError.
    title = soup.title.string if soup.title is not None else None
    print(f"Title: {title}")

    # Extract book titles from the elements with class='bookTitle'
    book_titles = [element.text.strip() for element in soup.find_all(class_='bookTitle')]

    # Extract author elements, i.e. those carrying itemprop="name"
    authors = soup.find_all(attrs={"itemprop": "name"})

    # Titles and authors are paired purely by position, so only continue
    # when both lists have the same length.
    if len(book_titles) == len(authors):
        # One dictionary per book; author text is stripped the same way the
        # titles are so the CSV has no stray surrounding whitespace.
        books_info = [
            {"Book Title": book_title, "Author": author.get_text().strip()}
            for book_title, author in zip(book_titles, authors)
        ]

        # Specify the CSV file name
        csv_file_name = 'books.csv'

        # newline='' lets the csv module control line endings. The encoding
        # is explicit so non-ASCII titles/authors are written correctly
        # regardless of the platform's default encoding.
        with open(csv_file_name, 'w', newline='', encoding='utf-8') as csv_file:
            fieldnames = ["Book Title", "Author"]
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

            # Header row first, then one row per book
            writer.writeheader()
            writer.writerows(books_info)

        # Report success only once the file has been closed
        print(f"Data written to {csv_file_name}")
    else:
        print("Number of book titles does not match the number of authors.")
else:
    print(f"Failed to fetch the webpage. Status code: {response.status_code}")

### Question 3: How can I calculate the correlation between two variables with different timestamps in Python?

In [None]:
# Importing libraries
import pandas as pd
import numpy as np

In [None]:
# Example data: two series of ten random values each, sampled daily.
# The second series starts on 2023-01-05, four days after the first,
# so the two date ranges only partly overlap.
data1 = np.random.rand(10)
timestamps1 = pd.date_range("2023-01-01", periods=len(data1), freq="D")

data2 = np.random.rand(10)
timestamps2 = pd.date_range("2023-01-05", periods=len(data2), freq="D")

In [None]:
# Wrap each series in a single-column DataFrame indexed by its own timestamps
df1 = pd.DataFrame({"Variable1": data1}, index=timestamps1)
df2 = pd.DataFrame({"Variable2": data2}, index=timestamps2)

In [None]:
# Resample each DataFrame to daily frequency, forward-filling any day
# that has no observation with the last known value.
df1_resampled, df2_resampled = (
    frame.resample("D").ffill() for frame in (df1, df2)
)

In [None]:
# Calculate the correlation between the two resampled series.
# Series.corr matches values by index label and excludes missing values,
# so only dates present in both series contribute to the result.
variable1 = df1_resampled["Variable1"]
variable2 = df2_resampled["Variable2"]
correlation = variable1.corr(variable2)

In [None]:
# Show the label and the coefficient on separate lines
print("Correlation between Variable1 and Variable2:", correlation, sep="\n")