# Chapter 10

## 10.1.1

In [None]:
import requests
import json    #A

url = "https://api.weather.gov/gridpoints/OKX/34,36/forecast"   #B
# Without a timeout, requests waits indefinitely for a server that never
# answers; cap the wait so the cell fails fast instead of hanging.
response = requests.get(url, timeout=10)   #C
print(response)   #D

#A Import requests and json libraries
#B Define the URL 
#C Use the requests library to call the API at the URL we defined
#D Print the response from the API

In [None]:
# Decode the JSON body of the API response; the .keys() call below relies on
# the decoded value being dictionary-like.
forecast = response.json()    #A
# List the top-level keys to see how the payload is organised.
print(forecast.keys())    #B

#A Parse the response body as JSON and save the result
#B Print the keys of the new JSON object

In [None]:
# Follow the "properties" -> "periods" path to reach the nested forecast entries.
print(forecast["properties"]["periods"])    #A

#A Print the contents of the nested forecast periods

In [None]:
import pandas as pd    #A

# Load the nested forecast periods into a dataframe.
periods = forecast["properties"]["periods"]
forecast_df = pd.DataFrame(periods)    #B

# Preview only the fields used in the rest of the analysis.
preview_columns = ["endTime", "isDaytime", "temperature", "windSpeed"]
forecast_df[preview_columns].head()    #C

#A Import the pandas library
#B Save the forecast JSON as a pandas dataframe
#C Display only the columns we need

In [None]:
import re    #A

def get_wind_speed(row):    #B
    """Return the largest integer found in a wind-speed string.

    Args:
        row: Text such as "5 to 10 mph". Anything that is not a string
            (for example a missing value) is treated as having no reading.

    Returns:
        The highest integer that appears in the text, or None when the
        value is not a string or contains no digits.
    """
    if not isinstance(row, str):
        return None
    numbers = [int(num) for num in re.findall(r'\d+', row)]
    # max() raises ValueError on an empty list, so guard the no-digits case.
    return max(numbers) if numbers else None

# Keep only the calendar date of each period's end timestamp.
end_times = pd.to_datetime(forecast_df["endTime"])
forecast_df["date"] = end_times.dt.date    #C

# Reduce each wind-speed description to a single integer.
raw_wind = forecast_df["windSpeed"]
forecast_df["wind_speed"] = raw_wind.apply(get_wind_speed)    #D

summary_columns = ["date", "temperature", "wind_speed"]
forecast_df[summary_columns].head()    #E

#A Import the regular expressions library
#B Define a custom function that finds all integers and returns the maximum of those values
#C Convert the endTime field to a calendar date
#D Apply the custom function to extract only the highest wind speed integer
#E Display the first five rows of the new data

In [None]:
# Per-day aggregations: temperature range plus the strongest wind.
daily_aggregations = {"temperature": ["min", "max"], "wind_speed": "max"}
grouped_by_day = forecast_df.groupby("date")
daily_forecast = grouped_by_day.agg(daily_aggregations).reset_index()    #A

# Replace the two-level column labels produced by the aggregation with flat names.
daily_forecast.columns = ["date", "low_temp", "high_temp", "wind_speed"]    #B

daily_forecast.head()    #C
 
#A Aggregate the forecast data by day
#B Rename the columns
#C Display the first five rows of the new dataframe

## 10.2.1

In [None]:
##### UPDATE CODE IN BOOK #####

lat, lon = 40.7392, -73.9829  # A

url = f"https://forecast.weather.gov/MapClick.php?lat={lat}&lon={lon}"  # B
# Identify the client to the server through the User-Agent header.
headers = {
    "User-Agent": "Mona's data exploration"
}

# Without a timeout, requests waits indefinitely for a server that never
# answers; cap the wait so the cell fails fast instead of hanging.
response = requests.get(url, headers=headers, timeout=10)  # C
print(response)  # D

#A Create the latitude and longitude variables for the New York City weather station
#B Create the URL for the forecast page using our latitude and longitude variables
#C Use the requests library to request the forecast page using the URL and headers we defined
#D Print the response returned for the page request

In [None]:
# Show the raw body text returned for the forecast page request.
print(response.text)    #A

#A Print the content of the response

In [None]:
from bs4 import BeautifulSoup  # A

# Parse the response text with the "html.parser" backend so the markup can
# be searched by tag and class.
soup = BeautifulSoup(response.text, "html.parser")  # B
# prettify() returns the parsed document as an indented string.
print(soup.prettify())  # C

#A Import the BeautifulSoup library
#B Create a soup object with the parsed HTML data
#C Print the formatted HTML content

In [None]:
forecast_items = soup.find_all("div", class_="tombstone-container")  # A

# Show each container's text alongside its first <img> tag (None if absent).
for tombstone in forecast_items:  # B
    print(tombstone.get_text(), tombstone.find("img"))

#A Retrieve all HTML content inside the section with the class “tombstone-container” 
#B Print the text and the first image tag of each forecast item found inside the section

In [None]:
forecast_data = []

for item in forecast_items:    #A
    period_tag = item.find("p", class_="period-name")
    temp_tag = item.find("p", class_="temp")
    img_tag = item.find("img")

    # find() returns None when an element is absent. Skip any container that
    # lacks one of the three pieces so a single incomplete entry cannot abort
    # the whole loop.
    # NOTE(review): presumably these are non-forecast containers such as
    # alert banners -- confirm against the live page.
    if period_tag is None or temp_tag is None or img_tag is None:
        continue

    # strip() removes stray whitespace so that day and night entries for the
    # same day end up with identical labels when grouped later.
    period = (    #B
        period_tag.get_text()
        .replace("Night", "")
        .strip()
    )
    temp = temp_tag.get_text()    #C

    # Fall back to an empty string when the image has no alt text.
    wind_text = img_tag.get("alt", "")    #D

    forecast_data.append(    #E
        {
            "date": period,
            "temp": temp,
            "wind_speed": wind_text,
        }
    )

# Naming the columns keeps the expected schema even if no records were found.
forecast_data = pd.DataFrame(    #F
    forecast_data, columns=["date", "temp", "wind_speed"]
)
forecast_data.head()

#A Create a for loop to process each forecast record
#B Extract the name of the day of week, and remove the word “Night”
#C Extract the temperature value for the time period
#D Extract the sentence that contains the wind speed
#E Append each processed record to a dataset
#F Convert the dataset to a dataframe and display the first five rows 

In [None]:
import re     #A
# Pull the first integer out of each temperature string. The optional leading
# "-" keeps the sign of below-zero readings, which a bare \d+ would drop.
forecast_data["temp"] = [    #B
    int(re.search(r"-?\d+", t).group()) for t in forecast_data["temp"]
]
forecast_data.head()    #C

#A Import the re library
#B Loop through each row of the temp column, returning only the digit values as a single integer
#C Display the first five rows of the dataframe

In [None]:
# Print each value from the first rows of the wind_speed column on its own
# line, so the complete text of every entry is visible.
for row in forecast_data["wind_speed"].head():     #A
    print(row)

#A Print the entire text of the first five rows of the wind speed column

In [None]:
# Collect every "<digits> mph" fragment from each wind description.
mph_pattern = re.compile(r"\d+ mph")
forecast_data["wind_speed"] = [    #A
    mph_pattern.findall(sentence) for sentence in forecast_data["wind_speed"]
]
forecast_data.head()    #B

#A Use the re library’s findall function to return all integers followed by a space and “mph”
#B Display the first five rows of the dataframe

In [None]:
# Keep the first "<digits> mph" match as an integer; rows with no match get None.
forecast_data["wind_speed"] = [   #A
    None if not matches else int(matches[0].replace(" mph", ""))
    for matches in forecast_data["wind_speed"]
]

# sort=False keeps the groups in the order the dates first appear.
daily_aggregations = {"temp": ["min", "max"], "wind_speed": "max"}
grouped_by_day = forecast_data.groupby("date", sort=False)
forecast = grouped_by_day.agg(daily_aggregations).reset_index()    #B

forecast.columns = ["date", "low_temp", "high_temp", "wind_speed"]    #C
forecast.head()

#A Retrieve only the first list item, remove the text, and convert it to an integer
#B Group the dataframe by date and get the low/high temperature for that 24-hour period
#C Rename the columns and display the first five rows

## 10.3.3

In [None]:
from datetime import datetime, timedelta    #A

now = datetime.now()    #B
# Format the dates three and two days before now as YYYY-MM-DD strings.
start, end = (
    (now - timedelta(days=days_back)).strftime("%Y-%m-%d")
    for days_back in (3, 2)
)

base = "https://data.cityofnewyork.us/resource/erm2-nwe9.json"    #C
query = "?$query="
select = "SELECT `created_date`, `unique_key` "
where = 'WHERE caseless_eq(`descriptor`, "Rat Sighting") '
dates = (    #D
    f"AND (`created_date` BETWEEN '{start}' :: floating_timestamp "
    f"AND '{end}' :: floating_timestamp) "
)

# Stitch the pieces together in order: endpoint, query prefix, then clauses.
full_url = "".join([base, query, select, where, dates])    #E

#A Import datetime library functions
#B Get the current date and time, then format the dates three and two days prior as inputs for the endpoint
#C Construct each portion of the endpoint for the NYC Open Data Portal
#D Add the date inputs to the dates variable
#E Construct the full URL 

In [None]:
# Without a timeout, requests waits indefinitely for a server that never
# answers; cap the wait so the cell fails fast instead of hanging.
response = requests.get(full_url, timeout=10)   #A
print(response.json())   #B

#A Request 311 rat sightings data from Open Data NYC
#B Print the JSON response