In [1]:
# You may need to install these
# pip install pooch requests

import requests
import json

In [3]:
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

data = json.loads(sample_json)
print("Setup complete! Station:", data['station'])

Setup complete! Station: USC00305800


In [4]:
print("Date, Temperature")
for obs in data['observations']:
    print(obs['date'], obs['temperature'])

Date, Temperature
2023-01-01 32
2023-01-02 28
2023-01-03 35
2023-01-04 38
2023-01-05 41


In [5]:
total_temp = 0
count = 0
for obs in data['observations']:
    total_temp += obs['temperature']
    count += 1

avg_temp = total_temp / count
print(f"Average temperature: {avg_temp}°F")

Average temperature: 34.8°F


In [6]:
print("Days with precipitation:")
for obs in data['observations']:
    if obs['precipitation'] > 0:
        print(obs['date'], "-", obs['precipitation'], "inches")

Days with precipitation:
2023-01-02 - 0.5 inches
2023-01-04 - 0.2 inches


In [7]:
import requests

# Weather.gov API - no key needed!
# Getting the current forecast for New York City
url = "https://api.weather.gov/gridpoints/OKX/33,37/forecast"

response = requests.get(url, headers={"User-Agent": "student-project"})
forecast = response.json()

# Extract and print forecast periods
print("Real Weather Data for New York City")
print("=" * 40)
for period in forecast['properties']['periods'][:5]:  # first 5 periods
    print(f"{period['name']}: {period['temperature']}°{period['temperatureUnit']}")
    print(f"  {period['shortForecast']}")
    print()

Real Weather Data for New York City
Tonight: 37°F
  Mostly Cloudy

Wednesday: 42°F
  Light Rain Likely

Wednesday Night: 36°F
  Chance Light Rain

Thursday: 41°F
  Cloudy

Thursday Night: 35°F
  Chance Light Rain



In [9]:
import sys
!{sys.executable} -m pip install pooch

Defaulting to user installation because normal site-packages is not writeable
Collecting pooch
  Downloading pooch-1.9.0-py3-none-any.whl.metadata (10 kB)
Downloading pooch-1.9.0-py3-none-any.whl (67 kB)
Installing collected packages: pooch
Successfully installed pooch-1.9.0


In [1]:
import pooch
import os

file_path = pooch.retrieve(
    url="https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv",
    known_hash=None
)

print("File downloaded to:", file_path)
print("File exists:", os.path.exists(file_path))

Downloading data from 'https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv' to file '/home/ld3228/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv'.
SHA256 hash of downloaded file: 365ca31c9296ac200e73d357e16a3c1340f9ce6746c83bf81403046dcb374361
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.


File downloaded to: /home/ld3228/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


In [2]:
# Check file size
file_size = os.path.getsize(file_path)
print(f"File size: {file_size} bytes")

# Count lines
with open(file_path, 'r') as f:
    line_count = sum(1 for line in f)

print(f"Number of lines: {line_count}")

File size: 31984 bytes
Number of lines: 1036


In [4]:
# Downloading NOAA global temperature anomalies dataset (reliable, always available)
my_url = "https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/global/time-series/globe/land_ocean/1/1/1850-2023.csv"

my_file = pooch.retrieve(url=my_url, known_hash=None)

print("Downloaded to:", my_file)
print("File size:", os.path.getsize(my_file), "bytes")

with open(my_file, 'r') as f:
    lines = f.readlines()

print("Number of lines:", len(lines))
print("First few lines:")
for line in lines[:5]:
    print(line.strip())

Downloading data from 'https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/global/time-series/globe/land_ocean/1/1/1850-2023.csv' to file '/home/ld3228/.cache/pooch/9bb4eb075a5ab8bdfe56b5ae9d106a97-1850-2023.csv'.
SHA256 hash of downloaded file: 51377d381314e987007e72b37e2e638288c959987116e59bc0d0e5f822ab12ed
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.


Downloaded to: /home/ld3228/.cache/pooch/9bb4eb075a5ab8bdfe56b5ae9d106a97-1850-2023.csv
File size: 1948 bytes
Number of lines: 178
First few lines:
# Title: Global Land and Ocean January Average Temperature Anomalies
# Units: Degrees Celsius
# Base Period: 1901-2000
Year,Anomaly
1850,-0.43


In [5]:
print("\nData Inventory:")
print("1. air_quality_no2.csv - Air quality NO2 measurements from pandas sample data")
print("2. NOAA global temperature anomalies - monthly land+ocean temps from 1850 to 2023")


Data Inventory:
1. air_quality_no2.csv - Air quality NO2 measurements from pandas sample data
2. NOAA global temperature anomalies - monthly land+ocean temps from 1850 to 2023


In [6]:
import requests

base_url = "http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods"

# Get DDS - describes the structure
dds_url = base_url + ".dds"
response = requests.get(dds_url)

print("Dataset Structure:")
print(response.text[:500])

Dataset Structure:
Dataset {
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Float32 T[T = 324];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



In [7]:
# DAS contains the actual metadata (units, time range, etc.)
das_url = base_url + ".das"
das_response = requests.get(das_url)

print("Dataset Attributes:")
print(das_response.text[:1000])

Dataset Attributes:
Attributes {
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



## Part 3 Answers

### Task 1: Dimensions and Variables
- **Dimension names:** T (time), Y (latitude), X (longitude)
- **Main variable name:** rain

### Task 3: Dataset Documentation
- **What it contains:** Monthly global precipitation estimates based on rain gauge observations, produced by NOAA/NCEP/CPC
- **Time period:** Retrospective period from 1979 onwards
- **Geographic region:** Global coverage — latitude -90° to 90°, longitude 0° to 360°
- **Units:** mm/day (millimeters per day)