## Setup: Install Required Packages
#### Run this cell first:

In [8]:
# You may need to install these
# pip install pooch requests

import requests
import json

## Part 1: Working with JSON Data
#### JSON (JavaScript Object Notation) is commonly used for web APIs. Many climate data services provide JSON outputs.

### Provided Code:

In [10]:
import json
import requests
# Here's a sample JSON structure similar to what APIs return.
# NOTE: JSON requires double quotes around keys and strings. Single quotes are
# fine in a Python dict literal but make json.loads() raise JSONDecodeError.
sample_json = '''
{
  "station": "USC00305800",
  "name": "New York Central Park",
  "location": {
    "latitude": 40.7789,
    "longitude": -73.9692
  },
  "observations": [
    {"date": "2023-01-01", "temperature": 32, "precipitation": 0.0},
    {"date": "2023-01-02", "temperature": 28, "precipitation": 0.5},
    {"date": "2023-01-03", "temperature": 35, "precipitation": 0.0},
    {"date": "2023-01-04", "temperature": 38, "precipitation": 0.2},
    {"date": "2023-01-05", "temperature": 41, "precipitation": 0.0}
  ]
}
'''

# Parse the JSON text into Python objects (dicts, lists, strings, numbers)
data = json.loads(sample_json)

# Access nested data: top-level key, nested dict, and first element of a list
print('Station:', data['station'])
print('Location:', data['location'])
print('First observation:', data['observations'][0])

Station: USC00305800
Location: {'latitude': 40.7789, 'longitude': -73.9692}
First observation: {'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}


### Your Tasks:

In [43]:
# Show the complete list of observation records held under the 'observations' key
all_observations = data['observations']
print(all_observations)

[{'date': '2023-01-01', 'temperature': 32, 'precipitation': 0.0}, {'date': '2023-01-02', 'temperature': 28, 'precipitation': 0.5}, {'date': '2023-01-03', 'temperature': 35, 'precipitation': 0.0}, {'date': '2023-01-04', 'temperature': 38, 'precipitation': 0.2}, {'date': '2023-01-05', 'temperature': 41, 'precipitation': 0.0}]


In [102]:
# 1. Extract and print all dates and temperatures (8 points)
# Reminder: {} builds a dict/set, [] builds or indexes a list, () is a tuple or a call.

observations = data['observations']

print('Date, Temperature')
for obs in observations:
    date = obs['date']
    temp = obs['temperature']
    print(f"{date}, {temp}")

# 2. Average temperature
# A running total has to start at 0 so the first observation is added to a
# known value; sum() and len() do that bookkeeping for us.
total_temp = sum(obs['temperature'] for obs in observations)
count = len(observations)

# Report the totals once, after every observation has been counted
print(f'Total Temp: {total_temp}')
print(f'Count:{count}')

# Guard the division so an empty observation list gives a clear message
# instead of a ZeroDivisionError.
if count:
    avg_temp = total_temp / count
    print(f'Average temperature: {avg_temp}°F')
else:
    avg_temp = None
    print('Average temperature: n/a (no observations)')

Date, Temperature
2023-01-01, 32
2023-01-02, 28
2023-01-03, 35
2023-01-04, 38
2023-01-05, 41
Total Temp: 174
Count:5
Average temperature: 34.8°F


In [107]:
# 3. Find days with precipitation (9 points)
print("\nDays with precipitation:")

# Walk every observation and skip the ones without a positive precipitation amount
for obs in data['observations']:
    if not obs['precipitation'] > 0:
        continue
    date, precip = obs['date'], obs['precipitation']
    print(f"{date}, {precip}")


Days with precipitation:
2023-01-02, 0.5
2023-01-04, 0.2


### Now try with a real API :

In [None]:
# Use a real weather API (you may need to sign up for a free API key)
# Example APIs: OpenWeatherMap, NOAA, Weather.gov

## I used the API FAQ to find my endpoint: https://weather-gov.github.io/api/general-faqs

# I used the weather.gov API for the same location as the example station:
##   https://api.weather.gov/points/40.7789,-73.9692
## The "properties" of that response link to these endpoints:
##   "forecast": "https://api.weather.gov/gridpoints/OKX/34,38/forecast"
##   "observationStations": "https://api.weather.gov/gridpoints/OKX/34,38/stations"
## Open question: which one should I use? (I could not find precip/temp in the observation data)

In [141]:
# Forecast endpoint for NWS grid cell OKX 34,38
wdg = 'https://api.weather.gov/gridpoints/OKX/34,38/forecast'

# api.weather.gov asks clients to identify themselves with a descriptive
# User-Agent; the timeout keeps this cell from hanging if the service is down.
resp = requests.get(
    wdg,
    headers={'User-Agent': 'climate-data-homework (python-requests)'},
    timeout=30,
)
resp.raise_for_status()  # fail here on 4xx/5xx instead of parsing an error body
wdg_dat = resp.json()

# Every forecast period lives under properties -> periods
periods = wdg_dat['properties']['periods']

## TEMP

print("Date and Time, Temperature (F)")
for obs in periods:
    time = obs['startTime']
    wsgtemp = obs['temperature']
    print(f"{time}: {wsgtemp}")

# Total and count over all forecast periods
wdgtotal_temp = sum(obs['temperature'] for obs in periods)
wdgcount = len(periods)

# Report the totals once, outside any loop
print(f'Total Temp: {wdgtotal_temp}')
print(f'Count:{wdgcount}')

# Guard the division in case the API returns no forecast periods
if wdgcount:
    wdgavg_temp = wdgtotal_temp / wdgcount
    print(f'Average temperature: {wdgavg_temp}°F')
else:
    wdgavg_temp = None
    print('Average temperature: n/a (no forecast periods)')



Date and Time, Temperature (F)
2026-02-17T06:00:00-05:00: 47
2026-02-17T18:00:00-05:00: 37
2026-02-18T06:00:00-05:00: 45
2026-02-18T18:00:00-05:00: 37
2026-02-19T06:00:00-05:00: 43
2026-02-19T18:00:00-05:00: 35
2026-02-20T06:00:00-05:00: 42
2026-02-20T18:00:00-05:00: 36
2026-02-21T06:00:00-05:00: 43
2026-02-21T18:00:00-05:00: 31
2026-02-22T06:00:00-05:00: 38
2026-02-22T18:00:00-05:00: 27
2026-02-23T06:00:00-05:00: 36
2026-02-23T18:00:00-05:00: 22
Total Temp: 519
Count:14
Average temperature: 37.07142857142857°F


In [137]:
# 3. Find days with precipitation (9 points)
# The forecast gives a *probability* of precipitation per period rather than a
# measured amount (the values look like percentages - confirm against API docs).
print("\nTimes with precipitation:")

for obs in wdg_dat['properties']['periods']:
    # The API may send null (None) for 'value' or omit the field; treat both
    # as 0 so the comparison below cannot raise TypeError on None > 0.
    wdgprecip = (obs.get('probabilityOfPrecipitation') or {}).get('value') or 0
    if wdgprecip > 0:
        wdgdate = obs['startTime']
        print(f"{wdgdate}, {wdgprecip}")


Times with precipitation:
2026-02-17T06:00:00-05:00, 13
2026-02-17T18:00:00-05:00, 7
2026-02-18T06:00:00-05:00, 80
2026-02-18T18:00:00-05:00, 80
2026-02-19T06:00:00-05:00, 16
2026-02-19T18:00:00-05:00, 39
2026-02-20T06:00:00-05:00, 83
2026-02-20T18:00:00-05:00, 83
2026-02-21T06:00:00-05:00, 27
2026-02-21T18:00:00-05:00, 30
2026-02-22T06:00:00-05:00, 48
2026-02-22T18:00:00-05:00, 48
2026-02-23T06:00:00-05:00, 34
2026-02-23T18:00:00-05:00, 16


## Part 2: Downloading Files with Python
#### Pooch is a Python tool for downloading and caching data files.

### Provided Code:

In [139]:
import pooch
import os

# Download a small air quality dataset with Pooch.
# The address is named first so the retrieve() call stays short.
air_quality_url = 'https://github.com/pandas-dev/pandas/raw/main/doc/data/air_quality_no2.csv'

# known_hash=None: no expected checksum is supplied for this download
file_path = pooch.retrieve(url=air_quality_url, known_hash=None)

# Report where the local copy is and confirm it is present on disk
print('File downloaded to:', file_path)
print('File exists:', os.path.exists(file_path))

File downloaded to: /home/nc3225/.cache/pooch/458dad453f6a48e510cd544bef1854e3-air_quality_no2.csv
File exists: True


### Your Tasks:

In [142]:
import os

# 1. Verify the file was downloaded (5 points)
# Size of the downloaded file on disk, in bytes
file_size = os.path.getsize(file_path)
print(f'File size: {file_size} bytes')

# Count lines by iterating over the open file, one line at a time
with open(file_path, 'r') as file:
    line_count = sum(1 for _ in file)

print(f'Number of lines: {line_count}')

File size: 31984 bytes
Number of lines: 1036


In [144]:
# 2. Download another file (10 points)
# Find a climate dataset online using the sources we talked about in lecture
# Download it using Pooch
import pooch

# Name the file once so the registry entry and the fetch call cannot drift apart
MAT_FILENAME = "Data_img_02_20171124_03_020.mat"

POOCH = pooch.create(
    path=pooch.os_cache("2017_Antarctica_P3/CSARP_mvdr"),
    base_url="https://data.cresis.ku.edu/data/rds/2017_Antarctica_P3/CSARP_mvdr/20171124_03/",
    registry={
        MAT_FILENAME: None,  # None = no hash supplied for this file
    }
)

local_fname = POOCH.fetch(MAT_FILENAME)
# Only the LAST expression of a cell is echoed, so a bare `local_fname` in the
# middle of the cell shows nothing - print the path explicitly instead.
print('File downloaded to:', local_fname)


# 3. Create a data inventory (5 points)
# List all the files you've downloaded in this assignment
print('\nData Inventory:')
print('1. air_quality_no2.csv - Air quality NO2 measurements')
print('2. 2017_Antarctica_P3/CSARP_mvdr - Antarctic Data_img')


Data Inventory:
1. air_quality_no2.csv - Air quality NO2 measurements
2. 2017_Antarctica_P3/CSARP_mvdr - Antarctic Data_img


## Part 3: Understanding NetCDF Metadata
#### NetCDF is a common format for climate data. Even without loading the full dataset, we can examine its metadata using HTTP requests.

### Provided Code:

In [145]:
import requests

# OPeNDAP provides metadata in different formats
# We'll get basic info about a climate dataset

base_url = 'http://iridl.ldeo.columbia.edu/expert/SOURCES/.NOAA/.NCEP/.CPC/.UNIFIED_PRCP/.GAUGE_BASED/.GLOBAL/.v1p0/.Monthly/.RETRO/.rain/dods'

# Get DDS (Dataset Descriptor Structure) - describes the structure
dds_url = base_url + '.dds'
# Bound the wait and fail loudly on an HTTP error, so an error page is never
# mistaken for the dataset structure printed below.
response = requests.get(dds_url, timeout=30)
response.raise_for_status()

print('Dataset Structure:')
print(response.text[:500])  # Print first 500 characters

Dataset Structure:
Dataset {
    Float32 Y[Y = 360];
    Float32 X[X = 720];
    Float32 T[T = 324];
    Grid {
     ARRAY:
        Float32 rain[T = 324][Y = 360][X = 720];
     MAPS:
        Float32 T[T = 324];
        Float32 Y[Y = 360];
        Float32 X[X = 720];
    } rain;
} rain;



### Your Tasks:

In [None]:
# 1. Identify dimensions and variables (5 points)
# Look at the DDS output above and answer:
# - What are the dimension names?
# - What is the main variable name?
# - Write your answers in a markdown cell

#### Dimension names = Y (latitude, 360 points) | X (longitude, 720 points) | T (time, 324 steps)
#### Main variable name = rain (a grid indexed as rain[T][Y][X])

In [150]:
# 2. Get data attributes (5 points)
# DAS (Dataset Attribute Structure) contains metadata
das_url = base_url + '.das'

# Request the DAS document; the timeout bounds the wait and raise_for_status()
# stops the cell on an HTTP error instead of printing an error page.
responsedas = requests.get(das_url, timeout=30)
responsedas.raise_for_status()

# Print the first 1000 characters of the attribute listing
print(responsedas.text[:1000])

Attributes {
    Y {
        String standard_name "latitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 0;
        String units "degree_north";
    }
    X {
        String standard_name "longitude";
        Float32 pointwidth 0.5;
        Int32 gridtype 1;
        String units "degree_east";
    }
    T {
        Float32 pointwidth 1.0;
        String calendar "360";
        Int32 gridtype 0;
        String units "months since 1960-01-01";
    }
    rain {
        Int32 pointwidth 0;
        String standard_name "lwe_precipitation_rate";
        Float32 file_missing_value -999.0;
        String history "Boxes with less than 0.0% dropped";
        Float32 missing_value NaN;
        String units "mm/day";
        String long_name "Monthly Precipitation";
    }
NC_GLOBAL {
    String Conventions "IRIDL";
}
}



In [None]:
# 3. Document what you learned (5 points)
# In a markdown cell, write:
# - What does this dataset contain?
# - What time period does it cover?
# - What geographic region does it cover?
# - What are the units of the main variable?
# Find this info in the DAS output

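#### Contains: Monthly precipitation rate (variable "rain", standard_name "lwe_precipitation_rate") on a 0.5-degree latitude/longitude grid
#### Time period: 324 monthly steps (27 years). The time axis counts months since 1960-01-01 on a 360-day calendar; that date is the reference for the units, and the actual first and last months come from the T values, which the DAS output does not list.
#### Geographic region: Global (the dataset path includes ".GLOBAL", and a 360 x 720 grid at 0.5-degree spacing spans 180 degrees of latitude and 360 degrees of longitude). "IRIDL" is the metadata conventions name, not a region.
#### Units of main variable: mm/day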