In [None]:
import os
import requests
import boto3
from time import sleep
from botocore.exceptions import NoCredentialsError, ClientError

# AWS S3 config
# No explicit credentials are passed to the client, so boto3 resolves them
# through its default lookup chain.
s3 = boto3.client('s3')
bucket_name = "de300spring2025"
s3_prefix = "MOSES_group/midway_data"  # no trailing slash; keys are built as f"{s3_prefix}/{name}"

# LCD source config
# Files are fetched from "{base_url}/{year}/{station_filename}".
base_url = "https://www.ncei.noaa.gov/data/local-climatological-data/access"
station_filename = "72534014819.csv"

def download_and_upload_to_s3(years, delay=1, timeout=60):
    """Fetch one station CSV per year over HTTP and copy each one to S3.

    For every year the file ``{base_url}/{year}/{station_filename}`` is
    downloaded, written to a temporary local file named
    ``{year}_{station_filename}``, uploaded to
    ``s3://{bucket_name}/{s3_prefix}/{year}_{station_filename}``, and the
    local copy is deleted again. A failure for one year (network error,
    non-200 status, failed upload) is printed and the loop moves on to the
    next year.

    Args:
        years: Iterable of years to fetch.
        delay: Seconds to sleep after each year, to space out requests.
        timeout: Seconds to wait for the HTTP server before giving up on a
            request. Without it a stalled connection would block forever.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    for year in years:
        url = f"{base_url}/{year}/{station_filename}"
        local_filename = f"{year}_{station_filename}"
        s3_key = f"{s3_prefix}/{local_filename}"

        print(f"\nFetching {url} ...")

        try:
            response = requests.get(url, timeout=timeout)

            if response.status_code == 200:
                with open(local_filename, "wb") as f:
                    f.write(response.content)
                print(f"Downloaded {local_filename}")

                try:
                    s3.upload_file(local_filename, bucket_name, s3_key)
                    print(f"Uploaded to s3://{bucket_name}/{s3_key}")
                except (
                    NoCredentialsError,
                    ClientError,
                    # upload_file re-raises ClientError as S3UploadFailedError,
                    # so it has to be listed explicitly to be caught here.
                    boto3.exceptions.S3UploadFailedError,
                ) as e:
                    print(f"Failed to upload to S3: {e}")
            elif response.status_code == 404:
                print(f"No file for year {year} (404). Skipping.")
            else:
                print(f"HTTP error {response.status_code} for year {year}")
        except requests.RequestException as e:
            # Timeouts, DNS failures, dropped connections, etc.: report and
            # carry on with the remaining years instead of aborting the run.
            print(f"Request failed for year {year}: {e}")
        finally:
            # Always remove the temporary local copy, even if something above
            # raised part-way through.
            if os.path.exists(local_filename):
                os.remove(local_filename)

        sleep(delay)

# Years 1973 through 2024 inclusive (the range stop value is exclusive).
years = [year for year in range(1973, 2025)]
download_and_upload_to_s3(years=years)


Fetching https://www.ncei.noaa.gov/data/local-climatological-data/access/1973/72534014819.csv ...
Downloaded 1973_72534014819.csv
Uploaded to s3://de300spring2025/MOSES_group/midway_data/1973_72534014819.csv

Fetching https://www.ncei.noaa.gov/data/local-climatological-data/access/1974/72534014819.csv ...
Downloaded 1974_72534014819.csv
Uploaded to s3://de300spring2025/MOSES_group/midway_data/1974_72534014819.csv

Fetching https://www.ncei.noaa.gov/data/local-climatological-data/access/1975/72534014819.csv ...
Downloaded 1975_72534014819.csv
Uploaded to s3://de300spring2025/MOSES_group/midway_data/1975_72534014819.csv

Fetching https://www.ncei.noaa.gov/data/local-climatological-data/access/1976/72534014819.csv ...
Downloaded 1976_72534014819.csv
Uploaded to s3://de300spring2025/MOSES_group/midway_data/1976_72534014819.csv

Fetching https://www.ncei.noaa.gov/data/local-climatological-data/access/1977/72534014819.csv ...
Downloaded 1977_72534014819.csv
Uploaded to s3://de300spring2025/

In [None]:
import boto3
import pandas as pd
import io

# No explicit credentials are passed; boto3 uses its default lookup chain.
s3 = boto3.client('s3')

bucket = 'de300spring2025'
prefix = 'MOSES_group/midway_data/'

# The merged dataset is written back under this same prefix by a later cell.
# It must be skipped here, otherwise re-running this cell would fold the
# previous combined output into the new one and duplicate every row.
combined_key = f"{prefix}combined_midway_data.csv"

df_list = []

# list_objects_v2 returns results one page at a time, so walk every page
# rather than reading only the first response.
paginator = s3.get_paginator('list_objects_v2')
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
    for obj in page.get('Contents', []):
        key = obj['Key']
        if not key.endswith('.csv') or key == combined_key:
            continue

        print(f"Processing: {key}")

        csv_obj = s3.get_object(Bucket=bucket, Key=key)
        body = csv_obj['Body'].read().decode('utf-8')

        # low_memory=False makes pandas infer each column's dtype from the
        # whole file instead of chunk by chunk; presumably this is what the
        # per-file warning emitted at this line was about.
        df = pd.read_csv(io.StringIO(body), low_memory=False)
        df['source_file'] = key  # keep track of which S3 object each row came from
        df_list.append(df)

# Fail with a clear message rather than pd.concat's generic
# "No objects to concatenate" when nothing matched.
if not df_list:
    raise ValueError(f"No CSV files found under s3://{bucket}/{prefix}")

# Concatenate all DataFrames
combined_df = pd.concat(df_list, ignore_index=True)

# Show first few rows
print(combined_df.head())

Processing: MOSES_group/midway_data/1973_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1974_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1975_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1976_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1977_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1978_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1979_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1980_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1981_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1982_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1983_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1984_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1985_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1986_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1987_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1988_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1989_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1990_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1991_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1992_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1993_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1994_72534014819.csv
Processing: MOSES_group/midway_data/1995_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1996_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1997_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1998_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/1999_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2000_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2001_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2002_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2003_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2004_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2005_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2006_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2007_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2008_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2009_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2010_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2011_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2012_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2013_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2014_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2015_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2016_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2017_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2018_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2019_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2020_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2021_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2022_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2023_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


Processing: MOSES_group/midway_data/2024_72534014819.csv


  df = pd.read_csv(io.StringIO(body))


       STATION                 DATE  LATITUDE  LONGITUDE  ELEVATION  \
0  72534014819  1973-01-01T00:00:00  41.78412  -87.75514      185.8   
1  72534014819  1973-01-01T01:00:00  41.78412  -87.75514      185.8   
2  72534014819  1973-01-01T02:00:00  41.78412  -87.75514      185.8   
3  72534014819  1973-01-01T03:00:00  41.78412  -87.75514      185.8   
4  72534014819  1973-01-01T04:00:00  41.78412  -87.75514      185.8   

                            NAME REPORT_TYPE SOURCE HourlyAltimeterSetting  \
0  CHICAGO MIDWAY AIRPORT, IL US       SY-SA      C                    NaN   
1  CHICAGO MIDWAY AIRPORT, IL US       SAO        C                  30.06   
2  CHICAGO MIDWAY AIRPORT, IL US       SAO        C                  30.06   
3  CHICAGO MIDWAY AIRPORT, IL US       SAO        C                  30.09   
4  CHICAGO MIDWAY AIRPORT, IL US       SAO        C                  30.09   

  HourlyDewPointTemperature  ... BackupDistance BackupDistanceUnit  \
0                      11.0  ...  

In [None]:
# Serialise the combined frame to CSV text in memory, without the index column.
csv_buffer = io.StringIO()
combined_df.to_csv(path_or_buf=csv_buffer, index=False)

# Destination object key for the merged dataset.
output_key = 'MOSES_group/midway_data/combined_midway_data.csv'

# Upload the in-memory CSV text as a single S3 object.
s3.put_object(
    Body=csv_buffer.getvalue(),
    Bucket=bucket,
    Key=output_key,
)

print(f"Combined file uploaded to s3://{bucket}/{output_key}")

✅ Combined file uploaded to s3://de300spring2025/MOSES_group/midway_data/combined_midway_data.csv
