#### Imports

In [1]:
import json
import os

import boto3
import pandas as pd
from botocore.exceptions import ClientError, NoCredentialsError
from dotenv import find_dotenv, load_dotenv

#### Load environment variables

In [2]:
# Find a .env file with find_dotenv() and load it; the call's return value is
# left as the last expression so the cell displays it.
load_dotenv(dotenv_path=find_dotenv())

True

#### Create S3 Client

In [3]:
# Where the population dataset lives in the object store.
S3_BUCKET = "data"  # Replace with your S3 bucket name
S3_KEY = "population-data.json"  # Replace with your S3 object key

# Build the S3 client from environment variables. A variable that is not set
# is passed through as None; the region alone has an explicit fallback, which
# also applies when AWS_REGION is set to an empty string.
s3 = boto3.client(
    service_name="s3",
    region_name=os.getenv("AWS_REGION") or "us-east-1",
    endpoint_url=os.getenv("MINIO_ENDPOINT"),
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
)

#### Fetch the data from S3

In [4]:
# Download the object from S3, decode it as UTF-8 and parse it as JSON into
# `population_json`.
#
# On failure a short message is printed and the original exception is
# re-raised. Re-raising (rather than calling exit()) matters in a notebook:
# exit() is an interactive-shell helper that asks the session to exit instead
# of failing the cell, so the error would not appear as a traceback here.
# With `raise`, "Run All" stops at this cell and later cells never run
# against a missing `population_json`.
try:
    response = s3.get_object(Bucket=S3_BUCKET, Key=S3_KEY)
    # The body is a stream; read it fully before decoding.
    content = response["Body"].read().decode("utf-8")
    population_json = json.loads(content)
except NoCredentialsError:
    print("Error: AWS credentials not found.")
    raise
except ClientError as e:
    # Raised by botocore for service-side errors returned by the request.
    print(f"Error fetching file from S3: {e}")
    raise

#### Load the JSON data into a pandas DataFrame and compute statistics

In [5]:
# Build the frame from the records stored under the payload's "data" key.
df = pd.DataFrame(population_json["data"])

# Coerce the Population column to a numeric dtype before doing arithmetic.
df["Population"] = pd.to_numeric(df["Population"])

# Keep only the rows whose "ID Year" lies in 2013..2018; between() includes
# both endpoints by default.
df_filtered = df.loc[df["ID Year"].between(2013, 2018)]

# --------------------------
# Mean and standard deviation of Population over the filtered rows
# (Series.std() uses the sample standard deviation, ddof=1, by default)
# --------------------------
mean_population, std_population = (
    df_filtered["Population"].agg(stat) for stat in ("mean", "std")
)

#### Display the calculated results

In [6]:
# Report the statistics, one per line, rounded to whole people with
# thousands separators.
print(
    "US Population Statistics (2013-2018):",
    f"Mean population: {mean_population:,.0f}",
    f"Standard deviation: {std_population:,.0f}",
    sep="\n",
)

US Population Statistics (2013-2018):
Mean population: 322,034,930
Standard deviation: 4,075,506
