# Data Quest Part 3: Data Analytics

This notebook covers:
- Loading and analyzing US population data from a JSON API response
- Loading and analyzing a time-series dataset from a CSV file
- Joining the datasets for combined analysis

In [None]:
import pandas as pd
import json

## Load the CSV (Part 1)

In [None]:
# Load the time-series file. The upstream BLS copy of pr.data.0.Current is
# presumably tab-delimited with space-padded fields (TODO: confirm against the
# stored file). sep=None makes the Python engine sniff the delimiter from the
# header row, so a comma-delimited copy still loads as before.
csv_df = pd.read_csv("pr.data.0.Current", sep=None, engine="python")
csv_df.columns = csv_df.columns.str.strip()

# Trim padding from text values as well as from the headers, so exact-match
# filters such as series_id == "PRS30006032" can find their rows.
for text_column in csv_df.select_dtypes(include=["object", "string"]).columns:
    csv_df[text_column] = csv_df[text_column].str.strip()

## Load the JSON (Part 2)

In [None]:
# Read the saved API response. JSON is UTF-8 by specification, so the encoding
# is pinned here instead of being left to the platform default.
with open("population_data.json", "r", encoding="utf-8") as f:
    population_data = json.load(f)

# Flatten the "data" array into one record per entry. Each value goes through
# str() first so that both comma-formatted strings ("1,234") and plain JSON
# integers parse; the original .replace() call only accepted strings.
pop_records = [
    {
        "year": int(item["date"]),
        "Population": int(str(item["value"]).replace(",", "")),
    }
    for item in population_data["data"]
]
pop_df = pd.DataFrame(pop_records)

## Part A: US Population Mean and Standard Deviation (2013–2018)

In [None]:
# Restrict to the inclusive 2013-2018 window, then summarise the Population column.
in_window = pop_df["year"].between(2013, 2018)
filtered_pop = pop_df[in_window]
population_in_window = filtered_pop["Population"]

mean_population = population_in_window.mean()
std_population = population_in_window.std()  # pandas default: sample std (ddof=1)

print("Mean Population (2013-2018):", mean_population)
print("Standard Deviation (2013-2018):", std_population)

## Part B: Best Year by Series ID (Sum of Values per Year)

In [None]:
# Coerce "value" to numeric; anything unparseable becomes NaN and is skipped by sum().
csv_df["value"] = pd.to_numeric(csv_df["value"], errors="coerce")

# Total the values per (series_id, year), keeping the keys as ordinary columns.
grouped = csv_df.groupby(["series_id", "year"], as_index=False)["value"].sum()

# For each series, pick the row label of its largest yearly total and pull those rows.
best_row_labels = grouped.groupby("series_id")["value"].idxmax()
best_year_df = grouped.loc[best_row_labels].reset_index(drop=True)
best_year_df

## Part C: Join Series and Population Data for PRS30006032 and Q01

In [None]:
# Keep only the Q01 rows of series PRS30006032; .copy() detaches the slice from csv_df.
is_target_series = csv_df["series_id"] == "PRS30006032"
is_first_quarter = csv_df["period"] == "Q01"
filtered_series = csv_df[is_target_series & is_first_quarter].copy()

# Left join keeps every series row; Population is NaN where pop_df has no matching year.
merged_df = filtered_series.merge(pop_df, how="left", on="year")

report_columns = ["series_id", "year", "period", "value", "Population"]
final_df = merged_df[report_columns]
final_df