In [9]:
import requests
import pandas as pd

In [17]:
# Fetch the JSON export of the dataset.
url = "https://chronicdata.cdc.gov/views/735e-byxc/rows.json?accessType=DOWNLOAD"
# requests applies no timeout by default, so a stalled connection would hang
# the cell forever: allow 10 s to connect and 120 s between received chunks.
response = requests.get(url, timeout=(10, 120))
# Fail fast on an HTTP error instead of trying to parse an error page as data.
response.raise_for_status()
data = response.json()

# The payload keeps the records under 'data' and the column descriptions under
# 'meta' -> 'view' -> 'columns'. The code below assumes each record is a
# sequence laid out in the same order as that column list.
rows = data['data']
columns = data['meta']['view']['columns']

# Find the position of every column of interest. list.index raises ValueError
# if the portal ever renames or drops one of them, which is the desired
# loud failure.
column_names = [column['name'] for column in columns]
year_start_index = column_names.index('YearStart')
location_abbr_index = column_names.index('LocationAbbr')
data_value_index = column_names.index('Data_Value')
geolocation_index = column_names.index('GeoLocation')
sample_size_index = column_names.index('Sample_Size')
question_index = column_names.index('Question')
states_index = column_names.index('States')
counties_index = column_names.index('Counties')
stratification1_index = column_names.index('Stratification1')

# Build one output record per source row.
extracted_data = []
for row in rows:
    # Other columns of this export contain nulls, so guard against a null
    # location as well: treat it as empty rather than letting len() raise
    # TypeError on None.
    geolocation = row[geolocation_index] or ()
    # Element 1 is used as the latitude and element 2 as the longitude.
    latitude = geolocation[1] if len(geolocation) > 1 else None
    longitude = geolocation[2] if len(geolocation) > 2 else None

    question = row[question_index]
    if question is None:
        # A null question cannot be classified (and `in` would raise TypeError).
        age_category = None
    elif '3-23 months old' in question:
        age_category = '3-23 months old'
    else:
        # NOTE(review): every other question text falls into this bucket;
        # confirm the dataset only ever contains the two age groups.
        age_category = 'aged 2 to 4 year'

    extracted_data.append([
        row[year_start_index],
        row[location_abbr_index],
        row[data_value_index],
        latitude,
        longitude,
        row[sample_size_index],
        question,
        age_category,
        row[states_index],
        row[counties_index],
        row[stratification1_index]
    ])

# Create a dataframe from the extracted data; the column order matches the
# order of the values appended above.
df = pd.DataFrame(extracted_data, columns=['YearStart', 'LocationAbbr', 'Data_Value', 'Latitude', 'Longitude', 'Sample_Size', 'Question', 'Age_Category', 'States', 'Counties', 'Stratification1'])

In [32]:
# Preview the first five rows of the extracted frame.
df.head(n=5)

Unnamed: 0,YearStart,LocationAbbr,Data_Value,Latitude,Longitude,Sample_Size,Question,Age_Category,States,Counties,Stratification1
0,2018,WA,13.7,47.522279,-120.470011,12230.0,Percent of WIC children aged 3-23 months old w...,3-23 months old,6.0,2956.0,18 - 23
1,2018,HI,10.2,21.30485,-157.857749,2192.0,Percent of WIC children aged 3-23 months old w...,3-23 months old,4.0,1657.0,Hispanic
2,2020,MA,17.6,42.27687,-72.082691,13520.0,Percent of WIC children aged 2 to 4 years who ...,aged 2 to 4 year,25.0,1919.0,Hispanic
3,2020,TN,18.9,35.680941,-85.774491,4947.0,Percent of WIC children aged 2 to 4 years who ...,aged 2 to 4 year,39.0,2159.0,Hispanic
4,2012,GU,12.0,13.444304,144.793731,1456.0,Percent of WIC children aged 2 to 4 years who ...,aged 2 to 4 year,,,Male


In [28]:
# Cast the numeric columns to numeric dtypes, one column at a time and in a
# fixed order, so a bad value fails on the first offending column.
numeric_casts = (
    ('YearStart', int),
    ('Data_Value', float),
    ('Sample_Size', float),
    ('Latitude', float),
    ('Longitude', float),
)
for numeric_column, target_dtype in numeric_casts:
    df[numeric_column] = df[numeric_column].astype(target_dtype)

In [29]:
# Print a concise summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12852 entries, 0 to 12851
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   YearStart        12852 non-null  int32  
 1   LocationAbbr     12852 non-null  object 
 2   Data_Value       12473 non-null  float64
 3   Latitude         12852 non-null  float64
 4   Longitude        12852 non-null  float64
 5   Sample_Size      12473 non-null  float64
 6   Question         12852 non-null  object 
 7   Age_Category     12852 non-null  object 
 8   States           12138 non-null  object 
 9   Counties         12376 non-null  object 
 10  Stratification1  12852 non-null  object 
dtypes: float64(4), int32(1), object(6)
memory usage: 1.0+ MB


In [30]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# Data_Value column; missing values are excluded from the count.
data_value_stats = df.loc[:, 'Data_Value'].describe()
print(data_value_stats)

count    12473.000000
mean        13.919971
std          3.643496
min          1.500000
25%         11.600000
50%         14.100000
75%         16.200000
max         36.100000
Name: Data_Value, dtype: float64


In [31]:
# Number of non-missing values in each column.
df.count(axis=0)

YearStart          12852
LocationAbbr       12852
Data_Value         12473
Latitude           12852
Longitude          12852
Sample_Size        12473
Question           12852
Age_Category       12852
States             12138
Counties           12376
Stratification1    12852
dtype: int64