**Request Census API data: Median Household Income, High School Completion, Poverty, and Race Distribution**

In [4]:
import os
import time

import pandas as pd
import requests

# Census API key, read from the CENSUS_API_KEY environment variable so the
# credential is not stored in the notebook. When the variable is unset or
# empty, API_KEY is None; requests omits query parameters whose value is None,
# so the "key" parameter is simply left out of the request.
API_KEY = os.environ.get("CENSUS_API_KEY") or None

# URL template for the ACS API; formatted with a survey year and a dataset
# path segment (the callers below pass "acs1" or "acs5").
base_url = "https://api.census.gov/data/{year}/acs/{dataset}"

# Define function to make API calls
def get_census_data(year, variable, dataset="acs1", geo_level="place:*", timeout=30):
    """Fetch one year of ACS data and return it as a DataFrame.

    Args:
        year: Survey year substituted into the API URL.
        variable: ACS variable code, or several codes joined by commas, sent
            as the "get" query parameter.
        dataset: Dataset path segment of the URL (e.g. "acs1" or "acs5").
        geo_level: Value of the "for" query parameter selecting the geography.
        timeout: Seconds to wait for the server before abandoning the request.

    Returns:
        A DataFrame whose columns are the header row of the response plus a
        "year" column, or None when the request fails, the server answers with
        a status other than 200, or the body cannot be parsed. A message is
        printed in every failure case. Values are kept exactly as the API
        returned them; no dtype conversion is done here.
    """
    url = base_url.format(year=year, dataset=dataset)
    params = {
        "get": variable,
        "for": geo_level,
        "key": API_KEY
    }

    # Without a timeout a stalled connection would block the notebook forever;
    # network failures are reported the same way as HTTP errors (print + None)
    # so one bad request does not abort the caller's collection loop.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed for year {year} and variable {variable}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code} for year {year} and variable {variable}: {response.text}")
        return None

    # The payload is a list of rows whose first row holds the column names.
    # An empty or non-JSON body raises ValueError, which is reported like any
    # other failure.
    try:
        header, *rows = response.json()
    except ValueError as exc:
        print(f"Unreadable response for year {year} and variable {variable}: {exc}")
        return None

    df = pd.DataFrame(rows, columns=header)
    df['year'] = year  # Add year column to identify data from each year
    return df

# Years for which data is required
years = range(2019, 2024)  # 2019-2023

# Variables for each metric
variables = {
    "Median_Household_Income": "B19013_001E",
    "High_School_Completion": "B15003_017E",
    "Below_Poverty_Level": "B17001_002E",
    "Race_Distribution": "B02001_002E,B02001_003E,B02001_004E,B02001_005E,B02001_006E,B02001_007E,B02001_008E"
}

# Directory that receives the raw CSV exports. The default is the original
# author's location; set the RAW_DATA_DIR environment variable to run the
# notebook on another machine.
RAW_DATA_DIR = os.environ.get(
    "RAW_DATA_DIR",
    "/Users/celinepham/Documents/IRONHACK/FINAL_PROJECT/raw",
)

# Dictionary to store data for each variable across years
data_frames = {}

# Loop through each variable and each year, collect and append data
for var_name, var_code in variables.items():
    yearly_data = []
    missing_years = []
    for year in years:
        # Use acs1 dataset by default, but use acs5 for 2020 due to limited availability in acs1.
        # NOTE(review): 1-year and 5-year estimates are different products with
        # different geographic coverage, so the 2020 rows are not directly
        # comparable with the other years - confirm this is acceptable downstream.
        dataset = "acs5" if year == 2020 else "acs1"
        df = get_census_data(year, var_code, dataset=dataset)
        if df is None:
            missing_years.append(year)
        else:
            yearly_data.append(df)

    # Surface gaps explicitly: the output file name always says 2019-2023.
    if missing_years:
        print(f"Warning: no data for {var_name} in {missing_years}")

    # Concatenate data for each year into a single DataFrame for this variable
    if yearly_data:
        data_frames[var_name] = pd.concat(yearly_data, ignore_index=True)

# Access each DataFrame for analysis
for key, value in data_frames.items():
    print(f"\n{key} Data (2019-2023):")
    print(value.head())

# Export each DataFrame to a CSV file. Only metrics that were actually
# retrieved are written, so a failed download cannot raise a KeyError here.
os.makedirs(RAW_DATA_DIR, exist_ok=True)
for var_name, frame in data_frames.items():
    frame.to_csv(os.path.join(RAW_DATA_DIR, f"{var_name}_2019_2023.csv"), index=False)

not_exported = [name for name in variables if name not in data_frames]
if not_exported:
    print(f"Warning: nothing exported for {not_exported}")
if data_frames:
    print("Data exported successfully to CSV files.")


Median_Household_Income Data (2019-2023):
  B19013_001E state  place  year
0       46031    01  03076  2019
1       36753    01  07000  2019
2       47411    01  21184  2019
3       98689    01  35896  2019
4       54342    01  37000  2019

High_School_Completion Data (2019-2023):
  B15003_017E state  place  year
0       39166    39  15000  2019
1       14228    49  55980  2019
2       23964    49  83470  2019
3       12594    48  19972  2019
4       16786    48  48072  2019

Below_Poverty_Level Data (2019-2023):
  B17001_002E state  place  year
0       67331    39  15000  2019
1        8732    49  55980  2019
2       19946    49  83470  2019
3       18334    48  19972  2019
4       15352    48  48072  2019

Race_Distribution Data (2019-2023):
  B02001_002E B02001_003E B02001_004E B02001_005E B02001_006E B02001_007E  \
0       58595      142107        1323        3497           0        1877   
1       40302       24139          77         847          21        1739   
2       61453 

**Retrieve state-level population data by race for additional detail**

In [5]:

# Census API key, read from the CENSUS_API_KEY environment variable so the
# credential is not stored in the notebook. When it is unset or empty the
# value is None and requests leaves the "key" parameter out of the query.
API_KEY = os.environ.get("CENSUS_API_KEY") or None

# FIPS code to state abbreviation mapping
fips_to_state = {
    1: 'AL', 2: 'AK', 4: 'AZ', 5: 'AR', 6: 'CA', 8: 'CO', 9: 'CT', 10: 'DE',
    11: 'DC', 12: 'FL', 13: 'GA', 15: 'HI', 16: 'ID', 17: 'IL', 18: 'IN',
    19: 'IA', 20: 'KS', 21: 'KY', 22: 'LA', 23: 'ME', 24: 'MD', 25: 'MA',
    26: 'MI', 27: 'MN', 28: 'MS', 29: 'MO', 30: 'MT', 31: 'NE', 32: 'NV',
    33: 'NH', 34: 'NJ', 35: 'NM', 36: 'NY', 37: 'NC', 38: 'ND', 39: 'OH',
    40: 'OK', 41: 'OR', 42: 'PA', 44: 'RI', 45: 'SC', 46: 'SD', 47: 'TN',
    48: 'TX', 49: 'UT', 50: 'VT', 51: 'VA', 53: 'WA', 54: 'WV', 55: 'WI',
    56: 'WY', 72: 'PR'
}

# Census API base URL for ACS 5-Year Estimates (2019-2022)
BASE_URL = "https://api.census.gov/data"

# Seconds to wait for the API before giving up on a request
REQUEST_TIMEOUT_SECONDS = 30

# Define the years and variables to retrieve
years = range(2019, 2023)  # 2019 through 2022 only, as 2023 has a different endpoint
variables = {
    'total_population': 'B01001_001E',  # Total population
    'total_white': 'B02001_002E',       # White alone population
    'total_black': 'B02001_003E',       # Black or African American alone
    'total_american_indian_alaska_native': 'B02001_004E',  # American Indian/Alaska Native alone
    'total_asian': 'B02001_005E',       # Asian alone
    'total_native_hawaiian_pacific_islander': 'B02001_006E', # Native Hawaiian/Pacific Islander alone
    'total_other_race': 'B02001_007E'   # Some other race alone
}

# 2023 comes from the ACS 1-year data profile endpoint, which uses DP05 codes.
# NOTE(review): the pairing of DP05 codes with these column names is carried
# over unchanged from the earlier positional assignment - confirm against the
# 2023 DP05 variable list that each code really is the labelled race count.
# NOTE(review): 2019-2022 rows are 5-year estimates while 2023 is a 1-year
# estimate; confirm that mixing them is intended.
url_2023 = "https://api.census.gov/data/2023/acs/acs1/profile"
variables_2023 = {
    'total_population': 'DP05_0070E',
    'total_white': 'DP05_0071E',
    'total_black': 'DP05_0077E',
    'total_american_indian_alaska_native': 'DP05_0078E',
    'total_asian': 'DP05_0079E',
    'total_native_hawaiian_pacific_islander': 'DP05_0080E',
    'total_other_race': 'DP05_0081E'
}


def _fetch_state_frame(url, name_to_code, year):
    """Request state-level counts from one endpoint and tidy the result.

    Args:
        url: Full endpoint URL to query.
        name_to_code: Mapping of readable column name -> variable code; the
            codes are requested in the mapping's order.
        year: Value stored in the "year" column of the returned frame.

    Returns:
        (frame, status_code). frame is None when the status is not 200.
        Otherwise its columns are the readable names, "state_code", "year"
        and "state" (abbreviation looked up in fips_to_state).
    """
    response = requests.get(
        url,
        params={
            "get": ",".join(name_to_code.values()),
            "for": "state:*",
            "key": API_KEY,
        },
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        return None, response.status_code

    # First row of the payload is the header; rename by header value rather
    # than by position so a change in column order cannot mislabel the data.
    header, *rows = response.json()
    renames = {code: name for name, code in name_to_code.items()}
    renames["state"] = "state_code"
    frame = pd.DataFrame(rows, columns=header).rename(columns=renames)

    # Add the year column to differentiate records by year
    frame["year"] = year
    # Map FIPS codes to state abbreviations
    frame["state"] = frame["state_code"].astype(int).map(fips_to_state)
    return frame, response.status_code


# List to collect data frames for each year
data_frames = []

# Loop through each year, requesting data and handling missing years
for year in years:
    df, status_code = _fetch_state_frame(f"{BASE_URL}/{year}/acs/acs5", variables, year)
    if df is None:
        # Print message if the data for a particular year is not available
        print(f"Data for {year} is not available (Status code: {status_code})")
        continue

    data_frames.append(df)
    print(f"Data for {year} retrieved successfully.")

    # Respectful delay between API calls to avoid hitting rate limits
    time.sleep(1)

# Handle 2023 data from a different endpoint
df_2023, status_2023 = _fetch_state_frame(url_2023, variables_2023, 2023)
if df_2023 is not None:
    data_frames.append(df_2023)
    print("Data for 2023 retrieved successfully.")
else:
    print(f"Failed to retrieve data for 2023: {status_2023}")

# Fail with a clear message instead of pandas' generic error on an empty list
if not data_frames:
    raise RuntimeError("No population data could be retrieved; nothing to combine or save.")

# Combine data from each year into a single DataFrame
population_data = pd.concat(data_frames, ignore_index=True)

# Convert population columns to numeric types for consistency and calculations
count_columns = list(variables)
population_data[count_columns] = population_data[count_columns].apply(pd.to_numeric)

# Drop the original state_code column if no longer needed
population_data = population_data.drop(columns=['state_code'])

# Save the data to a CSV file for future use
population_data.to_csv("state_population_by_race_2019_2023.csv", index=False)
print("Data saved to state_population_by_race_2019_2023.csv")

     
       


Data for 2019 retrieved successfully.
Data for 2020 retrieved successfully.
Data for 2021 retrieved successfully.
Data for 2022 retrieved successfully.
Data for 2023 retrieved successfully.
Data saved to state_population_by_race_2019_2023.csv


**Web Scraping Fatal Shooting Data in the US**


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# "detach" keeps the Chrome window open after this cell finishes
# (presumably so the triggered download can complete - confirm).
chrome_option = webdriver.ChromeOptions()
chrome_option.add_experimental_option("detach", True)

driver = webdriver.Chrome(options=chrome_option)
driver.get("https://mappingpoliceviolence.us/")

# Wait until the download button is present and clickable instead of looking
# it up immediately: the lookup would otherwise fail if the page has not
# finished rendering. Raises TimeoutException after 20 seconds.
button_download = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, 'a.sqs-block-button-element--medium.sqs-button-element--primary')
    )
)
button_download.click()

