In [8]:
pip install folktables

Collecting folktables
  Using cached folktables-0.0.12-py3-none-any.whl.metadata (533 bytes)
Collecting requests (from folktables)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting scikit-learn (from folktables)
  Downloading scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting charset-normalizer<4,>=2 (from requests->folktables)
  Downloading charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (33 kB)
Collecting idna<4,>=2.5 (from requests->folktables)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting urllib3<3,>=1.21.1 (from requests->folktables)
  Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)
Collecting certifi>=2017.4.17 (from requests->folktables)
  Downloading certifi-2024.6.2-py3-none-any.whl.metadata (2.2 kB)
Collecting scipy>=1.6.0 (from scikit-learn->folktables)
  Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.ma

In [9]:
#Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os
import glob

sns.set_style('whitegrid')

from folktables import ACSDataSource, ACSIncome
from sklearn.linear_model import LogisticRegression

# Link to dataset: https://www2.census.gov/programs-surveys/acs/data/pums/

In [2]:
all_states = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI',
              'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI',
              'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC',
              'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT',
              'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR']

In [3]:
features=['AGEP','COW','SCHL','MAR','OCCP','POBP','RELP','WKHP','SEX','RAC1P','PINCP']
features_no_relp=['AGEP','COW','SCHL','MAR','OCCP','POBP','WKHP','SEX','RAC1P','PINCP']

In [4]:
current_path = os.getcwd()
year = 2022

In [5]:
central_df = columns = ['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation', 'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income']
central_df = pd.DataFrame(columns=columns)

#Download datasets for all states
for i, state in enumerate(all_states):
    print('Start loading data for ', state)
    print(f'This is state {i + 1} of {len(all_states)}')

    state_list = [state]
    data_source = ACSDataSource(survey_year=year, horizon='1-Year', survey='person')
    ca_data = data_source.get_data(states=state_list, download=True)
    if 'RELP' in ca_data.columns:
        df = ca_data[features]
        df.drop(['REPL'])
    else:
        df = ca_data[features_no_relp]
    file = 'ACS-'+str(year) +'-' + str(state_list[0]) + ".csv" 

    # add column with state
    df['State'] = state_list[0]

    # create column with binary income variable, below or above 50k
    df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]

    # rename columns to 
    df = df.rename(columns= {"AGEP": "Age", "COW": "Class of Worker", "SCHL": "Education", "MAR": "Marital Status", "OCCP":"Occupation", "POBP": "Place of Birth", "WKHP":"Worked hours", "SEX":"Sex", "RAC1P":"Race"}, errors="raise")
    
    # drop rows with NaN values
    df = df.dropna()

    # drop original income column
    df.drop('PINCP', axis=1, inplace=True)

    print('The current columns are ', df.columns)

    central_df = pd.concat([central_df, df], ignore_index=True)

    # save data as .csv file
    # filepath = os.path.join(current_path, "acs_data", file)
    # df.to_csv(filepath, index=False)
    print('Len of df', len(df))
    print('Finished loading csv for this state')
    print('--------------------')

file = 'ACS-'+str(year) +'-' + 'complete' + ".csv"
filepath = os.path.join(current_path, "acs_data", file)
central_df.to_csv(filepath, index=False)

Start loading data for  AL
This is state 1 of 51
Downloading data for 2022 1-Year person survey for AL...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]
  central_df = pd.concat([central_df, df], ignore_index=True)


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 24201
Finished loading csv for this state
--------------------
Start loading data for  AK
This is state 2 of 51
Downloading data for 2022 1-Year person survey for AK...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 3591
Finished loading csv for this state
--------------------
Start loading data for  AZ
This is state 3 of 51
Downloading data for 2022 1-Year person survey for AZ...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 35308
Finished loading csv for this state
--------------------
Start loading data for  AR
This is state 4 of 51
Downloading data for 2022 1-Year person survey for AR...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 14201
Finished loading csv for this state
--------------------
Start loading data for  CA
This is state 5 of 51
Downloading data for 2022 1-Year person survey for CA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 201841
Finished loading csv for this state
--------------------
Start loading data for  CO
This is state 6 of 51
Downloading data for 2022 1-Year person survey for CO...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 33682
Finished loading csv for this state
--------------------
Start loading data for  CT
This is state 7 of 51
Downloading data for 2022 1-Year person survey for CT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 20487
Finished loading csv for this state
--------------------
Start loading data for  DE
This is state 8 of 51
Downloading data for 2022 1-Year person survey for DE...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 4839
Finished loading csv for this state
--------------------
Start loading data for  FL
This is state 9 of 51
Downloading data for 2022 1-Year person survey for FL...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 105790
Finished loading csv for this state
--------------------
Start loading data for  GA
This is state 10 of 51
Downloading data for 2022 1-Year person survey for GA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 55896
Finished loading csv for this state
--------------------
Start loading data for  HI
This is state 11 of 51
Downloading data for 2022 1-Year person survey for HI...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 7880
Finished loading csv for this state
--------------------
Start loading data for  ID
This is state 12 of 51
Downloading data for 2022 1-Year person survey for ID...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 9978
Finished loading csv for this state
--------------------
Start loading data for  IL
This is state 13 of 51
Downloading data for 2022 1-Year person survey for IL...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 67326
Finished loading csv for this state
--------------------
Start loading data for  IN
This is state 14 of 51
Downloading data for 2022 1-Year person survey for IN...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 36514
Finished loading csv for this state
--------------------
Start loading data for  IA
This is state 15 of 51
Downloading data for 2022 1-Year person survey for IA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 18097
Finished loading csv for this state
--------------------
Start loading data for  KS
This is state 16 of 51
Downloading data for 2022 1-Year person survey for KS...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 15908
Finished loading csv for this state
--------------------
Start loading data for  KY
This is state 17 of 51
Downloading data for 2022 1-Year person survey for KY...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 22632
Finished loading csv for this state
--------------------
Start loading data for  LA
This is state 18 of 51
Downloading data for 2022 1-Year person survey for LA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 21008
Finished loading csv for this state
--------------------
Start loading data for  ME
This is state 19 of 51
Downloading data for 2022 1-Year person survey for ME...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 7399
Finished loading csv for this state
--------------------
Start loading data for  MD
This is state 20 of 51
Downloading data for 2022 1-Year person survey for MD...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 33665
Finished loading csv for this state
--------------------
Start loading data for  MA
This is state 21 of 51
Downloading data for 2022 1-Year person survey for MA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 41625
Finished loading csv for this state
--------------------
Start loading data for  MI
This is state 22 of 51
Downloading data for 2022 1-Year person survey for MI...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 50595
Finished loading csv for this state
--------------------
Start loading data for  MN
This is state 23 of 51
Downloading data for 2022 1-Year person survey for MN...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 32012
Finished loading csv for this state
--------------------
Start loading data for  MS
This is state 24 of 51
Downloading data for 2022 1-Year person survey for MS...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 13519
Finished loading csv for this state
--------------------
Start loading data for  MO
This is state 25 of 51
Downloading data for 2022 1-Year person survey for MO...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 32181
Finished loading csv for this state
--------------------
Start loading data for  MT
This is state 26 of 51
Downloading data for 2022 1-Year person survey for MT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 5770
Finished loading csv for this state
--------------------
Start loading data for  NE
This is state 27 of 51
Downloading data for 2022 1-Year person survey for NE...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 11111
Finished loading csv for this state
--------------------
Start loading data for  NV
This is state 28 of 51
Downloading data for 2022 1-Year person survey for NV...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 15569
Finished loading csv for this state
--------------------
Start loading data for  NH
This is state 29 of 51
Downloading data for 2022 1-Year person survey for NH...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 7965
Finished loading csv for this state
--------------------
Start loading data for  NJ
This is state 30 of 51
Downloading data for 2022 1-Year person survey for NJ...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 50354
Finished loading csv for this state
--------------------
Start loading data for  NM
This is state 31 of 51
Downloading data for 2022 1-Year person survey for NM...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 9219
Finished loading csv for this state
--------------------
Start loading data for  NY
This is state 32 of 51
Downloading data for 2022 1-Year person survey for NY...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 105420
Finished loading csv for this state
--------------------
Start loading data for  NC
This is state 33 of 51
Downloading data for 2022 1-Year person survey for NC...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 54857
Finished loading csv for this state
--------------------
Start loading data for  ND
This is state 34 of 51
Downloading data for 2022 1-Year person survey for ND...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 4498
Finished loading csv for this state
--------------------
Start loading data for  OH
This is state 35 of 51
Downloading data for 2022 1-Year person survey for OH...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 62508
Finished loading csv for this state
--------------------
Start loading data for  OK
This is state 36 of 51
Downloading data for 2022 1-Year person survey for OK...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 18442
Finished loading csv for this state
--------------------
Start loading data for  OR
This is state 37 of 51
Downloading data for 2022 1-Year person survey for OR...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 22456
Finished loading csv for this state
--------------------
Start loading data for  PA
This is state 38 of 51
Downloading data for 2022 1-Year person survey for PA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 68221
Finished loading csv for this state
--------------------
Start loading data for  RI
This is state 39 of 51
Downloading data for 2022 1-Year person survey for RI...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 5760
Finished loading csv for this state
--------------------
Start loading data for  SC
This is state 40 of 51
Downloading data for 2022 1-Year person survey for SC...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 26933
Finished loading csv for this state
--------------------
Start loading data for  SD
This is state 41 of 51
Downloading data for 2022 1-Year person survey for SD...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 4961
Finished loading csv for this state
--------------------
Start loading data for  TN
This is state 42 of 51
Downloading data for 2022 1-Year person survey for TN...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 36419
Finished loading csv for this state
--------------------
Start loading data for  TX
This is state 43 of 51
Downloading data for 2022 1-Year person survey for TX...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 149261
Finished loading csv for this state
--------------------
Start loading data for  UT
This is state 44 of 51
Downloading data for 2022 1-Year person survey for UT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 18921
Finished loading csv for this state
--------------------
Start loading data for  VT
This is state 45 of 51
Downloading data for 2022 1-Year person survey for VT...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 3833
Finished loading csv for this state
--------------------
Start loading data for  VA
This is state 46 of 51
Downloading data for 2022 1-Year person survey for VA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 47371
Finished loading csv for this state
--------------------
Start loading data for  WA
This is state 47 of 51
Downloading data for 2022 1-Year person survey for WA...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 42172
Finished loading csv for this state
--------------------
Start loading data for  WV
This is state 48 of 51
Downloading data for 2022 1-Year person survey for WV...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 8056
Finished loading csv for this state
--------------------
Start loading data for  WI
This is state 49 of 51
Downloading data for 2022 1-Year person survey for WI...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 32774
Finished loading csv for this state
--------------------
Start loading data for  WY
This is state 50 of 51
Downloading data for 2022 1-Year person survey for WY...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 3240
Finished loading csv for this state
--------------------
Start loading data for  PR
This is state 51 of 51
Downloading data for 2022 1-Year person survey for PR...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['State'] = state_list[0]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Income'] = ['>50K' if v>50000 else '<=50K' for v in df['PINCP']]


The current columns are  Index(['Age', 'Class of Worker', 'Education', 'Marital Status', 'Occupation',
       'Place of Birth', 'Worked hours', 'Sex', 'Race', 'State', 'Income'],
      dtype='object')
Len of df 10343
Finished loading csv for this state
--------------------


In [7]:
central_df['State'].unique()

array(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI',
       'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI',
       'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC',
       'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT',
       'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'PR'], dtype=object)