In [1]:
import requests
import re
import json
import sqlite3

def scrape_election_data_from_view_source(state_name):
    # Construct the URL
    url = f"https://www.nbcnews.com/politics/2024-elections/{state_name}-president-results"

    # Fetch the page source
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to fetch data for {state_name}: {response.status_code}")
        return []

    # Look for JSON-like structure in the page source
    try:
        # Regex pattern to extract the JSON array with county-level data
        json_pattern = re.compile(
            r'\[\{"name":"[^"]+","percentIn":\d+\.?\d*,"votes":\d+,"candidates":\[.*?\]\}\]'
        )
        match = json_pattern.search(response.text)
        if not match:
            print(f"No JSON data found for {state_name}")
            return []

        # Parse the JSON
        data = json.loads(match.group(0))
        print(f"Successfully fetched data for {state_name}")
        return data
    except Exception as e:
        print(f"Error parsing data for {state_name}: {e}")
        return []

def save_to_sqlite_single_table(db_name, state_name, data):
    # Connect to SQLite database (or create it)
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    # Create a single table if it doesn't exist
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS election_results (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            state TEXT,
            county_name TEXT,
            percent_in REAL,
            total_votes INTEGER,
            candidate_name TEXT,
            candidate_party TEXT,
            candidate_votes INTEGER,
            candidate_percent_vote REAL
        )
    ''')

    # Insert data into the single table
    for county in data:
        state = state_name
        county_name = county.get('name')
        percent_in = county.get('percentIn')
        total_votes = county.get('votes')

        for candidate in county.get('candidates', []):
            candidate_name = candidate.get('name')
            candidate_party = candidate.get('party')
            candidate_votes = candidate.get('votes')
            candidate_percent_vote = candidate.get('percentVote')

            cursor.execute('''
                INSERT INTO election_results (
                    state, county_name, percent_in, total_votes,
                    candidate_name, candidate_party, candidate_votes, candidate_percent_vote
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', (
                state, county_name, percent_in, total_votes,
                candidate_name, candidate_party, candidate_votes, candidate_percent_vote
            ))

    # Commit and close connection
    conn.commit()
    conn.close()
    print(f"Data for {state_name} saved to {db_name}.")

def scrape_and_save_all_states_single_table():
    # List of all states and D.C., formatted for URLs
    states = [
        "alabama", "alaska", "arizona", "arkansas", "california", "colorado",
        "connecticut", "delaware", "district-of-columbia", "florida", "georgia",
        "hawaii", "idaho", "illinois", "indiana", "iowa", "kansas", "kentucky",
        "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota",
        "mississippi", "missouri", "montana", "nebraska", "nevada", "new-hampshire",
        "new-jersey", "new-mexico", "new-york", "north-carolina", "north-dakota",
        "ohio", "oklahoma", "oregon", "pennsylvania", "rhode-island",
        "south-carolina", "south-dakota", "tennessee", "texas", "utah", "vermont",
        "virginia", "washington", "west-virginia", "wisconsin", "wyoming"
    ]

    # Database name
    db_name = "election_results_new.db"

    for state in states:
        print(f"Scraping data for {state}...")
        data = scrape_election_data_from_view_source(state)
        if data:
            save_to_sqlite_single_table(db_name, state, data)
scrape_and_save_all_states_single_table()

Scraping data for alabama...
Successfully fetched data for alabama
Data for alabama saved to election_results_new.db.
Scraping data for alaska...
Successfully fetched data for alaska
Data for alaska saved to election_results_new.db.
Scraping data for arizona...
Successfully fetched data for arizona
Data for arizona saved to election_results_new.db.
Scraping data for arkansas...
Successfully fetched data for arkansas
Data for arkansas saved to election_results_new.db.
Scraping data for california...
Successfully fetched data for california
Data for california saved to election_results_new.db.
Scraping data for colorado...
Successfully fetched data for colorado
Data for colorado saved to election_results_new.db.
Scraping data for connecticut...
Successfully fetched data for connecticut
Data for connecticut saved to election_results_new.db.
Scraping data for delaware...
Successfully fetched data for delaware
Data for delaware saved to election_results_new.db.
Scraping data for district-of

In [2]:
import sqlite3

def view_table_data(db_name):
    # Connect to the database
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    # Fetch the first 10 rows from the election_results table
    cursor.execute('''
        SELECT * FROM election_results LIMIT 10;
    ''')
    rows = cursor.fetchall()

    # Get column names
    column_names = [description[0] for description in cursor.description]

    # Print column names and rows
    print("Column Names:")
    print(column_names)
    print("\nTable Data:")
    for row in rows:
        print(row)

    # Close the connection
    conn.close()

# Specify your database name
db_name = "election_results_new.db"
view_table_data(db_name)

Column Names:
['id', 'state', 'county_name', 'percent_in', 'total_votes', 'candidate_name', 'candidate_party', 'candidate_votes', 'candidate_percent_vote']

Table Data:
(1, 'alabama', 'Autauga', 95.0, 28281, 'Donald Trump', 'gop', 20484, 72.4)
(2, 'alabama', 'Autauga', 95.0, 28281, 'Kamala Harris', 'dem', 7439, 26.3)
(3, 'alabama', 'Autauga', 95.0, 28281, 'Robert F. Kennedy Jr.', 'ind', 154, 0.5)
(4, 'alabama', 'Autauga', 95.0, 28281, 'Write-ins', 'other', 91, 0.3)
(5, 'alabama', 'Autauga', 95.0, 28281, 'Chase Oliver', 'ind', 65, 0.2)
(6, 'alabama', 'Autauga', 95.0, 28281, 'Jill Stein', 'ind', 48, 0.2)
(7, 'alabama', 'Baldwin', 99.0, 122249, 'Donald Trump', 'gop', 95798, 78.4)
(8, 'alabama', 'Baldwin', 99.0, 122249, 'Kamala Harris', 'dem', 24934, 20.4)
(9, 'alabama', 'Baldwin', 99.0, 122249, 'Robert F. Kennedy Jr.', 'ind', 664, 0.5)
(10, 'alabama', 'Baldwin', 99.0, 122249, 'Write-ins', 'other', 441, 0.4)


In [3]:
import pandas as pd
election_data = pd.read_csv("countypres_2000-2020.csv")

election_data_cleaned = election_data[
    ['year', 'state', 'state_po', 'county_name', 'county_fips', 'candidate', 
     'party', 'candidatevotes', 'totalvotes']
].dropna()

election_data_cleaned = election_data_cleaned.rename(columns={
    'state_po': 'state_abbreviation',
    'county_fips': 'fips_code',
    'candidatevotes': 'votes_candidate',
    'totalvotes': 'votes_total'
})

election_data_cleaned

Unnamed: 0,year,state,state_abbreviation,county_name,fips_code,candidate,party,votes_candidate,votes_total
0,2000,ALABAMA,AL,AUTAUGA,1001.0,AL GORE,DEMOCRAT,4942,17208
1,2000,ALABAMA,AL,AUTAUGA,1001.0,GEORGE W. BUSH,REPUBLICAN,11993,17208
2,2000,ALABAMA,AL,AUTAUGA,1001.0,RALPH NADER,GREEN,160,17208
3,2000,ALABAMA,AL,AUTAUGA,1001.0,OTHER,OTHER,113,17208
4,2000,ALABAMA,AL,BALDWIN,1003.0,AL GORE,DEMOCRAT,13997,56480
...,...,...,...,...,...,...,...,...,...
72612,2020,WYOMING,WY,WASHAKIE,56043.0,DONALD J TRUMP,REPUBLICAN,3245,4032
72613,2020,WYOMING,WY,WESTON,56045.0,JOSEPH R BIDEN JR,DEMOCRAT,360,3560
72614,2020,WYOMING,WY,WESTON,56045.0,JO JORGENSEN,LIBERTARIAN,46,3560
72615,2020,WYOMING,WY,WESTON,56045.0,OTHER,OTHER,47,3560


In [4]:
import sqlite3

# Define the database and table name
db_name = "election_results_new.db"
table_name = "cleaned_election_data"

# Connect to the database
conn = sqlite3.connect(db_name)

# Save the cleaned DataFrame into the database as a new table
election_data_cleaned.to_sql(table_name, conn, if_exists="replace", index=False)

print(f"Cleaned data saved to the '{table_name}' table in the database '{db_name}'.")

# Close the connection
conn.close()

Cleaned data saved to the 'cleaned_election_data' table in the database 'election_results_new.db'.


In [5]:
import sqlite3

def view_table_data(db_name):
    # Connect to the database
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    # Fetch the first 10 rows from the election_results table
    cursor.execute('''
        SELECT * FROM cleaned_election_data LIMIT 10;
    ''')
    rows = cursor.fetchall()

    # Get column names
    column_names = [description[0] for description in cursor.description]

    # Print column names and rows
    print("Column Names:")
    print(column_names)
    print("\nTable Data:")
    for row in rows:
        print(row)

    # Close the connection
    conn.close()

# Specify your database name
db_name = "election_results_new.db"
view_table_data(db_name)


Column Names:
['year', 'state', 'state_abbreviation', 'county_name', 'fips_code', 'candidate', 'party', 'votes_candidate', 'votes_total']

Table Data:
(2000, 'ALABAMA', 'AL', 'AUTAUGA', 1001.0, 'AL GORE', 'DEMOCRAT', 4942, 17208)
(2000, 'ALABAMA', 'AL', 'AUTAUGA', 1001.0, 'GEORGE W. BUSH', 'REPUBLICAN', 11993, 17208)
(2000, 'ALABAMA', 'AL', 'AUTAUGA', 1001.0, 'RALPH NADER', 'GREEN', 160, 17208)
(2000, 'ALABAMA', 'AL', 'AUTAUGA', 1001.0, 'OTHER', 'OTHER', 113, 17208)
(2000, 'ALABAMA', 'AL', 'BALDWIN', 1003.0, 'AL GORE', 'DEMOCRAT', 13997, 56480)
(2000, 'ALABAMA', 'AL', 'BALDWIN', 1003.0, 'GEORGE W. BUSH', 'REPUBLICAN', 40872, 56480)
(2000, 'ALABAMA', 'AL', 'BALDWIN', 1003.0, 'RALPH NADER', 'GREEN', 1033, 56480)
(2000, 'ALABAMA', 'AL', 'BALDWIN', 1003.0, 'OTHER', 'OTHER', 578, 56480)
(2000, 'ALABAMA', 'AL', 'BARBOUR', 1005.0, 'AL GORE', 'DEMOCRAT', 5188, 10395)
(2000, 'ALABAMA', 'AL', 'BARBOUR', 1005.0, 'GEORGE W. BUSH', 'REPUBLICAN', 5096, 10395)


In [6]:
unemployment_data = pd.read_csv("Unemployment(UnemploymentMedianIncome).csv")
unemployment_cols = [col for col in unemployment_data.columns if 'Unemployment_rate_' in col]

# Melt the DataFrame
unemp_long = unemployment_data.melt(
    id_vars=['FIPS_Code', 'State', 'Area_Name'],
    value_vars=unemployment_cols,
    var_name='year_var',
    value_name='unemp_rate'
)

# Extract the year from 'year_var' and drop rows with NaN in 'unemp_rate'
unemp_long['year'] = unemp_long['year_var'].str.extract(r'(\d{4})').astype(int)
unemp_long = unemp_long.dropna(subset=['unemp_rate'])

# Exclude rows where FIPS_Code is 0 or ends in '00'
unemp_long = unemp_long[(unemp_long['FIPS_Code'] != 0) & (~unemp_long['FIPS_Code'].astype(str).str.endswith('00'))]

# Rename columns
unemp_long = unemp_long.rename(columns={'FIPS_Code': 'fips_code'})

unemp_long

Unnamed: 0,fips_code,State,Area_Name,year_var,unemp_rate,year
2,1001,AL,"Autauga County, AL",Unemployment_rate_2000,4.1,2000
3,1003,AL,"Baldwin County, AL",Unemployment_rate_2000,3.7,2000
4,1005,AL,"Barbour County, AL",Unemployment_rate_2000,5.6,2000
5,1007,AL,"Bibb County, AL",Unemployment_rate_2000,5.4,2000
6,1009,AL,"Blount County, AL",Unemployment_rate_2000,3.5,2000
...,...,...,...,...,...,...
75366,72145,PR,"Vega Baja Municipio, PR",Unemployment_rate_2022,7.4,2022
75367,72147,PR,"Vieques Municipio, PR",Unemployment_rate_2022,6.9,2022
75368,72149,PR,"Villalba Municipio, PR",Unemployment_rate_2022,10.4,2022
75369,72151,PR,"Yabucoa Municipio, PR",Unemployment_rate_2022,8.3,2022


In [7]:
table_name = "unemployment_data"

conn = sqlite3.connect(db_name)

unemp_long.to_sql(table_name, conn, if_exists="replace", index=False)

print(f"Unemployment data saved to the '{table_name}' table in the database '{db_name}'.")

# Close the connection
conn.close()


Unemployment data saved to the 'unemployment_data' table in the database 'election_results_new.db'.


In [8]:
import sqlite3

def view_table_data(db_name):
    # Connect to the database
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    # Fetch the first 10 rows from the election_results table
    cursor.execute('''
        SELECT * FROM unemployment_data LIMIT 10;
    ''')
    rows = cursor.fetchall()

    # Get column names
    column_names = [description[0] for description in cursor.description]

    # Print column names and rows
    print("Column Names:")
    print(column_names)
    print("\nTable Data:")
    for row in rows:
        print(row)

    # Close the connection
    conn.close()

# Specify your database name
db_name = "election_results_new.db"
view_table_data(db_name)


Column Names:
['fips_code', 'State', 'Area_Name', 'year_var', 'unemp_rate', 'year']

Table Data:
(1001, 'AL', 'Autauga County, AL', 'Unemployment_rate_2000', 4.1, 2000)
(1003, 'AL', 'Baldwin County, AL', 'Unemployment_rate_2000', 3.7, 2000)
(1005, 'AL', 'Barbour County, AL', 'Unemployment_rate_2000', 5.6, 2000)
(1007, 'AL', 'Bibb County, AL', 'Unemployment_rate_2000', 5.4, 2000)
(1009, 'AL', 'Blount County, AL', 'Unemployment_rate_2000', 3.5, 2000)
(1011, 'AL', 'Bullock County, AL', 'Unemployment_rate_2000', 8.5, 2000)
(1013, 'AL', 'Butler County, AL', 'Unemployment_rate_2000', 7.9, 2000)
(1015, 'AL', 'Calhoun County, AL', 'Unemployment_rate_2000', 5.1, 2000)
(1017, 'AL', 'Chambers County, AL', 'Unemployment_rate_2000', 4.4, 2000)
(1019, 'AL', 'Cherokee County, AL', 'Unemployment_rate_2000', 4.5, 2000)


In [9]:
# File paths
file_2000_2010 = 'population-2000-2010.csv'
file_2010_2020 = 'population-2010-2020.csv'
file_2020_2023 = 'population-2020-2023.csv'

# Read the CSV files
pop_2000_2010 = pd.read_csv(file_2000_2010, encoding='ISO-8859-1')
pop_2010_2020 = pd.read_csv(file_2010_2020, encoding='ISO-8859-1')
pop_2020_2023 = pd.read_csv(file_2020_2023, encoding='ISO-8859-1')

# Define columns to keep for each time range
columns_to_keep_2000 = ['STATE', 'COUNTY', 'STNAME'] + [f'POPESTIMATE{year}' for year in range(2000, 2011)]
columns_to_keep_2010 = ['STATE', 'COUNTY', 'STNAME'] + [f'POPESTIMATE{year}' for year in range(2011, 2021)]  # Exclude 2010
columns_to_keep_2020 = ['STATE', 'COUNTY', 'STNAME'] + [f'POPESTIMATE{year}' for year in range(2021, 2023)]  # Exclude 2020

# Filter the columns for each dataset
pop_2000_2010_filtered = pop_2000_2010[columns_to_keep_2000]
pop_2010_2020_filtered = pop_2010_2020[columns_to_keep_2010]
pop_2020_2023_filtered = pop_2020_2023[columns_to_keep_2020]

# Reset index before merging
pop_2000_2010_filtered = pop_2000_2010_filtered.reset_index(drop=True)
pop_2010_2020_filtered = pop_2010_2020_filtered.reset_index(drop=True)
pop_2020_2023_filtered = pop_2020_2023_filtered.reset_index(drop=True)

# Merge 2000-2010 with 2010-2020
combined_df = pd.merge(
    pop_2000_2010_filtered,
    pop_2010_2020_filtered,
    on=['STATE', 'COUNTY', 'STNAME'],
    how='inner'  # Change to 'outer' if you want to retain all rows
)

# Merge the above result with 2020-2023
combined_df = pd.merge(
    combined_df,
    pop_2020_2023_filtered,
    on=['STATE', 'COUNTY', 'STNAME'],
    how='inner'  # Change to 'outer' if you want to retain all rows
)

# Filter out rows where COUNTY is 0
combined_df = combined_df[combined_df['COUNTY'] != 0]

# Add a new column for the FIPS code
combined_df['FIPS'] = combined_df['STATE'].astype(str).str.zfill(1) + combined_df['COUNTY'].astype(str).str.zfill(3)

# Reset the index after filtering
combined_df = combined_df.reset_index(drop=True)

# Optional: Sort the DataFrame by STATE and COUNTY
combined_df = combined_df.sort_values(by=['STATE', 'COUNTY']).reset_index(drop=True)

# Save the updated DataFrame as a CSV file
output_file_fips = 'combined_population_with_fips.csv'
combined_df.to_csv(output_file_fips, index=False, encoding='utf-8')

print(f"Filtered DataFrame with FIPS codes saved as {output_file_fips}")
combined_df

Filtered DataFrame with FIPS codes saved as combined_population_with_fips.csv


Unnamed: 0,STATE,COUNTY,STNAME,POPESTIMATE2000,POPESTIMATE2001,POPESTIMATE2002,POPESTIMATE2003,POPESTIMATE2004,POPESTIMATE2005,POPESTIMATE2006,...,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020,POPESTIMATE2021,POPESTIMATE2022,FIPS
0,1,1,Alabama,44021,44889,45909,46800,48366,49676,51328,...,54922,54903,55302,55448,55533,55769,56145,59203,59726,1001
1,1,3,Alabama,141342,144875,147957,151509,156266,162183,168121,...,199306,203101,207787,212737,218071,223565,229287,239439,246531,1003
2,1,5,Alabama,29015,28863,28653,28594,28287,28027,27861,...,26768,26300,25828,25169,24887,24657,24589,24533,24700,1005
3,1,7,Alabama,19913,21028,21199,21399,21721,22042,22099,...,22541,22553,22590,22532,22300,22313,22136,22359,21986,1007
4,1,9,Alabama,51107,51845,52551,53457,54124,54624,55485,...,57536,57535,57487,57801,57770,57840,57879,59079,59516,1009
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3126,56,37,Wyoming,37552,36899,37428,37450,38026,38739,39749,...,44996,44780,44319,43663,43188,42917,42673,41626,41374,56037
3127,56,39,Wyoming,18381,18653,18837,19066,19467,19632,20014,...,22801,23083,23255,23383,23261,23385,23497,23605,23297,56039
3128,56,41,Wyoming,19666,19413,19587,19480,19470,19494,19709,...,20835,20777,20711,20449,20299,20196,20215,20681,20727,56041
3129,56,43,Wyoming,8252,8068,7988,7976,7960,8022,7979,...,8277,8282,8180,8013,7886,7824,7760,7719,7724,56043


In [10]:
table_name = "population_data"

# Connect to the SQLite database
conn = sqlite3.connect(db_name)

# Save the population data into the database
combined_df.to_sql(table_name, conn, if_exists="replace", index=False)

print(f"Population data saved to the '{table_name}' table in the database '{db_name}'.")

# Close the connection
conn.close()

Population data saved to the 'population_data' table in the database 'election_results_new.db'.


In [11]:
import sqlite3

def view_table_data(db_name):
    # Connect to the database
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    # Fetch the first 10 rows from the election_results table
    cursor.execute('''
        SELECT * FROM population_data LIMIT 10;
    ''')
    rows = cursor.fetchall()

    # Get column names
    column_names = [description[0] for description in cursor.description]

    # Print column names and rows
    print("Column Names:")
    print(column_names)
    print("\nTable Data:")
    for row in rows:
        print(row)

    # Close the connection
    conn.close()

# Specify your database name
db_name = "election_results_new.db"
view_table_data(db_name)


Column Names:
['STATE', 'COUNTY', 'STNAME', 'POPESTIMATE2000', 'POPESTIMATE2001', 'POPESTIMATE2002', 'POPESTIMATE2003', 'POPESTIMATE2004', 'POPESTIMATE2005', 'POPESTIMATE2006', 'POPESTIMATE2007', 'POPESTIMATE2008', 'POPESTIMATE2009', 'POPESTIMATE2010', 'POPESTIMATE2011', 'POPESTIMATE2012', 'POPESTIMATE2013', 'POPESTIMATE2014', 'POPESTIMATE2015', 'POPESTIMATE2016', 'POPESTIMATE2017', 'POPESTIMATE2018', 'POPESTIMATE2019', 'POPESTIMATE2020', 'POPESTIMATE2021', 'POPESTIMATE2022', 'FIPS']

Table Data:
(1, 1, 'Alabama', 44021, 44889, 45909, 46800, 48366, 49676, 51328, 52405, 53277, 54135, 54632, 55229, 54970, 54747, 54922, 54903, 55302, 55448, 55533, 55769, 56145, 59203, 59726, '1001')
(1, 3, 'Alabama', 141342, 144875, 147957, 151509, 156266, 162183, 168121, 172404, 175827, 179406, 183195, 186579, 190203, 194978, 199306, 203101, 207787, 212737, 218071, 223565, 229287, 239439, 246531, '1003')
(1, 5, 'Alabama', 29015, 28863, 28653, 28594, 28287, 28027, 27861, 27757, 27808, 27657, 27411, 27344,

In [12]:
# Add education data
file_path = "Education.xlsx"
education_df = pd.read_excel(file_path, engine='openpyxl', header=3)
education_df = education_df.rename(columns={'FIPS Code': 'fips_code'})

# Columns to select for bachelor's degree or higher
bachelors_cols = [
    "Percent of adults with a bachelor's degree or higher, 1990",
    "Percent of adults with a bachelor's degree or higher, 2000",
    "Percent of adults with a bachelor's degree or higher, 2008-12",
    "Percent of adults with a bachelor's degree or higher, 2018-22"
]

# Columns to select for high school diploma only
hs_cols = [
    "Percent of adults with a high school diploma only, 1990",
    "Percent of adults with a high school diploma only, 2000",
    "Percent of adults with a high school diploma only, 2008-12",
    "Percent of adults with a high school diploma only, 2018-22"
]

edu_cols = ['fips_code'] + bachelors_cols + hs_cols
education_df = education_df[edu_cols]

# Rename columns for clarity
education_df = education_df.rename(columns={
    "Percent of adults with a bachelor's degree or higher, 1990": "bach_1990",
    "Percent of adults with a bachelor's degree or higher, 2000": "bach_2000",
    "Percent of adults with a bachelor's degree or higher, 2008-12": "bach_2008_12",
    "Percent of adults with a bachelor's degree or higher, 2018-22": "bach_2018_22",
    "Percent of adults with a high school diploma only, 1990": "hs_1990",
    "Percent of adults with a high school diploma only, 2000": "hs_2000",
    "Percent of adults with a high school diploma only, 2008-12": "hs_2008_12",
    "Percent of adults with a high school diploma only, 2018-22": "hs_2018_22"
})
# Filter out rows where fips_code is 0 (US) or ends with '00' (states)
education_df = education_df[
    (education_df['fips_code'] != 0) & 
    (~education_df['fips_code'].astype(str).str.endswith('00'))
]

# Reset index for the cleaned DataFrame
education_df = education_df.reset_index(drop=True)

# Display the cleaned DataFrame
education_df

Unnamed: 0,fips_code,bach_1990,bach_2000,bach_2008_12,bach_2018_22,hs_1990,hs_2000,hs_2008_12,hs_2018_22
0,1001,14.5,18.0,21.707831,29.558575,32.0,33.8,33.786706,31.146113
1,1003,16.8,23.1,27.741591,32.561579,31.8,29.6,28.816463,27.775383
2,1005,11.8,10.9,14.524286,11.881188,27.1,32.4,33.294700,36.814710
3,1007,4.7,7.1,8.996005,10.919937,33.8,35.7,41.609744,40.879121
4,1009,7.0,9.6,12.381469,14.741407,34.7,36.0,36.157404,35.313717
...,...,...,...,...,...,...,...,...,...
3232,72145,8.6,14.4,17.584094,25.094723,21.3,20.6,15.068387,29.239957
3233,72147,6.7,10.1,10.140029,14.542131,20.2,26.5,33.719620,46.418481
3234,72149,10.2,12.8,15.207082,21.621274,21.3,25.0,35.959532,34.408602
3235,72151,8.7,12.1,12.912144,18.433569,20.0,23.5,30.389885,25.887896


In [13]:
table_name = "education_data"

# Connect to the SQLite database
conn = sqlite3.connect(db_name)

# Save the education data into the database
education_df.to_sql(table_name, conn, if_exists="replace", index=False)

print(f"Education data saved to the '{table_name}' table in the database '{db_name}'.")

# Close the connection
conn.close()

Education data saved to the 'education_data' table in the database 'election_results_new.db'.


In [14]:
import sqlite3

def view_table_data(db_name):
    # Connect to the database
    conn = sqlite3.connect(db_name)
    cursor = conn.cursor()

    # Fetch the first 10 rows from the election_results table
    cursor.execute('''
        SELECT * FROM education_data LIMIT 10;
    ''')
    rows = cursor.fetchall()

    # Get column names
    column_names = [description[0] for description in cursor.description]

    # Print column names and rows
    print("Column Names:")
    print(column_names)
    print("\nTable Data:")
    for row in rows:
        print(row)

    # Close the connection
    conn.close()

# Specify your database name
db_name = "election_results_new.db"
view_table_data(db_name)


Column Names:
['fips_code', 'bach_1990', 'bach_2000', 'bach_2008_12', 'bach_2018_22', 'hs_1990', 'hs_2000', 'hs_2008_12', 'hs_2018_22']

Table Data:
(1001, 14.5, 18.0, 21.707830639654, 29.558574698915095, 32.0, 33.8, 33.7867061233781, 31.146113267642082)
(1003, 16.8, 23.1, 27.741590778732, 32.561578714181366, 31.8, 29.6, 28.81646250913405, 27.775382883691968)
(1005, 11.8, 10.9, 14.52428616584132, 11.881188118811881, 27.1, 32.4, 33.29470024233485, 36.814710042432814)
(1007, 4.7, 7.1, 8.996004639773167, 10.919937205651491, 33.8, 35.7, 41.60974352364995, 40.879120879120876)
(1009, 7.0, 9.6, 12.38146913675942, 14.741406766788348, 34.7, 36.0, 36.15740381882541, 35.313717323664164)
(1011, 10.0, 7.7, 11.88118811881188, 9.376746785913918, 24.8, 35.2, 36.02640264026402, 41.32196757965344)
(1013, 8.0, 10.4, 12.86840423265393, 13.048394391677972, 29.1, 34.5, 38.79696044315035, 47.97225991255842)
(1015, 14.2, 15.2, 16.01982044110698, 19.153437812773184, 30.0, 32.2, 33.49041543746728, 34.5592986279