In [5]:
import pandas as pd
import numpy as np
import sqlite3

# Load the dataset
df = pd.read_csv('Census_Data.csv')

# Check for missing values and data types
print(df.info())

# Convert columns to float for analysis
columns_to_convert = ['PERCENT OF HOUSING CROWDED', 'PERCENT HOUSEHOLDS BELOW POVERTY', 'PERCENT AGED 16+ UNEMPLOYED', 'PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA', 'PERCENT AGED UNDER 18 OR OVER 64']
df[columns_to_convert] = df[columns_to_convert].astype(float)

# Prepare data for SQL analysis
conn = sqlite3.connect(':memory:')  # Create a new database in memory
df.to_sql('census_data', conn, index=False, if_exists='replace')

# Description analysis
print(df.describe())

# Correlation analysis
print(df.corr())

# SQL query analysis: Top 5 community areas with highest poverty levels
query = """
SELECT `COMMUNITY AREA NAME`, `PERCENT HOUSEHOLDS BELOW POVERTY`
FROM census_data
ORDER BY `PERCENT HOUSEHOLDS BELOW POVERTY` DESC
LIMIT 5
"""
top_poverty_areas = pd.read_sql_query(query, conn)
print(top_poverty_areas)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 78 entries, 0 to 77
Data columns (total 9 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Community Area Number                         77 non-null     float64
 1   COMMUNITY AREA NAME                           78 non-null     object 
 2   PERCENT OF HOUSING CROWDED                    78 non-null     float64
 3   PERCENT HOUSEHOLDS BELOW POVERTY              78 non-null     float64
 4   PERCENT AGED 16+ UNEMPLOYED                   78 non-null     float64
 5   PERCENT AGED 25+ WITHOUT HIGH SCHOOL DIPLOMA  78 non-null     float64
 6   PERCENT AGED UNDER 18 OR OVER 64              78 non-null     float64
 7   PER CAPITA INCOME                             78 non-null     int64  
 8   HARDSHIP INDEX                                77 non-null     float64
dtypes: float64(7), int64(1), object(1)
memory usage: 5.6+ KB
None
      

  print(df.corr())
