# Null Visualizer

*Justin R. Garrard*

### Setup

In [4]:
# Import libraries
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from ipywidgets import * 

In [5]:
# Declare global variables
# Paths are relative to the notebook's working directory. The directory is
# built from separate components so os.path.join actually does the joining
# (a single-argument join returns its argument unchanged).
DATA_DIR = os.path.join('..', 'data', 'processed')
DATA_FILE = os.path.join(DATA_DIR, 'processed.csv')
plt.style.use('ggplot')

In [14]:
# Useful functions
def null_counter(df):
    """Summarise missing values for every column of ``df``.

    Args:
        df: DataFrame to inspect.

    Returns:
        DataFrame with one row per column of ``df`` and three columns:
        'Attribute' (the column name), 'Null Count' (number of null
        entries) and '% Null' (null entries as a percentage of all rows,
        rounded to two decimal places).
    """
    n_rows = df.shape[0]

    # Pair each column name with its null count, in column order
    missing_by_col = ((name, df[name].isnull().sum()) for name in df.columns)

    summary_rows = [
        [name, missing, round((missing / n_rows) * 100, 2)]
        for name, missing in missing_by_col
    ]
    return pd.DataFrame(summary_rows, columns=['Attribute', 'Null Count', '% Null'])

def get_year_range(df):
    """Return the distinct values of ``df['year']`` as an ascending list.

    Args:
        df: DataFrame with a 'year' column.

    Returns:
        List of the unique 'year' values, sorted in ascending order.
    """
    return sorted(df['year'].unique())

def subset_by_states_only(df, max_fips=56):
    """Keep only the rows whose ``fips`` code is at or below ``max_fips``.

    Args:
        df: DataFrame with a numeric 'fips' column.
        max_fips: Inclusive upper bound on 'fips'. Defaults to 56.
            NOTE(review): 56 is presumably the highest state FIPS code, with
            territories numbered above it -- confirm against the data source.

    Returns:
        A new DataFrame holding the matching rows; ``df`` itself is not
        modified. Rows with a null 'fips' fail the comparison and are
        dropped.
    """
    return df[df['fips'] <= max_fips]

In [15]:
# Load and preview data
# Read the processed file and immediately apply the state-level filter
edu_df = subset_by_states_only(pd.read_csv(DATA_FILE))

# Report the size of the filtered frame, followed by a blank line
nRow, nCol = edu_df.shape
print(f'There are {nRow} rows and {nCol} columns.', end='\n\n')

# Report the first and last year present, followed by a blank line
YEAR_RANGE = get_year_range(edu_df)
print(f'Data spans the years {YEAR_RANGE[0]} to {YEAR_RANGE[-1]}.', end='\n\n')

# Show the per-column null summary as a rich table
print('Available columns include:')
display(null_counter(edu_df))

There are 580269 rows and 51 columns.

Data spans the years 1986 to 2018.

Available columns include:


Unnamed: 0,Attribute,Null Count,% Null
0,leaid,0,0.0
1,year,0,0.0
2,read_test_num_valid,436936,75.3
3,read_test_pct_prof_midpt,436936,75.3
4,math_test_num_valid,437025,75.31
5,math_test_pct_prof_midpt,437025,75.31
6,lea_name,16,0.0
7,state_leaid,60,0.01
8,street_location,200472,34.55
9,city_location,200376,34.53


In [16]:
# Interactive Scatterplot for Location Metrics by Year
%matplotlib notebook

year_range = get_year_range(edu_df)

@interact(year=(year_range[0],year_range[-1],1))
def null_explorer(year):
    # Clear any old figures
    plt.close()
    
    # Take a snapshot of the data for the given year
    snapshot = edu_df[edu_df['year'] == year].copy()
    y_pos = np.arange(len(edu_df.columns))
    
    # Make a plot to match states to the chosen metric
    plt.figure(figsize=(8, 8), num='Null Value Explorer Tool')
    plt.barh(list(edu_df.columns), snapshot.isnull().sum())
    plt.xscale("log")

    
interactive_plot = interactive(null_explorer,
                               year=2005)

interactive(children=(IntSlider(value=2002, description='year', max=2018, min=1986), Output()), _dom_classes=(…

In [23]:
tgt_year = 2016

# Rows for the target year ...
year_rows = edu_df.loc[edu_df['year'] == tgt_year]

# ... narrowed to those with at least one null in any column
has_null = year_rows.isnull().any(axis=1)
null_df = year_rows.loc[has_null]

# Show the result and write it (index included) to the working directory
display(null_df)
null_df.to_csv('nulls.csv')

Unnamed: 0,leaid,year,read_test_num_valid,read_test_pct_prof_midpt,math_test_num_valid,math_test_pct_prof_midpt,lea_name,state_leaid,street_location,city_location,...,exp_total,exp_current_instruction_total,exp_current_supp_serve_total,exp_current_other,exp_nonelsec,salaries_total,benefits_employee_total,debt_longterm_outstand_beg_FY,enrollment_fall_responsible,enrollment_fall_school
95588,100005,2016,2797.0,28.0,2837.0,35.0,Albertville City,AL-101,107 West Main Street,Albertville,...,50456000.0,25785000.0,15790000.0,3723000.0,849000.0,24804000.0,10088000.0,34381000.0,5447.0,5447.0
95589,100006,2016,2969.0,34.0,2979.0,42.0,Marshall County,AL-048,12380 US Highway 431 S,Guntersville,...,59341000.0,28419000.0,22178000.0,4138000.0,753000.0,30829000.0,12436000.0,27766000.0,5687.0,5687.0
95590,100007,2016,7421.0,58.0,7465.0,67.0,Hoover City,AL-158,2810 Metropolitan Way,Hoover,...,165480000.0,90762000.0,51164000.0,7985000.0,3275000.0,88502000.0,34715000.0,288730000.0,13938.0,13938.0
95591,100008,2016,5578.0,69.0,5599.0,74.0,Madison City,AL-169,211 Celtic Drive,Madison,...,105184000.0,58769000.0,34007000.0,4736000.0,1186000.0,54983000.0,21085000.0,120371000.0,10440.0,10440.0
95592,100011,2016,997.0,33.0,1003.0,37.0,Leeds City,AL-167,1404 8th Street,Leeds,...,22552000.0,10378000.0,7523000.0,1073000.0,287000.0,10319000.0,4006000.0,10449000.0,1973.0,1973.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169220,5680250,2016,,,,,Region V BOCES,WY-2050000,3850 North Wilderness Drive,Wilson,...,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,0.0
169221,5680251,2016,,,,,Wyoming Department of Family Services,WY-7700049,2300 Capitol Avenue,Cheyenne,...,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,0.0,0.0
169222,5680252,2016,,,,,Youth Emergency Services Inc. - Administration...,WY-0370000,706 East Longmont Street,Gillette,...,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,0.0,0.0
169223,5680253,2016,,,,,Wyoming Behavioral Institute - Administration ...,WY-1350000,2521 East 15th Street,Casper,...,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0,0.0,0.0
