In [1]:
import pandas as pd
import numpy as np
from CreateDF import create_DF
df = create_DF()

  warn("""Cannot parse header or footer so it will be ignored""")
  warn("""Cannot parse header or footer so it will be ignored""")


In [2]:
# --- Create the Taxpayer Efficiency Score ---

# 1. Normalize the input variables to a 0-1 scale
# Normalize Median Income
min_income = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].min()
max_income = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].max()
df['Normalized Median Income'] = (df['MEDIAN OHIO ADJUSTED GROSS INCOME'] - min_income) / (max_income - min_income)

# Normalize Total Local Tax Burden
min_tax = df['Total Local Tax Burden'].min()
max_tax = df['Total Local Tax Burden'].max()
df['Normalized Tax Burden'] = (df['Total Local Tax Burden'] - min_tax) / (max_tax - min_tax)

# 2. Create the composite "Socio-Economic & Tax Burden Index"
# We give equal weight to both normalized components
df['Socio-Economic & Tax Burden Index'] = (df['Normalized Median Income'] + df['Normalized Tax Burden']) / 2

# 3. Calculate the final Taxpayer Efficiency Score
df['Taxpayer Efficiency Score'] = df['Performance Index Score 2023-2024'] / (1 + df['Socio-Economic & Tax Burden Index'])

# You can now view the most efficient districts
print("Top 15 Most Efficient School Districts:")
print(df[['District Name', 'Taxpayer Efficiency Score', 'Performance Index Score 2023-2024', 'MEDIAN OHIO ADJUSTED GROSS INCOME', 'Total Local Tax Burden']].sort_values(by='Taxpayer Efficiency Score', ascending=False).head(15))

Top 15 Most Efficient School Districts:
                                            District Name  \
District IRN                                                
44826                                   Steubenville City   
45633                         Versailles Exempted Village   
49361                     Miller City-New Cleveland Local   
45955                                    New Bremen Local   
49783                                  Fort Loramie Local   
45963                                 New Knoxville Local   
48553                                        Marion Local   
48595                                 Fort Recovery Local   
45419         Hicksville Exempted Village School District   
46680                                   Tri-Village Local   
45948                                       Minster Local   
47225                                   West Geauga Local   
45328                         Columbiana Exempted Village   
47688                                   East 

In [3]:
# --- Create the Performance Opportunity Score (POS) ---

# 1. Normalize the core challenge indicators to a 0-1 scale
# Economic Disadvantage
min_ed = df['Percent of Economically Disadvantaged Students'].min()
max_ed = df['Percent of Economically Disadvantaged Students'].max()
df['Normalized Econ Disadvantage'] = (df['Percent of Economically Disadvantaged Students'] - min_ed) / (max_ed - min_ed)

# Median Income (already normalized in the previous step, but we'll ensure it's here)
min_income = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].min()
max_income = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].max()
df['Normalized Median Income'] = (df['MEDIAN OHIO ADJUSTED GROSS INCOME'] - min_income) / (max_income - min_income)

# 2. Create the weighted "Challenge Index"
# Giving 2/3 weight to economic disadvantage and 1/3 to the inverse of income capacity.
df['Challenge Index'] = ((2 * df['Normalized Econ Disadvantage']) + (1 - df['Normalized Median Income'])) / 3

# 3. Calculate the "Expected Performance Score"
# This creates a sliding scale of expected performance based on the Challenge Index.
min_pi = df['Performance Index Score 2023-2024'].min()
max_pi = df['Performance Index Score 2023-2024'].max()
df['Expected Performance'] = max_pi - (df['Challenge Index'] * (max_pi - min_pi))

# 4. Calculate the final "Performance Opportunity Score"
df['Performance Opportunity Score'] = df['Performance Index Score 2023-2024'] - df['Expected Performance']

# Display the top 15 districts that are over-performing the most
print("Top 15 Districts by Performance Opportunity Score (POS):")
print(df[['District Name', 'Performance Opportunity Score', 'Performance Index Score 2023-2024', 'Expected Performance', 'Percent of Economically Disadvantaged Students']].sort_values(by='Performance Opportunity Score', ascending=False).head(15))

Top 15 Districts by Performance Opportunity Score (POS):
                              District Name  Performance Opportunity Score  \
District IRN                                                                 
44826                     Steubenville City                      54.037736   
50203                       Lordstown Local                      35.187085   
44248                   Logan-Hocking Local                      34.408908   
47928                   Dawson-Bryant Local                      32.399462   
47761                  Oak Hill Union Local                      31.492912   
44149          Ironton City School District                      30.321344   
442                        Manchester Local                      29.809194   
44461                      New Boston Local                      29.261364   
43828                        Coshocton City                      29.079755   
47969                   Symmes Valley Local                      28.774265   
50393  

In [4]:
import numpy as np

# --- Create the Next-Generation Value Score ---

# 1. Normalize all component metrics to a 0-1 scale

# Performance Metrics
min_pi = df['Performance Index Score 2023-2024'].min()
max_pi = df['Performance Index Score 2023-2024'].max()
df['Normalized PI Score'] = (df['Performance Index Score 2023-2024'] - min_pi) / (max_pi - min_pi)

min_att = df['Total Attendance Percent'].min()
max_att = df['Total Attendance Percent'].max()
df['Normalized Attendance'] = (df['Total Attendance Percent'] - min_att) / (max_att - min_att)

min_grad = df['Four Year Graduation Rate - Class of 2023'].min()
max_grad = df['Four Year Graduation Rate - Class of 2023'].max()
df['Normalized Grad Rate'] = (df['Four Year Graduation Rate - Class of 2023'] - min_grad) / (max_grad - min_grad)

# Cost of Living Metrics
min_inc = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].min()
max_inc = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].max()
df['Normalized Median Income'] = (df['MEDIAN OHIO ADJUSTED GROSS INCOME'] - min_inc) / (max_inc - min_inc)

min_tax = df['Total Local Tax Burden'].min()
max_tax = df['Total Local Tax Burden'].max()
df['Normalized Tax Burden'] = (df['Total Local Tax Burden'] - min_tax) / (max_tax - min_tax)

# 2. Create the Composite Scores

# Create the Holistic Performance Score with 3:2:1 weighting (PI:Grad:Att)
df['Holistic Performance Score'] = ((3 * df['Normalized PI Score']) + (2 * df['Normalized Grad Rate']) + (1 * df['Normalized Attendance'])) / 6

# Create the Cost of Living Index (equal weight)
df['Cost of Living Index'] = (df['Normalized Median Income'] + df['Normalized Tax Burden']) / 2

# 3. Calculate the Final, Refined Value Score
# Using square root to dampen the effect of the denominator
df['Final Value Score'] = (df['Holistic Performance Score'] / (np.sqrt(df['Cost of Living Index']) + 0.1)) * 100

# Display the top 15 districts with the new, more balanced score
print("Top 15 Districts by Final Value Score:")
print(df[['District Name', 'Final Value Score', 'Performance Index Score 2023-2024', 'Four Year Graduation Rate - Class of 2023', 'MEDIAN OHIO ADJUSTED GROSS INCOME']].sort_values(by='Final Value Score', ascending=False).head(15))

Top 15 Districts by Final Value Score:
                                District Name  Final Value Score  \
District IRN                                                       
44826                       Steubenville City         195.242645   
50096                  Bloomfield-Mespo Local         176.830331   
45633             Versailles Exempted Village         168.350631   
49361         Miller City-New Cleveland Local         166.313073   
45963                     New Knoxville Local         163.979064   
48595                     Fort Recovery Local         163.385139   
46649                   Franklin Monroe Local         163.234772   
45955                        New Bremen Local         162.873745   
50542                            Dalton Local         161.972548   
49312                    Columbus Grove Local         161.877614   
49783                      Fort Loramie Local         161.871401   
46680                       Tri-Village Local         160.948221   
45328    

In [5]:
import numpy as np

# --- Create the Comprehensive Value Score (Adjusted for Size) ---

# 1. Normalize all component metrics to a 0-1 scale
# Performance Metrics (if not already done)
min_pi = df['Performance Index Score 2023-2024'].min()
max_pi = df['Performance Index Score 2023-2024'].max()
df['Normalized PI Score'] = (df['Performance Index Score 2023-2024'] - min_pi) / (max_pi - min_pi)

min_att = df['Total Attendance Percent'].min()
max_att = df['Total Attendance Percent'].max()
df['Normalized Attendance'] = (df['Total Attendance Percent'] - min_att) / (max_att - min_att)

min_grad = df['Four Year Graduation Rate - Class of 2023'].min()
max_grad = df['Four Year Graduation Rate - Class of 2023'].max()
df['Normalized Grad Rate'] = (df['Four Year Graduation Rate - Class of 2023'] - min_grad) / (max_grad - min_grad)

# Cost of Living & Complexity Metrics
min_inc = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].min()
max_inc = df['MEDIAN OHIO ADJUSTED GROSS INCOME'].max()
df['Normalized Median Income'] = (df['MEDIAN OHIO ADJUSTED GROSS INCOME'] - min_inc) / (max_inc - min_inc)

min_tax = df['Total Local Tax Burden'].min()
max_tax = df['Total Local Tax Burden'].max()
df['Normalized Tax Burden'] = (df['Total Local Tax Burden'] - min_tax) / (max_tax - min_tax)

min_students = df['Total Students'].min()
max_students = df['Total Students'].max()
df['Normalized Total Students'] = (df['Total Students'] - min_students) / (max_students - min_students)


# 2. Create the Composite Scores
# Holistic Performance Score (50% PI, 33% Grad, 17% Att)
df['Holistic Performance Score'] = ((3 * df['Normalized PI Score']) + (2 * df['Normalized Grad Rate']) + (1 * df['Normalized Attendance'])) / 6

# Cost of Living Index
df['Cost of Living Index'] = (df['Normalized Median Income'] + df['Normalized Tax Burden']) / 2

# Cost and Complexity Index (giving 2/3 weight to cost of living)
df['Cost and Complexity Index'] = ((2 * df['Cost of Living Index']) + (1 * df['Normalized Total Students'])) / 3

# 3. Calculate the Final, Comprehensive Value Score
# Using square root to dampen the effect of the denominator
df['Comprehensive Value Score'] = (df['Holistic Performance Score'] / (np.sqrt(df['Cost and Complexity Index']) + 0.1)) * 100

# Display the top 15 districts with the new, comprehensive score
print("Top 15 Districts by Comprehensive Value Score:")
print(df[['District Name', 'Comprehensive Value Score', 'Holistic Performance Score', 'Cost and Complexity Index', 'Total Students']].sort_values(by='Comprehensive Value Score', ascending=False).head(100))
df[['District Name', 'Comprehensive Value Score', 'Holistic Performance Score', 'Cost and Complexity Index', 'Total Students']].sort_values(by='Comprehensive Value Score', ascending=False).to_csv('top_schools.csv',index=False)

Top 15 Districts by Comprehensive Value Score:
                                District Name  Comprehensive Value Score  \
District IRN                                                               
44826                       Steubenville City                 209.712193   
50096                  Bloomfield-Mespo Local                 202.209391   
49361         Miller City-New Cleveland Local                 194.359193   
45633             Versailles Exempted Village                 194.071639   
45963                     New Knoxville Local                 191.613239   
...                                       ...                        ...   
46037           Eastern Local School District                 161.067335   
47514                         Riverdale Local                 160.746182   
49700                   Hopewell-Loudon Local                 160.557036   
48090                         Riverside Local                 160.489247   
44651                       Port Clinton 

In [6]:
# 1. Estimate the total market value of all real property
# In Ohio, taxable value is 35% of market value.
df['Estimated Total Market Value'] = df['Real Property Taxable Value (a)'] / 0.35

# 2. Calculate the estimated average home value using NUMBER OF RETURNS as a proxy for households
# We add 1 to the denominator to avoid any potential division by zero errors.
df['Estimated Average Home Value'] = df['Estimated Total Market Value'] / (df['NUMBER OF RETURNS'] + 1)

# Now you can use 'Estimated Average Home Value' as the 'Median Home Value' in the final metric.
# Let's rename it for clarity and consistency with the previous metric's code.
df.rename(columns={'Estimated Average Home Value': 'Median Home Value'}, inplace=True)


# Display the new column for a few districts
print(df[['District Name', 'Median Home Value', 'Real Property Taxable Value (a)', 'NUMBER OF RETURNS']].head(30))

                                   District Name  Median Home Value  \
District IRN                                                          
442                             Manchester Local      131527.018080   
43489                                 Akron City      118915.487276   
43497                              Alliance City      124661.046427   
43505                               Ashland City      162188.956532   
43513                        Ashtabula Area City      128097.997622   
43521                Athens City School District      189145.929600   
43539                             Barberton City      125301.473807   
43547                           Bay Village City      326596.628635   
43554                             Beachwood City      465804.928289   
43562                               Bedford City      186559.922956   
43570                             Bellaire Local      165354.537333   
43588                         Bellefontaine City      138428.843325   
43596 

In [7]:
import numpy as np
# Assuming 'df' is your fully merged and cleaned DataFrame
# from the previous steps.

# --- Step 1: Calculate Estimated Average Home Value ---
# This step is now included directly in the final calculation flow.
df['Estimated Total Market Value'] = df['Real Property Taxable Value (a)'] / 0.35
df['Median Home Value'] = df['Estimated Total Market Value'] / (df['NUMBER OF RETURNS'] + 1)


# --- Step 2: Create the Affordability & Value Score ---

# A. Calculate the Housing Affordability Ratio
# This is the core of the new cost metric.
df['Housing Affordability Ratio'] = df['Median Home Value'] / (df['MEDIAN OHIO ADJUSTED GROSS INCOME'] + 1)

# B. Normalize all component metrics to a 0-1 scale
# Performance Metrics
min_pi = df['Performance Index Score 2023-2024'].min()
max_pi = df['Performance Index Score 2023-2024'].max()
df['Normalized PI Score'] = (df['Performance Index Score 2023-2024'] - min_pi) / (max_pi - min_pi)

min_att = df['Total Attendance Percent'].min()
max_att = df['Total Attendance Percent'].max()
df['Normalized Attendance'] = (df['Total Attendance Percent'] - min_att) / (max_att - min_att)

min_grad = df['Four Year Graduation Rate - Class of 2023'].min()
max_grad = df['Four Year Graduation Rate - Class of 2023'].max()
df['Normalized Grad Rate'] = (df['Four Year Graduation Rate - Class of 2023'] - min_grad) / (max_grad - min_grad)

# Affordability & Complexity Metrics
min_afford = df['Housing Affordability Ratio'].min()
max_afford = df['Housing Affordability Ratio'].max()
df['Normalized Affordability'] = (df['Housing Affordability Ratio'] - min_afford) / (max_afford - min_afford)

min_tax = df['Total Local Tax Burden'].min()
max_tax = df['Total Local Tax Burden'].max()
df['Normalized Tax Burden'] = (df['Total Local Tax Burden'] - min_tax) / (max_tax - min_tax)

min_students = df['Total Students'].min()
max_students = df['Total Students'].max()
df['Normalized Total Students'] = (df['Total Students'] - min_students) / (max_students - min_students)

# C. Create the Composite Scores
# Holistic Performance Score (50% PI, 33% Grad, 17% Att)
df['Holistic Performance Score'] = ((3 * df['Normalized PI Score']) + (2 * df['Normalized Grad Rate']) + (1 * df['Normalized Attendance'])) / 6

# Affordability & Complexity Index (50% Affordability, 33% Tax, 17% Size)
df['Affordability & Complexity Index'] = ((3 * df['Normalized Affordability']) + (2 * df['Normalized Tax Burden']) + (1 * df['Normalized Total Students'])) / 6

# D. Calculate the Final Score
# Squaring the performance score gives it significantly more weight.
df['Affordability & Value Score'] = (np.power(df['Holistic Performance Score'], 2) / (np.sqrt(df['Affordability & Complexity Index']) + 0.1)) * 100

# E. Display the final results
print("Top 15 Districts by Affordability & Value Score:")
print(df[['District Name', 'Affordability & Value Score', 'Performance Index Score 2023-2024', 'Median Home Value', 'Total Students']].sort_values(by='Affordability & Value Score', ascending=False).head(15))

Top 15 Districts by Affordability & Value Score:
                                District Name  Affordability & Value Score  \
District IRN                                                                 
45955                        New Bremen Local                   240.112123   
49346                            Kalida Local                   232.162580   
49783                      Fort Loramie Local                   229.970510   
49361         Miller City-New Cleveland Local                   227.294309   
44826                       Steubenville City                   223.939103   
45633             Versailles Exempted Village                   221.265624   
49387                         Ottoville Local                   212.739386   
48637                            Newton Local                   210.557068   
49593                      Bloom-Vernon Local                   208.733166   
48553                            Marion Local                   207.573796   
45948          

In [8]:
import numpy as np

# Assuming 'df' is your fully merged and cleaned DataFrame.

# --- Step 1: Create the Input Data ---

# Estimate the total market value of all real property (taxable value is 35% of market value in Ohio)
df['Estimated Total Market Value'] = df['Real Property Taxable Value (a)'] / 0.35

# Estimate the average home value using NUMBER OF RETURNS as a proxy for households
df['Median Home Value'] = df['Estimated Total Market Value'] / (df['NUMBER OF RETURNS'] + 1)

# Calculate the Housing Affordability Ratio
df['Housing Affordability Ratio'] = df['Median Home Value'] / (df['MEDIAN OHIO ADJUSTED GROSS INCOME'] + 1)


# --- Step 2: Normalize All Component Metrics (0-1 Scale) ---

# Performance Metrics
min_pi = df['Performance Index Score 2023-2024'].min()
max_pi = df['Performance Index Score 2023-2024'].max()
df['Normalized PI Score'] = (df['Performance Index Score 2023-2024'] - min_pi) / (max_pi - min_pi)

min_att = df['Total Attendance Percent'].min()
max_att = df['Total Attendance Percent'].max()
df['Normalized Attendance'] = (df['Total Attendance Percent'] - min_att) / (max_att - min_att)

min_grad = df['Four Year Graduation Rate - Class of 2023'].min()
max_grad = df['Four Year Graduation Rate - Class of 2023'].max()
df['Normalized Grad Rate'] = (df['Four Year Graduation Rate - Class of 2023'] - min_grad) / (max_grad - min_grad)

# Affordability & Complexity Metrics
min_afford = df['Housing Affordability Ratio'].min()
max_afford = df['Housing Affordability Ratio'].max()
df['Normalized Affordability'] = (df['Housing Affordability Ratio'] - min_afford) / (max_afford - min_afford)

min_tax = df['Total Local Tax Burden'].min()
max_tax = df['Total Local Tax Burden'].max()
df['Normalized Tax Burden'] = (df['Total Local Tax Burden'] - min_tax) / (max_tax - min_tax)

min_students = df['Total Students'].min()
max_students = df['Total Students'].max()
df['Normalized Total Students'] = (df['Total Students'] - min_students) / (max_students - min_students)


# --- Step 3: Create the Composite Scores ---

# Holistic Performance Score (50% PI, 33% Grad, 17% Att)
df['Holistic Performance Score'] = ((3 * df['Normalized PI Score']) + (2 * df['Normalized Grad Rate']) + (1 * df['Normalized Attendance'])) / 6

# Affordability & Complexity Index (50% Affordability, 33% Tax, 17% Size)
df['Affordability & Complexity Index'] = ((3 * df['Normalized Affordability']) + (2 * df['Normalized Tax Burden']) + (1 * df['Normalized Total Students'])) / 6


# --- Step 4: Calculate the Final Score ---

# Squaring the performance score gives it significantly more weight.
df['Affordability & Value Score'] = (np.power(df['Holistic Performance Score'], 2) / (np.sqrt(df['Affordability & Complexity Index']) + 0.1)) * 100


# --- Step 5: Display the Final Results ---

print("Top 15 Districts by Affordability & Value Score:")
print(df[['District Name', 'Affordability & Value Score', 'Performance Index Score 2023-2024', 'Median Home Value', 'Total Students']].sort_values(by='Affordability & Value Score', ascending=False).head(15))
df[['District Name','County', 'Affordability & Value Score', 'Performance Index Score 2023-2024', 'Median Home Value', 'MEDIAN OHIO ADJUSTED GROSS INCOME','Total Local Tax Burden','Total Students']].sort_values(by='Affordability & Value Score', ascending=False).to_csv('top_schools.csv',index=False)

Top 15 Districts by Affordability & Value Score:
                                District Name  Affordability & Value Score  \
District IRN                                                                 
45955                        New Bremen Local                   240.112123   
49346                            Kalida Local                   232.162580   
49783                      Fort Loramie Local                   229.970510   
49361         Miller City-New Cleveland Local                   227.294309   
44826                       Steubenville City                   223.939103   
45633             Versailles Exempted Village                   221.265624   
49387                         Ottoville Local                   212.739386   
48637                            Newton Local                   210.557068   
49593                      Bloom-Vernon Local                   208.733166   
48553                            Marion Local                   207.573796   
45948          

In [9]:
# rank based on real estate prices and incomes

df[['District Name','County', 'Affordability & Value Score', 'Performance Index Score 2023-2024', 'Median Home Value', 'MEDIAN OHIO ADJUSTED GROSS INCOME','Total Local Tax Burden','Total Students']].sort_values(by='Median Home Value', ascending=False).to_csv('top_real_estate.csv',index=False)
df[['District Name','County', 'Affordability & Value Score', 'Performance Index Score 2023-2024', 'Median Home Value', 'MEDIAN OHIO ADJUSTED GROSS INCOME','Total Local Tax Burden','Total Students']].sort_values(by='MEDIAN OHIO ADJUSTED GROSS INCOME', ascending=False).to_csv('top_income.csv',index=False)