# objectives - model different scenarios for schools and observe predicted changes in student scores 

- load model
- retrain model 
- select schools to model 
- generate alternative key feature values to model 
- observe adjusted score predictions 

In [8]:

import pandas as pd
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import __version__ as sklearn_version
from sklearn.model_selection import cross_validate
import warnings
warnings.filterwarnings('ignore')

import seaborn as sns

## Load Random Forest Model

In [10]:
expected_model_version = '1.0'
model_path = '../../models/11th_grade_scoring_model.pkl'

# Fail fast when the pickle is missing: every later cell needs `model`, and
# merely printing here would either end in a NameError further down or, worse,
# silently reuse a stale `model` left in the kernel by an earlier run.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Expected model not found: {model_path}")

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
with open(model_path, 'rb') as f:
    model = pickle.load(f)

# Version mismatches are reported but not fatal. print() is used rather than
# warnings.warn() because this notebook silences all warnings at import time.
if model.version != expected_model_version:
    print("Expected model version doesn't match version loaded")
if model.sklearn_version != sklearn_version:
    print("Warning: model created under different sklearn version")

In [11]:
model.version

'1.0'

In [63]:
# Read the modelling table and drop the 'Unnamed: 0' column
# (presumably an index saved with the CSV; verify against the export step).
df = pd.read_csv('../../data/model/XY_final.csv').drop(columns='Unnamed: 0')
# Keep the school identifier columns in their own frame.
labels = df.loc[:, ['County_Name', 'District_Name', 'School_Name']].copy()
df.shape

(52665, 63)

In [65]:
# Features are restricted to the columns recorded on the loaded model;
# the target is the mean scale score.
X = df.loc[:, model.X_columns]
y = df.loc[:, 'Mean Scale Score']
print(X.shape)
print(y.shape)

(52665, 59)
(52665,)


# Train on all data 

In [29]:
# Refit the loaded pipeline on every row of X / y (no hold-out split here).
model.fit(X,y)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestregressor',
                 RandomForestRegressor(n_estimators=77))])

In [30]:
import sklearn.metrics

# List the scorer names accepted by the `scoring=` argument.
# The SCORERS dict is not available in newer scikit-learn releases, where
# get_scorer_names() replaces it; fall back to SCORERS on older versions
# that predate get_scorer_names().
if hasattr(sklearn.metrics, 'get_scorer_names'):
    scorer_names = sklearn.metrics.get_scorer_names()
else:
    scorer_names = list(sklearn.metrics.SCORERS.keys())
scorer_names

dict_keys(['explained_variance', 'r2', 'max_error', 'neg_median_absolute_error', 'neg_mean_absolute_error', 'neg_mean_absolute_percentage_error', 'neg_mean_squared_error', 'neg_mean_squared_log_error', 'neg_root_mean_squared_error', 'neg_mean_poisson_deviance', 'neg_mean_gamma_deviance', 'accuracy', 'top_k_accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted', 'balanced_accuracy', 'average_precision', 'neg_log_loss', 'neg_brier_score', 'adjusted_rand_score', 'rand_score', 'homogeneity_score', 'completeness_score', 'v_measure_score', 'mutual_info_score', 'adjusted_mutual_info_score', 'normalized_mutual_info_score', 'fowlkes_mallows_score', 'precision', 'precision_macro', 'precision_micro', 'precision_samples', 'precision_weighted', 'recall', 'recall_macro', 'recall_micro', 'recall_samples', 'recall_weighted', 'f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted', 'jaccard', 'jaccard_macro', 'jaccard_micro', 'jaccard_samples', 'jaccard_wei

In [31]:
# 5-fold cross-validation scored with (negated) mean absolute error.
cv_results = cross_validate(
    model,
    X,
    y,
    scoring='neg_mean_absolute_error',
    cv=5,
)

In [32]:
cv_results

{'fit_time': array([37.820503  , 37.38575912, 35.49316525, 36.75595284, 43.54802608]),
 'score_time': array([0.20788884, 0.19032788, 0.21474099, 0.2351203 , 0.52964878]),
 'test_score': array([-46.47105384, -38.29384011, -32.87780265, -37.94233169,
        -37.32735421])}

In [33]:
# The test scores are negated MAE values, so flip the sign once and
# summarise the resulting per-fold errors.
mae_scores = -cv_results['test_score']
mae_mean = mae_scores.mean()
mae_std = mae_scores.std()
mae_mean, mae_std

(38.58247650073784, 4.400940455720497)

In [None]:
def predict_change(model, school, features, deltas):
    """Return the predicted score change after shifting selected features.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    school : table-like
        Feature values for the case being modelled, indexable by feature
        name and supporting ``copy()``. Each ``predict`` call must yield
        exactly one value, because the result is read with ``.item()``.
        The input is copied and never modified.
    features : iterable
        Names of the features to adjust.
    deltas : iterable
        Amount added to the feature at the same position in ``features``.

    Returns
    -------
    Prediction for the adjusted copy minus prediction for the original.

    Raises
    ------
    ValueError
        If ``features`` and ``deltas`` differ in length.
    """
    features = list(features)
    deltas = list(deltas)
    # zip() would silently ignore unmatched trailing entries, hiding a
    # mis-specified scenario, so reject mismatched inputs up front.
    if len(features) != len(deltas):
        raise ValueError(
            f"features and deltas must have the same length "
            f"(got {len(features)} and {len(deltas)})"
        )

    adj_school = school.copy()
    for f, d in zip(features, deltas):
        adj_school[f] += d
    return model.predict(adj_school).item() - model.predict(school).item()

## refit model with all data 


plot and compare the key features to be adjusted 

use `predict_change` to generate alternative scores due to changes in model features 

graph changes in scores due to adjusted features 



In [44]:
labels.columns

Index(['County_Name', 'District_Name', 'School_Name'], dtype='object')

In [53]:
# Distinct district names that begin with 'San'.
san_district_mask = df['District_Name'].str.startswith('San')
df.loc[san_district_mask, 'District_Name'].unique()

array(['San Ramon Valley Unified', 'Sanger Unified', 'San Marino Unified',
       'Santa Monica-Malibu Unified', 'San Gabriel Unified',
       'San Rafael City High', 'Santa Ana Unified', 'San Jacinto Unified',
       'San Jacinto Valley Academy', 'San Juan Unified',
       'San Benito High', 'San Bernardino County Office of Educatio',
       'San Bernardino City Unified',
       'San Diego County Office of Education',
       'San Diego Workforce Innovation High', 'San Diego Virtual',
       'San Diego Unified', 'San Dieguito Union High',
       'San Diego Mission Academy',
       'San Joaquin County Office of Education',
       'San Luis Obispo County Office of Educati',
       'San Luis Coastal Unified',
       'Santa Clara County Office of Education', 'San Jose Unified',
       'Santa Clara Unified', 'San Lorenzo Valley Unified',
       'Santa Cruz City High'], dtype=object)

In [54]:
# Distinct district names that begin with 'Jef'.
jef_district_mask = df['District_Name'].str.startswith('Jef')
df.loc[jef_district_mask, 'District_Name'].unique()

array(['Jefferson Union High'], dtype=object)

In [58]:
# Distinct school names in counties whose name begins with 'San Mateo'.
san_mateo_mask = df['County_Name'].str.startswith('San Mateo')
df.loc[san_mateo_mask, 'School_Name'].unique()

array(['Oxford Day Academy', 'California Virtual Academy San Mateo',
       'Jefferson High', 'Oceana High', 'Terra Nova High',
       'Westmoor High', 'Baden High (Continuation)', 'El Camino High',
       'South San Francisco High'], dtype=object)

In [59]:
df['District_Name'].nunique()

542

In [60]:
df['County_Name'].nunique()

51

In [74]:
df.columns

Index(['Total Tested with Scores at Reporting Level', 'Charter_School',
       'Free_Meal_count', 'Latitude', 'FTE_Teachers', 'Pupil/Teacher-Ratio',
       'Hispanic_Female', 'Black_or_AA_F', 'APA_F', 'Avg_21',
       'Average Salary Paid', 'Current_Expense_per_ADA', 'est_num_u17_pov',
       'Grocery_3mile', 'Grocery_dist', 'Low Grade_6', 'Low Grade_7',
       'Low Grade_9', 'Low Grade_K', 'Test ID_Mathematics',
       'Demographic Name_Armed forces family member', 'Demographic Name_Asian',
       'Demographic Name_Black or African American',
       'Demographic Name_College graduate',
       'Demographic Name_Declined to state',
       'Demographic Name_EL (English learner)',
       'Demographic Name_ELs enrolled 12 months or more',
       'Demographic Name_EO (English only)',
       'Demographic Name_Economically disadvantaged',
       'Demographic Name_Ever�EL', 'Demographic Name_Female',
       'Demographic Name_Filipino',
       'Demographic Name_Graduate school/Post graduate',
 

In [81]:
# Rows for districts starting with 'Jef' where the Filipino demographic flag is set.
in_jef_district = df['District_Name'].str.startswith('Jef')
is_filipino_group = df['Demographic Name_Filipino'] == 1
df[in_jef_district & is_filipino_group]

Unnamed: 0,Total Tested with Scores at Reporting Level,Charter_School,Free_Meal_count,Latitude,FTE_Teachers,Pupil/Teacher-Ratio,Hispanic_Female,Black_or_AA_F,APA_F,Avg_21,...,District Type_other,Education_type_Continuation School,Education_type_County Community School,Education_type_Juvenile Court School,Education_type_Traditional,Education_type_other,Mean Scale Score,County_Name,District_Name,School_Name
42616,78.0,0,407.0,37.6961,61.06,21.7,260.0,9.0,250.0,1086472.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2623.0,San Mateo,Jefferson Union High,Jefferson High
42631,21.0,0,407.0,37.6961,61.06,21.7,260.0,9.0,250.0,1086472.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2616.4,San Mateo,Jefferson Union High,Jefferson High
42634,57.0,0,407.0,37.6961,61.06,21.7,260.0,9.0,250.0,1086472.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2625.4,San Mateo,Jefferson Union High,Jefferson High
42647,66.0,0,407.0,37.6961,61.06,21.7,260.0,9.0,250.0,1086472.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2589.5,San Mateo,Jefferson Union High,Jefferson High
42660,17.0,0,407.0,37.6961,61.06,21.7,260.0,9.0,250.0,1086472.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2596.2,San Mateo,Jefferson Union High,Jefferson High
42663,49.0,0,407.0,37.6961,61.06,21.7,260.0,9.0,250.0,1086472.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2587.2,San Mateo,Jefferson Union High,Jefferson High
42673,11.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2625.6,San Mateo,Jefferson Union High,Oceana High
42730,90.0,0,367.0,37.6832,63.66,21.93,162.0,3.0,414.0,1224926.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2651.9,San Mateo,Jefferson Union High,Westmoor High
42744,15.0,0,367.0,37.6832,63.66,21.93,162.0,3.0,414.0,1224926.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2583.2,San Mateo,Jefferson Union High,Westmoor High
42747,75.0,0,367.0,37.6832,63.66,21.93,162.0,3.0,414.0,1224926.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2665.6,San Mateo,Jefferson Union High,Westmoor High


In [61]:
# df['District_Name'].unique()

In [82]:
# All rows for schools whose name begins with 'Oceana'.
oceana_mask = df['School_Name'].str.startswith('Oceana')
oceana = df.loc[oceana_mask]
oceana

Unnamed: 0,Total Tested with Scores at Reporting Level,Charter_School,Free_Meal_count,Latitude,FTE_Teachers,Pupil/Teacher-Ratio,Hispanic_Female,Black_or_AA_F,APA_F,Avg_21,...,District Type_other,Education_type_Continuation School,Education_type_County Community School,Education_type_Juvenile Court School,Education_type_Traditional,Education_type_other,Mean Scale Score,County_Name,District_Name,School_Name
42665,50.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2671.3,San Mateo,Jefferson Union High,Oceana High
42666,29.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2691.4,San Mateo,Jefferson Union High,Oceana High
42667,21.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2643.6,San Mateo,Jefferson Union High,Oceana High
42668,46.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2687.4,San Mateo,Jefferson Union High,Oceana High
42669,50.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2671.3,San Mateo,Jefferson Union High,Oceana High
42670,11.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2663.3,San Mateo,Jefferson Union High,Oceana High
42671,50.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2671.3,San Mateo,Jefferson Union High,Oceana High
42672,50.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2671.3,San Mateo,Jefferson Union High,Oceana High
42673,11.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2625.6,San Mateo,Jefferson Union High,Oceana High
42674,11.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2713.0,San Mateo,Jefferson Union High,Oceana High


In [95]:
# First 20 Oceana rows with the Filipino demographic flag set.
oceana.loc[oceana['Demographic Name_Filipino'] == 1].head(20)

Unnamed: 0,Total Tested with Scores at Reporting Level,Charter_School,Free_Meal_count,Latitude,FTE_Teachers,Pupil/Teacher-Ratio,Hispanic_Female,Black_or_AA_F,APA_F,Avg_21,...,District Type_other,Education_type_Continuation School,Education_type_County Community School,Education_type_Juvenile Court School,Education_type_Traditional,Education_type_other,Mean Scale Score,County_Name,District_Name,School_Name
42673,11.0,0,113.0,37.6387,31.65,18.83,71.0,1.0,98.0,1274587.0,...,0.0,0.0,0.0,0.0,0.0,0.0,2625.6,San Mateo,Jefferson Union High,Oceana High


In [96]:
oceana.columns

Index(['Total Tested with Scores at Reporting Level', 'Charter_School',
       'Free_Meal_count', 'Latitude', 'FTE_Teachers', 'Pupil/Teacher-Ratio',
       'Hispanic_Female', 'Black_or_AA_F', 'APA_F', 'Avg_21',
       'Average Salary Paid', 'Current_Expense_per_ADA', 'est_num_u17_pov',
       'Grocery_3mile', 'Grocery_dist', 'Low Grade_6', 'Low Grade_7',
       'Low Grade_9', 'Low Grade_K', 'Test ID_Mathematics',
       'Demographic Name_Armed forces family member', 'Demographic Name_Asian',
       'Demographic Name_Black or African American',
       'Demographic Name_College graduate',
       'Demographic Name_Declined to state',
       'Demographic Name_EL (English learner)',
       'Demographic Name_ELs enrolled 12 months or more',
       'Demographic Name_EO (English only)',
       'Demographic Name_Economically disadvantaged',
       'Demographic Name_Ever�EL', 'Demographic Name_Female',
       'Demographic Name_Filipino',
       'Demographic Name_Graduate school/Post graduate',
 

In [110]:
# Transposed view (features as rows) of the Oceana rows flagged Filipino,
# from the first listed column through the target column.
oceana_filipino = oceana[oceana['Demographic Name_Filipino'] == 1]
first_col = 'Total Tested with Scores at Reporting Level'
last_col = 'Mean Scale Score'
oceana_filipino.loc[:, first_col:last_col].T

Unnamed: 0,42673
Total Tested with Scores at Reporting Level,11.0
Charter_School,0.0
Free_Meal_count,113.0
Latitude,37.6387
FTE_Teachers,31.65
Pupil/Teacher-Ratio,18.83
Hispanic_Female,71.0
Black_or_AA_F,1.0
APA_F,98.0
Avg_21,1274587.0
