In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import copy
#conda install -c conda-forge cufflinks-py
#conda install plotly
import ipywidgets as wg
from IPython.display import display

import cufflinks as cf
import chart_studio.plotly as py
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import inspect
import seaborn as sns

init_notebook_mode(connected=True)
cf.go_offline()
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler, Normalizer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.base import TransformerMixin, BaseEstimator
pd.options.display.max_columns = 200
pd.options.display.max_rows = 272

In [2]:
from sklearn.feature_selection import SelectKBest, VarianceThreshold

from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, explained_variance_score, mean_absolute_error, make_scorer
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.decomposition import PCA
from joblib import dump, load
from Logger import RegressionLogger

In [3]:
log_transformed = 0
class FuncTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    
    def fit(self):
        return self
    
    def transform(self, X, y=None):
        return X
        
    def log_transform(self, X, column):
        col = X[:, column]
        print(col.dtype)
        return np.log(col)
        
    
    def exp_transform(self, X, column):
        col = X[:, log_transformed]
        return np.exp(col)
        
    
    def poly_transform(self, X, y, column, degree):
        col = X[:, column]
        p1 = np.polyfit(col, y, degree)
        return np.polyval(p1, col)
        
    
    def recip_transform(self, X, column):
        col = X[:, column]
        return 1 / col
        
        
    
        

In [4]:
model_log = load("data/model_logging.joblib")

In [5]:
current_model = copy.deepcopy(model_log[0]["model"])

In [6]:
params = {
    "Imputer": "Simple Imputer",
    "Scaler": "Standard Scaler",
    "model_name": "Elastic Net",
    "model": current_model,
    "alpha": 0.1,
    "l1_ratio": 0.5,
}

In [7]:
logger = RegressionLogger(params)

# Now Lets Load our original Data

In [8]:
df = pd.read_csv("data/cleaned_df.csv", index_col="UnitID")

In [9]:
df.drop("Unnamed: 0",axis=1, inplace=True)

In [10]:
df.drop(["City location of institution (HD2019)", "Institution Name"], axis=1, inplace=True)

In [11]:
X = df.iloc[:, :-14]
y = df.iloc[:,-14]

In [12]:
df_black = df.dropna(subset=[df.columns[-7]])

In [13]:
X_black = df_black.iloc[:,:-14]
y_black = df_black.iloc[:, -7]

In [14]:
X_black = pd.get_dummies(X_black, drop_first=True)

In [15]:
X = pd.get_dummies(X, drop_first=True)

In [16]:
X.shape

(2315, 260)

In [17]:
X_black.shape

(2027, 255)

# Lets test against different imputation strategies: 
<ol>
    <li>SimpleImputer</li>
    <li>KNNImputer()</li>
    <li>IterativeImputer()</li>
    <li>Let XG Boost handle imputation</li>
<ol>

### KNN imputation shows improvement so lets try iterative

In [18]:
X.shape

(2315, 260)

In [19]:
# Reset Current Model
current_model = copy.deepcopy(model_log[0]["model"])

In [20]:
num_cols = list(range(len(X.select_dtypes(include=["float"]).columns)))

In [21]:
num_cols

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121]

In [22]:
#update model pipeline 
current_model.steps[0] = ('Imputer', ColumnTransformer(
    [
        ('Impute', KNNImputer(), num_cols)
    ], remainder="passthrough"
))

In [23]:
#define new parameters
params = {
    "Imputer": "KNNImputer",
    "Scaler": "StandardScaler",
    "model": current_model,
    "model_name": "Elastic_Net",
    "alpha": 0.1,
    "l1_ratio": 0.5,   
}

In [24]:
#reset logger paramters
logger.__init__(params)

In [25]:
#train and record results
logger.train_update(current_model, X, y)
logger.train_update(current_model, X_black, y_black, suffix="_black")
logger.save_log("KNN imputation with Elastic Net")
model_log.append(logger.record())

### KNN shows slightly better results now lets try RandomForestRegressor

In [26]:
# reset the current model
rf = copy.deepcopy(model_log[0]["model"])

In [27]:
# update Pipeline
rf_hyperparams = {
    "n_estimators": 100,
    "max_depth": 15,
    
    "min_samples_split": 10,
    "min_samples_leaf": 5
} # hyperparameters for XGBoost


In [28]:
rf_params = {
    "model": rf,
    "model_name": "Random Forest",
    
    "n_estimators": 100,
    "max_depth": 15,
    "max_features": 'auto',
    "min_samples_split": 10,
    "min_samples_leaf": 5
}

In [29]:
rf.steps[2] = ('classifier', RandomForestRegressor(**rf_hyperparams))

In [30]:
#reset logger paramters
logger.__init__(rf_params)

In [31]:
#train and record
logger.train_update(rf, X, y)
logger.train_update(rf, X_black, y_black, "_black")
logger.save_log("Random Forest Tree")
model_log.append(logger.record())

# Tree Based Model Performed Much Better; Lets Try XGB tree

In [32]:
# Reset Current Model
xgb_model = copy.deepcopy(model_log[0]["model"])

In [33]:
# update Pipeline
params = {
    "objective": "reg:squarederror",
    "booster": "gbtree",
    "learning_rate": 0.1,
    "subsample": 1.0,
    "min_child_weight": 1
} # hyperparameters for XGBoost

In [34]:
# update pipeline
xgb_model.steps.pop(0)

xgb_model.steps[1] = ('classifier', xgb.XGBRegressor(**params))

In [35]:
# all hyperparameters for logger
xgb_hyperparams = {
    "model": xgb_model,
    "model_name": "XGBoost Tree",
    
    "objective": "reg:squarederror",
    "booster": "gbtree",
    "learning_rate": 0.1,
    "subsample": 1.0,
    "min_child_weight": 1
}

In [36]:
#reset logger paramters
logger.__init__(xgb_hyperparams)

In [37]:
X_black.shape

(2027, 255)

In [38]:
#train and record
logger.train_update(xgb_model, X_black, y_black, "_black")
logger.train_update(xgb_model, X, y)
logger.save_log("XGBoost with no imputation")
model_log.append(logger.record())

In [39]:
pd.DataFrame(model_log)[["model_name", "mae"]]

Unnamed: 0,model_name,mae
0,Elastic Net,9.888576
1,Elastic_Net,9.670773
2,Random Forest,9.042008
3,XGBoost Tree,8.758335


# Lets Do Feature Engineering and try to get the most accurate model we can 

In [40]:
# create column y_actual and residuals as a df

y_hat = logger.model.predict(X)
resid = y - y_hat
target_df = pd.concat([y, resid], axis=1)
target_df.columns = ["Graduation_Rate", "Residuals"]

# Financial Features

In [41]:

picker1 = wg.Dropdown(
    options=X.columns[0:25],
    value=X.columns[0],
    description='Feature:',
    disabled=False,
)

picker2 = wg.Dropdown(
    options=target_df.columns,
    description='Target:',
    value=target_df.columns[-1],
    disabled=False,
)

ui = wg.HBox([picker1, picker2])

def scatter_residuals(feature, target):
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=([feature, "Residuals"]),
        column_widths=[0.6, 0.4]
    
    )
    
    _ = fig.add_trace(go.Scatter(
        x = X[feature], y=target_df[target], mode='markers'
    ), 1, 1)

    _ = fig.add_trace(go.Scatter(
        x = X[feature], y=target_df["Residuals"], mode='markers'
    ), 1, 2)
    
    _ = display(fig.show())
    
out = wg.interactive_output(scatter_residuals, {'feature': picker1, 'target':picker2})
out.layout.height = '700px'
_ = display(ui, out)

HBox(children=(Dropdown(description='Feature:', options=('Core_Revenues', 'Tuition_And_Fees', 'Government_Gran…

Output(layout=Layout(height='700px'))

## Feature Engineering Ideas

<ol>
    <li>Log all dollar amounts</li>
    <li>PCA on distribution of revenues and distribution of expenses</li>
<ol>

In [42]:
# log of core revenues
transformer = FuncTransformer()

In [43]:
fe_x = transformer.log_transform(X.to_numpy(), 0)

float64


In [44]:
X["log_core_revenues"] = fe_x

In [48]:
X

Unnamed: 0_level_0,Core_Revenues,Tuition_And_Fees,Government_Grants,Private_Gifts,Investment_Return,Sales_And_Services,Other_Revenues,Tuition_And_Fees_As_Dollar_Amount,Government_Grants_As_Dollar_Amount,Private_Gifts_As_Dollar_Amount,Investment_Return_As_Dollar_Amount,Sales_And_Services_As_Dollar_Amount,Other_Revenues_As_Dollar_Amount,Core_Expenses,Instruction_Expenses,Research_Expenses,Public_Service_Expenses,Academic_Support_Expenses,Student_Service_Expenses,Institutional_Support_Expenses,Other_Core_Expenses,Instruction_Expenses_As_Dollar_Amount,Research_Expenses_As_Dollar_Amount,Public_Service_Expenses_As_Dollar_Amount,Academic_Support_Expenses_As_Dollar_Amount,Student_Service_Expenses_As_Dollar_Amount,Institutional_Support_Expenses_As_Dollar_Amount,Other_Core_Expenses_As_Dollar_Amount,Grand total instructional_staff,Grand total men instructional_staff,Grand total women instructional_staff,American Indian or Alaska Native total instructional_staff,American Indian or Alaska Native men instructional_staff,American Indian or Alaska Native women instructional_staff,Asian total instructional_staff,Asian men instructional_staff,Asian women instructional_staff,Black or African American total instructional_staff,Black or African American men instructional_staff,Black or African American women instructional_staff,Hispanic or Latino total instructional_staff,Hispanic or Latino men instructional_staff,Hispanic or Latino women instructional_staff,Native Hawaiian or Other Pacific Islander total instructional_staff,Native Hawaiian or Other Pacific Islander men instructional_staff,Native Hawaiian or Other Pacific Islander women instructional_staff,White total instructional_staff,White men instructional_staff,White women instructional_staff,Two or more races total instructional_staff,Two or more races men instructional_staff,Two or more races women instructional_staff,Race/ethnicity unknown total instructional_staff,Race/ethnicity unknown men instructional_staff,Race/ethnicity unknown women instructional_staff,Nonresident alien total instructional_staff,Nonresident alien men instructional_staff,Nonresident alien women instructional_staff,Grand total men instructional_staff_as_percentage,Grand total women instructional_staff_as_percentage,American Indian or Alaska Native total instructional_staff_as_percentage,American Indian or Alaska Native men instructional_staff_as_percentage,American Indian or Alaska Native women instructional_staff_as_percentage,Asian total instructional_staff_as_percentage,Asian men instructional_staff_as_percentage,Asian women instructional_staff_as_percentage,Black or African American total instructional_staff_as_percentage,Black or African American men instructional_staff_as_percentage,Black or African American women instructional_staff_as_percentage,Hispanic or Latino total instructional_staff_as_percentage,Hispanic or Latino men instructional_staff_as_percentage,Hispanic or Latino women instructional_staff_as_percentage,Native Hawaiian or Other Pacific Islander total instructional_staff_as_percentage,Native Hawaiian or Other Pacific Islander men instructional_staff_as_percentage,Native Hawaiian or Other Pacific Islander women instructional_staff_as_percentage,White total instructional_staff_as_percentage,White men instructional_staff_as_percentage,White women instructional_staff_as_percentage,Two or more races total instructional_staff_as_percentage,Two or more races men instructional_staff_as_percentage,Two or more races women instructional_staff_as_percentage,Race/ethnicity unknown total instructional_staff_as_percentage,Race/ethnicity unknown men instructional_staff_as_percentage,Race/ethnicity unknown women instructional_staff_as_percentage,Nonresident alien total instructional_staff_as_percentage,Nonresident alien men instructional_staff_as_percentage,Nonresident alien women instructional_staff_as_percentage,Percent of full-time first-time undergraduates awarded any financial aid (SFA1819),Percent of full-time first-time undergraduates awarded federal state local or institutional grant aid (SFA1819),Percent of total enrollment that are American Indian or Alaska Native (DRVEF2013_RV),Percent of total enrollment that are Asian/Native Hawaiian/Pacific Islander (DRVEF2013_RV),Percent of total enrollment that are Asian (DRVEF2013_RV),Percent of total enrollment that are Native Hawaiian or Other Pacific Islander (DRVEF2013_RV),Percent of total enrollment that are Black or African American (DRVEF2013_RV),Percent of total enrollment that are Hispanic/Latino (DRVEF2013_RV),Percent of total enrollment that are White (DRVEF2013_RV),Percent of total enrollment that are Race/ethnicity unknown (DRVEF2013_RV),Percent of total enrollment that are Nonresident Alien (DRVEF2013_RV),Percent of total enrollment that are two or more races (DRVEF2013_RV),Percent of total enrollment that are women (DRVEF2013_RV),...,Carnegie Classification 2018: Enrollment Profile (HD2018)_Majority undergraduate,"Carnegie Classification 2018: Enrollment Profile (HD2018)_Not applicable, not in Carnegie universe (not accredited or nondegree-granting)",Carnegie Classification 2018: Enrollment Profile (HD2018)_Very high undergraduate,Carnegie Classification 2018: Enrollment Profile (HD2018)_isMissing,"Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, large, highly residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, large, primarily nonresidential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, large, primarily residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, medium, highly residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, medium, primarily nonresidential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, medium, primarily residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, small, highly residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, small, primarily nonresidential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, small, primarily residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, very small, highly residential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, very small, primarily nonresidential","Carnegie Classification 2018: Size and Setting (HD2018)_Four-year, very small, primarily residential","Carnegie Classification 2018: Size and Setting (HD2018)_Not applicable, not in Carnegie universe (not accredited or nondegree-granting)","Carnegie Classification 2018: Size and Setting (HD2018)_Two-year, large","Carnegie Classification 2018: Size and Setting (HD2018)_Two-year, medium","Carnegie Classification 2018: Size and Setting (HD2018)_Two-year, small","Carnegie Classification 2018: Size and Setting (HD2018)_Two-year, very large","Carnegie Classification 2018: Size and Setting (HD2018)_Two-year, very small",Carnegie Classification 2018: Size and Setting (HD2018)_isMissing,Historically Black College or University (HD2018)_Yes,Historically Black College or University (HD2018)_isMissing,"Institution size category (HD2018)_10,000 - 19,999","Institution size category (HD2018)_20,000 and above","Institution size category (HD2018)_5,000 - 9,999","Institution size category (HD2018)_Under 1,000",Institution size category (HD2018)_isMissing,Parent/child indicator - Finance (FLAGS2019)_Child record - reports partial data but other data is included with entity that is not a postsecondary institution,Parent/child indicator - Finance (FLAGS2019)_Not applicable,Parent/child indicator - Finance (FLAGS2019)_Parent record - includes data from branch campuses,Parent/child indicator - Finance (FLAGS2019)_Partial child record - reports revenues/expenses. Assets/liabilties reported with parent,"Sector of institution (HD2018)_Private for-profit, 4-year or above","Sector of institution (HD2018)_Private for-profit, less-than 2-year","Sector of institution (HD2018)_Private not-for-profit, 2-year","Sector of institution (HD2018)_Private not-for-profit, 4-year or above","Sector of institution (HD2018)_Public, 2-year","Sector of institution (HD2018)_Public, 4-year or above",Sector of institution (HD2018)_isMissing,State abbreviation (HD2018)_Alaska,State abbreviation (HD2018)_American Samoa,State abbreviation (HD2018)_Arizona,State abbreviation (HD2018)_Arkansas,State abbreviation (HD2018)_California,State abbreviation (HD2018)_Colorado,State abbreviation (HD2018)_Connecticut,State abbreviation (HD2018)_Delaware,State abbreviation (HD2018)_District of Columbia,State abbreviation (HD2018)_Federated States of Micronesia,State abbreviation (HD2018)_Florida,State abbreviation (HD2018)_Georgia,State abbreviation (HD2018)_Guam,State abbreviation (HD2018)_Hawaii,State abbreviation (HD2018)_Idaho,State abbreviation (HD2018)_Illinois,State abbreviation (HD2018)_Indiana,State abbreviation (HD2018)_Iowa,State abbreviation (HD2018)_Kansas,State abbreviation (HD2018)_Kentucky,State abbreviation (HD2018)_Louisiana,State abbreviation (HD2018)_Maine,State abbreviation (HD2018)_Marshall Islands,State abbreviation (HD2018)_Maryland,State abbreviation (HD2018)_Massachusetts,State abbreviation (HD2018)_Michigan,State abbreviation (HD2018)_Minnesota,State abbreviation (HD2018)_Mississippi,State abbreviation (HD2018)_Missouri,State abbreviation (HD2018)_Montana,State abbreviation (HD2018)_Nebraska,State abbreviation (HD2018)_Nevada,State abbreviation (HD2018)_New Hampshire,State abbreviation (HD2018)_New Jersey,State abbreviation (HD2018)_New Mexico,State abbreviation (HD2018)_New York,State abbreviation (HD2018)_North Carolina,State abbreviation (HD2018)_North Dakota,State abbreviation (HD2018)_Northern Marianas,State abbreviation (HD2018)_Ohio,State abbreviation (HD2018)_Oklahoma,State abbreviation (HD2018)_Oregon,State abbreviation (HD2018)_Pennsylvania,State abbreviation (HD2018)_Puerto Rico,State abbreviation (HD2018)_Rhode Island,State abbreviation (HD2018)_South Carolina,State abbreviation (HD2018)_South Dakota,State abbreviation (HD2018)_Tennessee,State abbreviation (HD2018)_Texas,State abbreviation (HD2018)_Utah,State abbreviation (HD2018)_Vermont,State abbreviation (HD2018)_Virgin Islands,State abbreviation (HD2018)_Virginia,State abbreviation (HD2018)_Washington,State abbreviation (HD2018)_West Virginia,State abbreviation (HD2018)_Wisconsin,State abbreviation (HD2018)_Wyoming,State abbreviation (HD2018)_isMissing,log_core_revenues
UnitID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1
180203,7078959.0,2.0,60.0,7.0,1.0,0.0,29.0,1.415792e+05,4247375.40,495527.13,70789.59,0.0,2052898.11,7294344.0,29.0,3.0,7.0,3.0,8.0,16.0,36.0,2.115360e+06,218830.32,510604.08,218830.32,583547.52,1167095.04,2625963.84,13.0,7.0,6.0,8.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.85,46.15,61.54,30.77,30.77,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,38.46,23.08,15.38,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,85.0,82.0,88.0,0.0,0.0,0.0,0.0,1.0,10.0,0.0,0.0,0.0,57.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15.772637
222178,136379482.0,57.0,4.0,17.0,18.0,0.0,4.0,7.773630e+07,5455179.28,23184511.94,24548306.76,0.0,5455179.28,121513039.0,38.0,1.0,2.0,10.0,26.0,24.0,0.0,4.617495e+07,1215130.39,2430260.78,12151303.90,31593390.14,29163129.36,0.00,265.0,154.0,111.0,1.0,1.0,0.0,3.0,2.0,1.0,14.0,7.0,7.0,13.0,8.0,5.0,0.0,0.0,0.0,228.0,131.0,97.0,3.0,2.0,1.0,0.0,0.0,0.0,3.0,3.0,0.0,58.11,41.89,0.38,0.38,0.00,1.13,0.75,0.38,5.28,2.64,2.64,4.91,3.02,1.89,0.00,0.00,0.00,86.04,49.43,36.60,1.13,0.75,0.38,0.00,0.00,0.00,1.13,1.13,0.00,100.0,100.0,0.0,1.0,1.0,0.0,8.0,11.0,69.0,2.0,4.0,3.0,58.0,...,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,18.730952
138558,70491879.0,17.0,17.0,2.0,0.0,0.0,28.0,1.198362e+07,11983619.43,1409837.58,0.00,0.0,19737726.12,50201124.0,42.0,0.0,3.0,15.0,9.0,19.0,13.0,2.108447e+07,0.00,1506033.72,7530168.60,4518101.16,9538213.56,6526146.12,149.0,77.0,72.0,0.0,0.0,0.0,10.0,5.0,5.0,10.0,7.0,3.0,3.0,2.0,1.0,0.0,0.0,0.0,124.0,62.0,62.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,51.68,48.32,0.00,0.00,0.00,6.71,3.36,3.36,6.71,4.70,2.01,2.01,1.34,0.67,0.00,0.00,0.00,83.22,41.61,41.61,0.00,0.00,0.00,0.67,0.67,0.00,0.67,0.00,0.67,91.0,86.0,0.0,1.0,1.0,0.0,12.0,6.0,78.0,0.0,2.0,1.0,54.0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18.071008
172866,3019292.0,100.0,0.0,0.0,0.0,0.0,0.0,3.019292e+06,0.00,0.00,0.00,0.0,0.00,2767687.0,13.0,0.0,0.0,36.0,11.0,4.0,37.0,3.597993e+05,0.00,0.00,996367.32,304445.57,110707.48,1024044.19,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,100.0,100.0,1.0,9.0,9.0,1.0,14.0,1.0,70.0,3.0,0.0,1.0,21.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14.920533
108232,217471648.0,100.0,0.0,0.0,0.0,0.0,0.0,2.174716e+08,0.00,0.00,0.00,0.0,0.00,211573114.0,52.0,0.0,0.0,0.0,19.0,15.0,14.0,1.100180e+08,0.00,0.00,0.00,40198891.66,31735967.10,29620235.96,219.0,124.0,95.0,0.0,0.0,0.0,22.0,6.0,16.0,4.0,3.0,1.0,8.0,5.0,3.0,3.0,1.0,2.0,141.0,84.0,57.0,2.0,1.0,1.0,39.0,24.0,15.0,0.0,0.0,0.0,56.62,43.38,0.00,0.00,0.00,10.05,2.74,7.31,1.83,1.37,0.46,3.65,2.28,1.37,1.37,0.46,0.91,64.38,38.36,26.03,0.91,0.46,0.46,17.81,10.96,6.85,0.00,0.00,0.00,59.0,49.0,0.0,7.0,7.0,0.0,6.0,8.0,25.0,20.0,31.0,2.0,58.0,...,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19.197579
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217059,81643910.0,75.0,1.0,7.0,15.0,0.0,1.0,6.123293e+07,816439.10,5715073.70,12246586.50,0.0,816439.10,80282892.0,49.0,0.0,0.0,16.0,15.0,19.0,0.0,3.933862e+07,0.00,0.00,12845262.72,12042433.80,15253749.48,0.00,176.0,90.0,86.0,0.0,0.0,0.0,8.0,5.0,3.0,3.0,2.0,1.0,3.0,1.0,2.0,0.0,0.0,0.0,157.0,80.0,77.0,1.0,0.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,51.14,48.86,0.00,0.00,0.00,4.55,2.84,1.70,1.70,1.14,0.57,1.70,0.57,1.14,0.00,0.00,0.00,89.20,45.45,43.75,0.57,0.00,0.57,1.14,0.57,0.57,1.14,0.57,0.57,100.0,100.0,0.0,1.0,1.0,0.0,5.0,5.0,83.0,3.0,0.0,3.0,55.0,...,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18.217878
141361,16354418.0,57.0,2.0,19.0,20.0,0.0,2.0,9.322018e+06,327088.36,3107339.42,3270883.60,0.0,327088.36,23571670.0,40.0,0.0,0.0,15.0,29.0,16.0,0.0,9.428668e+06,0.00,0.00,3535750.50,6835784.30,3771467.20,0.00,67.0,36.0,31.0,0.0,0.0,0.0,4.0,2.0,2.0,2.0,0.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,58.0,32.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.73,46.27,0.00,0.00,0.00,5.97,2.99,2.99,2.99,0.00,2.99,2.99,1.49,1.49,1.49,1.49,0.00,86.57,47.76,38.81,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,99.0,99.0,0.0,1.0,1.0,0.0,6.0,4.0,83.0,3.0,0.0,1.0,58.0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16.610009
206695,186217912.0,43.0,17.0,7.0,3.0,0.0,8.0,8.007370e+07,31657045.04,13035253.84,5586537.36,0.0,14897432.96,173480835.0,46.0,3.0,4.0,11.0,8.0,16.0,12.0,7.980118e+07,5204425.05,6939233.40,19082891.85,13878466.80,27756933.60,20817700.20,400.0,220.0,180.0,1.0,1.0,0.0,34.0,19.0,15.0,25.0,15.0,10.0,5.0,4.0,1.0,0.0,0.0,0.0,310.0,165.0,145.0,0.0,0.0,0.0,4.0,2.0,2.0,21.0,14.0,7.0,55.00,45.00,0.25,0.25,0.00,8.50,4.75,3.75,6.25,3.75,2.50,1.25,1.00,0.25,0.00,0.00,0.00,77.50,41.25,36.25,0.00,0.00,0.00,1.00,0.50,0.50,5.25,3.50,1.75,98.0,77.0,0.0,1.0,1.0,0.0,14.0,3.0,75.0,4.0,1.0,2.0,54.0,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19.042428
204255,18557893.0,29.0,17.0,1.0,0.0,0.0,9.0,5.381789e+06,3154841.81,185578.93,0.00,0.0,1670210.37,15845968.0,48.0,0.0,0.0,7.0,9.0,27.0,9.0,7.606065e+06,0.00,0.00,1109217.76,1426137.12,4278411.36,1426137.12,41.0,20.0,21.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,36.0,17.0,19.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,48.78,51.22,0.00,0.00,0.00,2.44,0.00,2.44,2.44,0.00,2.44,2.44,2.44,0.00,0.00,0.00,0.00,87.80,41.46,46.34,0.00,0.00,0.00,4.88,4.88,0.00,0.00,0.00,0.00,84.0,83.0,0.0,1.0,0.0,0.0,3.0,0.0,81.0,13.0,0.0,2.0,52.0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16.736406



# Staff Features

In [46]:


picker1 = wg.Dropdown(
    options=X.filter(regex="staff").columns,
    value=X.filter(regex="staff").columns[0],
    description='Feature:',
    disabled=False,
)

picker2 = wg.Dropdown(
    options=target_df.columns,
    description='Target:',
    value=target_df.columns[-1],
    disabled=False,
)

ui = wg.HBox([picker1, picker2])

def scatter_residuals(feature, target):
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=([feature, "Residuals"]),
        column_widths=[0.6, 0.4]
    
    )
    
    _ = fig.add_trace(go.Scatter(
        x = X[feature], y=target_df[target], mode='markers'
    ), 1, 1)

    _ = fig.add_trace(go.Scatter(
        x = X[feature], y=target_df["Residuals"], mode='markers'
    ), 1, 2)
    
    _ = display(fig.show())
    
out = wg.interactive_output(scatter_residuals, {'feature': picker1, 'target':picker2})
out.layout.height = '700px'
_ = display(ui, out)

HBox(children=(Dropdown(description='Feature:', options=('Grand total instructional_staff', 'Grand total men i…

Output(layout=Layout(height='700px'))

<ol>
    <li>Log all dollar amounts</li>
    <li>PCA on distribution of revenues and distribution of expenses</li>
<ol>

### Log transformation for core revenues and expenses should help our model. Dollar amounts show the same trend

In [47]:
fig = make_subplots( 
    rows=2, cols=2,
    subplot_titles = ["Tuition and Fees", "Residuals", "Government_Grants", "Residuals"]
)

fig.add_trace(
    go.Scatter(
        x=X.Tuition_And_Fees, y=y, mode='markers',
        range_x=[0,100]
    )
)


ValueError: Invalid property specified for object of type plotly.graph_objs.Scatter: 'range'

Did you mean "line"?

    Valid properties:
        cliponaxis
            Determines whether or not markers and text nodes are
            clipped about the subplot axes. To show markers and
            text nodes above axis lines and tick labels, make sure
            to set `xaxis.layer` and `yaxis.layer` to *below
            traces*.
        connectgaps
            Determines whether or not gaps (i.e. {nan} or missing
            values) in the provided data arrays are connected.
        customdata
            Assigns extra data each datum. This may be useful when
            listening to hover, click and selection events. Note
            that, "scatter" traces also appends customdata items in
            the markers DOM elements
        customdatasrc
            Sets the source reference on Chart Studio Cloud for
            customdata .
        dx
            Sets the x coordinate step. See `x0` for more info.
        dy
            Sets the y coordinate step. See `y0` for more info.
        error_x
            :class:`plotly.graph_objects.scatter.ErrorX` instance
            or dict with compatible properties
        error_y
            :class:`plotly.graph_objects.scatter.ErrorY` instance
            or dict with compatible properties
        fill
            Sets the area to fill with a solid color. Defaults to
            "none" unless this trace is stacked, then it gets
            "tonexty" ("tonextx") if `orientation` is "v" ("h") Use
            with `fillcolor` if not "none". "tozerox" and "tozeroy"
            fill to x=0 and y=0 respectively. "tonextx" and
            "tonexty" fill between the endpoints of this trace and
            the endpoints of the trace before it, connecting those
            endpoints with straight lines (to make a stacked area
            graph); if there is no trace before it, they behave
            like "tozerox" and "tozeroy". "toself" connects the
            endpoints of the trace (or each segment of the trace if
            it has gaps) into a closed shape. "tonext" fills the
            space between two traces if one completely encloses the
            other (eg consecutive contour lines), and behaves like
            "toself" if there is no trace before it. "tonext"
            should not be used if one trace does not enclose the
            other. Traces in a `stackgroup` will only fill to (or
            be filled to) other traces in the same group. With
            multiple `stackgroup`s or some traces stacked and some
            not, if fill-linked traces are not already consecutive,
            the later ones will be pushed down in the drawing
            order.
        fillcolor
            Sets the fill color. Defaults to a half-transparent
            variant of the line color, marker color, or marker line
            color, whichever is available.
        groupnorm
            Only relevant when `stackgroup` is used, and only the
            first `groupnorm` found in the `stackgroup` will be
            used - including if `visible` is "legendonly" but not
            if it is `false`. Sets the normalization for the sum of
            this `stackgroup`. With "fraction", the value of each
            trace at each location is divided by the sum of all
            trace values at that location. "percent" is the same
            but multiplied by 100 to show percentages. If there are
            multiple subplots, or multiple `stackgroup`s on one
            subplot, each will be normalized within its own set.
        hoverinfo
            Determines which trace information appear on hover. If
            `none` or `skip` are set, no information is displayed
            upon hovering. But, if `none` is set, click and hover
            events are still fired.
        hoverinfosrc
            Sets the source reference on Chart Studio Cloud for
            hoverinfo .
        hoverlabel
            :class:`plotly.graph_objects.scatter.Hoverlabel`
            instance or dict with compatible properties
        hoveron
            Do the hover effects highlight individual points
            (markers or line points) or do they highlight filled
            regions? If the fill is "toself" or "tonext" and there
            are no markers or text, then the default is "fills",
            otherwise it is "points".
        hovertemplate
            Template string used for rendering the information that
            appear on hover box. Note that this will override
            `hoverinfo`. Variables are inserted using %{variable},
            for example "y: %{y}" as well as %{xother}, {%_xother},
            {%_xother_}, {%xother_}. When showing info for several
            points, "xother" will be added to those with different
            x positions from the first point. An underscore before
            or after "(x|y)other" will add a space on that side,
            only when this field is shown. Numbers are formatted
            using d3-format's syntax %{variable:d3-format}, for
            example "Price: %{y:$.2f}".
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format
            for details on the formatting syntax. Dates are
            formatted using d3-time-format's syntax
            %{variable|d3-time-format}, for example "Day:
            %{2019-01-01|%A}". https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format for details on the
            date formatting syntax. The variables available in
            `hovertemplate` are the ones emitted as event data
            described at this link
            https://plotly.com/javascript/plotlyjs-events/#event-
            data. Additionally, every attributes that can be
            specified per-point (the ones that are `arrayOk: true`)
            are available.  Anything contained in tag `<extra>` is
            displayed in the secondary box, for example
            "<extra>{fullData.name}</extra>". To hide the secondary
            box completely, use an empty tag `<extra></extra>`.
        hovertemplatesrc
            Sets the source reference on Chart Studio Cloud for
            hovertemplate .
        hovertext
            Sets hover text elements associated with each (x,y)
            pair. If a single string, the same string appears over
            all the data points. If an array of string, the items
            are mapped in order to the this trace's (x,y)
            coordinates. To be seen, trace `hoverinfo` must contain
            a "text" flag.
        hovertextsrc
            Sets the source reference on Chart Studio Cloud for
            hovertext .
        ids
            Assigns id labels to each datum. These ids for object
            constancy of data points during animation. Should be an
            array of strings, not numbers or any other type.
        idssrc
            Sets the source reference on Chart Studio Cloud for
            ids .
        legendgroup
            Sets the legend group for this trace. Traces part of
            the same legend group hide/show at the same time when
            toggling legend items.
        legendgrouptitle
            :class:`plotly.graph_objects.scatter.Legendgrouptitle`
            instance or dict with compatible properties
        legendrank
            Sets the legend rank for this trace. Items and groups
            with smaller ranks are presented on top/left side while
            with `*reversed* `legend.traceorder` they are on
            bottom/right side. The default legendrank is 1000, so
            that you can use ranks less than 1000 to place certain
            items before all unranked items, and ranks greater than
            1000 to go after all unranked items.
        line
            :class:`plotly.graph_objects.scatter.Line` instance or
            dict with compatible properties
        marker
            :class:`plotly.graph_objects.scatter.Marker` instance
            or dict with compatible properties
        meta
            Assigns extra meta information associated with this
            trace that can be used in various text attributes.
            Attributes such as trace `name`, graph, axis and
            colorbar `title.text`, annotation `text`
            `rangeselector`, `updatemenues` and `sliders` `label`
            text all support `meta`. To access the trace `meta`
            values in an attribute in the same trace, simply use
            `%{meta[i]}` where `i` is the index or key of the
            `meta` item in question. To access trace `meta` in
            layout attributes, use `%{data[n[.meta[i]}` where `i`
            is the index or key of the `meta` and `n` is the trace
            index.
        metasrc
            Sets the source reference on Chart Studio Cloud for
            meta .
        mode
            Determines the drawing mode for this scatter trace. If
            the provided `mode` includes "text" then the `text`
            elements appear at the coordinates. Otherwise, the
            `text` elements appear on hover. If there are less than
            20 points and the trace is not stacked then the default
            is "lines+markers". Otherwise, "lines".
        name
            Sets the trace name. The trace name appear as the
            legend item and on hover.
        opacity
            Sets the opacity of the trace.
        orientation
            Only relevant when `stackgroup` is used, and only the
            first `orientation` found in the `stackgroup` will be
            used - including if `visible` is "legendonly" but not
            if it is `false`. Sets the stacking direction. With "v"
            ("h"), the y (x) values of subsequent traces are added.
            Also affects the default value of `fill`.
        selected
            :class:`plotly.graph_objects.scatter.Selected` instance
            or dict with compatible properties
        selectedpoints
            Array containing integer indices of selected points.
            Has an effect only for traces that support selections.
            Note that an empty array means an empty selection where
            the `unselected` are turned on for all points, whereas,
            any other non-array values means no selection all where
            the `selected` and `unselected` styles have no effect.
        showlegend
            Determines whether or not an item corresponding to this
            trace is shown in the legend.
        stackgaps
            Only relevant when `stackgroup` is used, and only the
            first `stackgaps` found in the `stackgroup` will be
            used - including if `visible` is "legendonly" but not
            if it is `false`. Determines how we handle locations at
            which other traces in this group have data but this one
            does not. With *infer zero* we insert a zero at these
            locations. With "interpolate" we linearly interpolate
            between existing values, and extrapolate a constant
            beyond the existing values.
        stackgroup
            Set several scatter traces (on the same subplot) to the
            same stackgroup in order to add their y values (or
            their x values if `orientation` is "h"). If blank or
            omitted this trace will not be stacked. Stacking also
            turns `fill` on by default, using "tonexty" ("tonextx")
            if `orientation` is "h" ("v") and sets the default
            `mode` to "lines" irrespective of point count. You can
            only stack on a numeric (linear or log) axis. Traces in
            a `stackgroup` will only fill to (or be filled to)
            other traces in the same group. With multiple
            `stackgroup`s or some traces stacked and some not, if
            fill-linked traces are not already consecutive, the
            later ones will be pushed down in the drawing order.
        stream
            :class:`plotly.graph_objects.scatter.Stream` instance
            or dict with compatible properties
        text
            Sets text elements associated with each (x,y) pair. If
            a single string, the same string appears over all the
            data points. If an array of string, the items are
            mapped in order to the this trace's (x,y) coordinates.
            If trace `hoverinfo` contains a "text" flag and
            "hovertext" is not set, these elements will be seen in
            the hover labels.
        textfont
            Sets the text font.
        textposition
            Sets the positions of the `text` elements with respects
            to the (x,y) coordinates.
        textpositionsrc
            Sets the source reference on Chart Studio Cloud for
            textposition .
        textsrc
            Sets the source reference on Chart Studio Cloud for
            text .
        texttemplate
            Template string used for rendering the information text
            that appear on points. Note that this will override
            `textinfo`. Variables are inserted using %{variable},
            for example "y: %{y}". Numbers are formatted using
            d3-format's syntax %{variable:d3-format}, for example
            "Price: %{y:$.2f}".
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format
            for details on the formatting syntax. Dates are
            formatted using d3-time-format's syntax
            %{variable|d3-time-format}, for example "Day:
            %{2019-01-01|%A}". https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format for details on the
            date formatting syntax. Every attributes that can be
            specified per-point (the ones that are `arrayOk: true`)
            are available.
        texttemplatesrc
            Sets the source reference on Chart Studio Cloud for
            texttemplate .
        uid
            Assign an id to this trace, Use this to provide object
            constancy between traces during animations and
            transitions.
        uirevision
            Controls persistence of some user-driven changes to the
            trace: `constraintrange` in `parcoords` traces, as well
            as some `editable: true` modifications such as `name`
            and `colorbar.title`. Defaults to `layout.uirevision`.
            Note that other user-driven trace attribute changes are
            controlled by `layout` attributes: `trace.visible` is
            controlled by `layout.legend.uirevision`,
            `selectedpoints` is controlled by
            `layout.selectionrevision`, and `colorbar.(x|y)`
            (accessible with `config: {editable: true}`) is
            controlled by `layout.editrevision`. Trace changes are
            tracked by `uid`, which only falls back on trace index
            if no `uid` is provided. So if your app can add/remove
            traces before the end of the `data` array, such that
            the same trace has a different index, you can still
            preserve user-driven changes if you give each trace a
            `uid` that stays with it as it moves.
        unselected
            :class:`plotly.graph_objects.scatter.Unselected`
            instance or dict with compatible properties
        visible
            Determines whether or not this trace is visible. If
            "legendonly", the trace is not drawn, but can appear as
            a legend item (provided that the legend itself is
            visible).
        x
            Sets the x coordinates.
        x0
            Alternate to `x`. Builds a linear space of x
            coordinates. Use with `dx` where `x0` is the starting
            coordinate and `dx` the step.
        xaxis
            Sets a reference between this trace's x coordinates and
            a 2D cartesian x axis. If "x" (the default value), the
            x coordinates refer to `layout.xaxis`. If "x2", the x
            coordinates refer to `layout.xaxis2`, and so on.
        xcalendar
            Sets the calendar system to use with `x` date data.
        xhoverformat
            Sets the hover text formatting rulefor `x`  using d3
            formatting mini-languages which are very similar to
            those in Python. For numbers, see:
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format.
            And for dates see: https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format. We add two items to
            d3's date formatter: "%h" for half of the year as a
            decimal number as well as "%{n}f" for fractional
            seconds with n digits. For example, *2016-10-13
            09:15:23.456* with tickformat "%H~%M~%S.%2f" would
            display *09~15~23.46*By default the values are
            formatted using `xaxis.hoverformat`.
        xperiod
            Only relevant when the axis `type` is "date". Sets the
            period positioning in milliseconds or "M<n>" on the x
            axis. Special values in the form of "M<n>" could be
            used to declare the number of months. In this case `n`
            must be a positive integer.
        xperiod0
            Only relevant when the axis `type` is "date". Sets the
            base for period positioning in milliseconds or date
            string on the x0 axis. When `x0period` is round number
            of weeks, the `x0period0` by default would be on a
            Sunday i.e. 2000-01-02, otherwise it would be at
            2000-01-01.
        xperiodalignment
            Only relevant when the axis `type` is "date". Sets the
            alignment of data points on the x axis.
        xsrc
            Sets the source reference on Chart Studio Cloud for  x
            .
        y
            Sets the y coordinates.
        y0
            Alternate to `y`. Builds a linear space of y
            coordinates. Use with `dy` where `y0` is the starting
            coordinate and `dy` the step.
        yaxis
            Sets a reference between this trace's y coordinates and
            a 2D cartesian y axis. If "y" (the default value), the
            y coordinates refer to `layout.yaxis`. If "y2", the y
            coordinates refer to `layout.yaxis2`, and so on.
        ycalendar
            Sets the calendar system to use with `y` date data.
        yhoverformat
            Sets the hover text formatting rulefor `y`  using d3
            formatting mini-languages which are very similar to
            those in Python. For numbers, see:
            https://github.com/d3/d3-format/tree/v1.4.5#d3-format.
            And for dates see: https://github.com/d3/d3-time-
            format/tree/v2.2.3#locale_format. We add two items to
            d3's date formatter: "%h" for half of the year as a
            decimal number as well as "%{n}f" for fractional
            seconds with n digits. For example, *2016-10-13
            09:15:23.456* with tickformat "%H~%M~%S.%2f" would
            display *09~15~23.46*By default the values are
            formatted using `yaxis.hoverformat`.
        yperiod
            Only relevant when the axis `type` is "date". Sets the
            period positioning in milliseconds or "M<n>" on the y
            axis. Special values in the form of "M<n>" could be
            used to declare the number of months. In this case `n`
            must be a positive integer.
        yperiod0
            Only relevant when the axis `type` is "date". Sets the
            base for period positioning in milliseconds or date
            string on the y0 axis. When `y0period` is round number
            of weeks, the `y0period0` by default would be on a
            Sunday i.e. 2000-01-02, otherwise it would be at
            2000-01-01.
        yperiodalignment
            Only relevant when the axis `type` is "date". Sets the
            alignment of data points on the y axis.
        ysrc
            Sets the source reference on Chart Studio Cloud for  y
            .
        
Did you mean "line"?

Bad property path:
range_x
^^^^^