# Specify tables and columns

In [1]:
import numpy as np
import pandas as pd
import orca
import os; os.chdir('../')
import warnings; warnings.simplefilter('ignore')

# Set data directory

d = '/home/data/fall_2018/'

if 'data_directory' in orca.list_injectables():
    d = orca.get_injectable('data_directory')
    
from scripts import datasources, models, variables    

In [2]:
@orca.table(cache=True)
def persons():
    df = pd.read_csv(
        d + 'chts_persons_w_zone_ids.csv',
        index_col = ["SAMPN", "PERNO"]
    )
    return df

#persons_chts = persons_df1.join(persons_df2, how="left")

persons = orca.get_table('persons').to_frame()
students = persons.loc[persons['STUDE'].isin([1, 2])# full time & part time students
                 & persons['SCHOL'].isin([3,  # Kindergarten to grade 8
                                          4,  # Grades 9 to 12 
                                          6,  # 2-year college (community college) 
                                          7,  # 4-year college or university 
                                          8]) # Graduate school / Professional 
                 & (~persons['SNAME_lookup'].isna()) 
                 & (persons['SNAME_lookup'] != "DK/RF")]
len(students)

schools_raw = students.groupby(by=["SCHOL", "SNAME_lookup", "SZIP_lookup"]).size().reset_index(name='counts')
# There are 3505 unique "schools", most of them have 1 student in the CHTS sample
# For now, keep only schools with 5+ students in the CHTS sample
schools = schools_raw.loc[schools_raw['counts'] >= 5]#.drop(columns=["counts"])
schools["school_id"] = np.arange(len(schools))

students = pd.merge(students, schools.drop(columns=["counts"]), 
                    how="left", on=["SCHOL", "SNAME_lookup", "SZIP_lookup"])#["school_id"]
students = students.loc[~students['school_id'].isna()]

orca.add_table('students', students)
orca.add_table('schools', schools)
#len(students)

<orca.orca.DataFrameWrapper at 0x7fdce5730240>

In [3]:
@orca.column('students', 'is_college_student', cache=True)
def is_college_student(students):
    is_college_map = {3: 0,
                      4: 0,
                      6: 1,
                      7: 1,
                      8: 1}
    return students.SCHOL.map(is_college_map)

#@orca.column('students', 'school_id', cache=True)
#def school_id(students, schools):
#    misc.reindex()
#    return pd.merge(students, schools, how="left", on=["SCHOL", "SNAME_lookup", "SZIP_lookup"])["school_id"]

# Configure models

In [4]:
from urbansim_templates import modelmanager

modelmanager.initialize()

from urbansim_templates.models import LargeMultinomialLogitStep, SegmentedLargeMultinomialLogitStep
m0 = LargeMultinomialLogitStep(
      choosers="students",
      alternatives="schools",
      model_expression="counts",
      choice_column="school_id",
      alt_sample_size = 20
)
m0.fit()


KeyError: 'SmallMultinomialLogitStep'

In [None]:
from urbansim_templates.models import SegmentedLargeMultinomialLogitStep

m = SegmentedLargeMultinomialLogitStep(
      defaults = m0,
      name = "school-choice-model",
      segmentation_column = "is_college_student",
      )

m.fit_all()