In [1]:
import sys, os, copy
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
from survey import Survey, nine_to_county, purp_num_to_name18, purp_num_to_name23, mode_num_to_name23, county_order
from processed_survey import ProcessedSurvey
from survey_completeness import flag_complete, PERSON_REQS, HH_REQS, DAY_REQS, TRIP_REQS
sys.path.insert(0, r'Y:\champ\util\pythonlib-migration\master_versions\misc_utils')
from df_utils import df_to_excel

from openpyxl import load_workbook
from openpyxl.cell import MergedCell

In [2]:
# Per-table keyword arguments for the 2023 survey; unpacked into Survey(**SURVEY_2023)
# further down.  The household/person/day/trip tables come from the "flagged" review
# folder, vehicle/location from the deliverable folder.
# NOTE(review): 'filepath_or_buffer' is presumably forwarded to pandas.read_csv -- confirm in survey.py.
SURVEY_2023 = dict(
    household=dict(filepath_or_buffer=r'..\..\..\..\Review_20241127\flagged\hh.csv'),
    person=dict(filepath_or_buffer=r'..\..\..\..\Review_20241127\flagged\person.csv'),
    day=dict(filepath_or_buffer=r'..\..\..\..\Review_20241127\flagged\day.csv'),
    trip=dict(filepath_or_buffer=r'..\..\..\..\Review_20241127\flagged\trip.csv'),
    vehicle=dict(filepath_or_buffer=r'..\..\..\..\Deliverable_20241127\vehicle.csv'),
    location=dict(filepath_or_buffer=r'..\..\..\..\Deliverable_20241127\location.csv'),
)

# The three constants below are not referenced by the cells shown in this notebook.
OUTDIR = r'..\..\..\..\Review_20241127'
COUNTIES = r'Q:\GIS\Boundaries\Counties\Counties.shp'
FIPS_COUNTIES = [
    '001', '013', '041', '055', '075', '081', '085', '095', '097',
]

In [3]:
# Workbook opened with load_workbook() in the final cell; its 'household', 'person',
# 'day' and 'trip' sheets are filled in with the sensitivity results.
template_path = r'<PATH>\completeness_sensitivity.xlsx'
# Destination the filled-in workbook is saved to (the template itself is not overwritten).
output_path = r'<PATH>\completeness_sensitivity_rmove_only.xlsx'

In [4]:
# Keyword arguments for ProcessedSurvey(**survey_args): a base directory plus, for each
# processed table, its file name, field separator and the name of its weight column.
survey_args = dict(
    dir=r'<PATH>',
    hh=dict(file='survey2023_hrecx_rewt_base2023.dat', sep=' ', weight='hhexpfac'),
    person=dict(file='survey2023_precx_rewt_base2023.dat', sep=' ', weight='psexpfac'),
    day=dict(file='survey2023_pdayx_rewt_base2023.dat', sep=' ', weight='pdexpfac'),
    trip=dict(file='survey2023_tripx_rewt_base2023.dat', sep=' ', weight='trexpfac'),
    tour=dict(file='survey2023_tourx_rewt_base2023.dat', sep=' ', weight='toexpfac'),
    # Disabled raw-file inputs, kept for reference:
    # raw_dir=r'<PATH>',
    # raw_hh=dict(file='hh.csv', weight='hh_weight'),
    # raw_person=dict(file='person.csv', weight='person_weight'),
    # raw_day=dict(file='day.csv', weight='day_weight'),
    # raw_trip=dict(file='trip.csv', weight='trip_weight'),
)

In [5]:
# Build the 2023 Survey object from the per-table arguments in SURVEY_2023.
# s23 is the input that every flag_complete() run in the sensitivity loop starts from.
s23 = Survey(**SURVEY_2023)

In [6]:
# Load the processed survey tables described by survey_args.
ps23 = ProcessedSurvey(**survey_args)

# Every record that made it into the processed files is marked tour_complete = 1.
# The sensitivity loop left-merges these flags onto the raw survey tables and fills
# the gaps with 0, so records missing from the processed files end up as 0.
# Key columns are renamed to the names used by the raw survey tables so the merges line up.
# .assign() returns a new frame, so none of the ps23 tables are modified here.
hh2 = ps23.hh.data.rename(columns={'hhno':'hh_id'}).assign(tour_complete=1)
person2 = (ps23.person.data
           .rename(columns={'hhno':'hh_id','pno':'person_num'})
           .assign(tour_complete=1))
day2 = (ps23.day.data
        .rename(columns={'hhno':'hh_id','pno':'person_num','day':'travel_dow'})
        .assign(tour_complete=1))
trip2 = (ps23.trip.data
         .rename(columns={'hhno':'hh_id','pno':'person_num','day':'travel_dow','tsvid':'trip_num'})
         .assign(tour_complete=1))
# Previously tour2 was an alias of ps23.tour.data, so adding the flag column silently
# modified the loaded tour table; it is now built as a copy like the other four tables.
tour2 = ps23.tour.data.assign(tour_complete=1)

In [7]:
# Sensitivity grid: flag_complete() is run once for every combination of these four settings.
ITEM_CRITERIA = ['lenient','strict']
HOUSEHOLD_UNIT = ['family','household']
CONCURRENT_DAYS = [0,1,2,3,4,5,6,7]
CONCURRENT_WEIGHTED_DAYS = [0,1]

# Person-level requirements added on top of PERSON_REQS when the item criteria is 'strict'.
STRICT_PERSON_REQS = ['student_complete','work_loc_complete','transit_pass_complete',
                      'has_proxy_complete','can_drive_complete']

# Single source for the index level names, shared by the summary frames and by the
# per-run rows written into them (the summary index used to be misspelled 'item_critera').
INDEX_NAMES = ['item_criteria','household_unit','concurrent_days','concurrent_weighted_days']
mi = pd.MultiIndex.from_product(
    [ITEM_CRITERIA, HOUSEHOLD_UNIT, CONCURRENT_DAYS, CONCURRENT_WEIGHTED_DAYS],
    names=INDEX_NAMES)

# Household-level completeness flags read from flagged.hh.
hhcols = ['sfcta_hh_survey_complete','sfcta_hh_persons_complete',
          'sfcta_hh_concurrent_complete','sfcta_hh_concurrent_weighted_complete',
          'sfcta_hh_complete']
# Columns summed per run: the household flags, the processed-file flag, and their product.
outcols = hhcols + ['tour_complete','all_complete']

# One summary frame per table; each row is one grid combination, each value a column total.
hhdf = pd.DataFrame(index=mi, columns=outcols)
psdf = pd.DataFrame(index=mi, columns=outcols)
pddf = pd.DataFrame(index=mi, columns=outcols)
trdf = pd.DataFrame(index=mi, columns=outcols)
todf = pd.DataFrame(index=mi, columns=outcols)  # allocated but never filled in this cell


def _with_tour_flag(table, processed_flags, hh_flags=None):
    """Attach tour_complete and all_complete to one flagged survey table.

    table           -- a table from the flag_complete() result (hh, person, day or trip).
    processed_flags -- key columns plus 'tour_complete' from the matching processed table.
    hh_flags        -- optional frame of hh_id plus household completeness flags; when
                       given it is merged on so person/day/trip rows carry those flags.

    Both merges use pandas' default of joining on all shared column names.  Rows without
    a match in processed_flags get 0; note that fillna(0) zero-fills every column of the
    merged frame, not only tour_complete.  all_complete is sfcta_hh_complete * tour_complete.
    """
    merged = pd.merge(table, processed_flags, how='left').fillna(0)
    if hh_flags is not None:
        # Default inner merge: rows whose household is absent from hh_flags are dropped.
        merged = pd.merge(hh_flags, merged)
    merged['all_complete'] = merged['sfcta_hh_complete'] * merged['tour_complete']
    return merged


def _record_totals(summary, key, table, cols):
    """Write the column totals of table[cols] into the row of summary identified by key.

    key is the (item_criteria, household_unit, concurrent_days, concurrent_weighted_days)
    tuple of the current run.  DataFrame.update aligns on index values and modifies
    summary in place.
    """
    totals = list(table[cols].sum().values)
    row_index = pd.MultiIndex.from_tuples([key], names=summary.index.names)
    summary.update(pd.DataFrame([totals], columns=cols, index=row_index))


for h1 in ITEM_CRITERIA:
    person_reqs = PERSON_REQS + STRICT_PERSON_REQS if h1 == 'strict' else PERSON_REQS

    for h2 in HOUSEHOLD_UNIT:
        for concurrent_days in CONCURRENT_DAYS:
            for concurrent_weighted_days in CONCURRENT_WEIGHTED_DAYS:
                key = (h1, h2, concurrent_days, concurrent_weighted_days)
                # NOTE(review): copy.copy is a shallow copy, so the tables inside s23 may be
                # shared with the copy; if flag_complete edits them in place, later runs see
                # those edits -- confirm, and consider copy.deepcopy if so.
                flagged = flag_complete(
                    copy.copy(s23),
                    hh_unit=h2,
                    person_reqs=person_reqs,
                    concurrent_days=concurrent_days,
                    concurrent_weighted_days=concurrent_weighted_days
                )
                # h1 is not part of this progress line, so each line appears once per item criteria.
                print(h2, concurrent_days, concurrent_weighted_days, len(s23.person), len(flagged.person))

                hh_flags = flagged.hh[['hh_id']+hhcols]
                hh = _with_tour_flag(flagged.hh, hh2[['hh_id','tour_complete']])
                person = _with_tour_flag(
                    flagged.person,
                    person2[['hh_id','person_num','tour_complete']],
                    hh_flags)
                day = _with_tour_flag(
                    flagged.day,
                    day2[['hh_id','person_num','travel_dow','tour_complete']],
                    hh_flags)
                trip = _with_tour_flag(
                    flagged.trip,
                    trip2[['hh_id','person_num','travel_dow','trip_num','tour_complete']],
                    hh_flags)

                _record_totals(hhdf, key, hh, outcols)
                _record_totals(psdf, key, person, outcols)
                _record_totals(pddf, key, day, outcols)
                _record_totals(trdf, key, trip, outcols)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 0 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 0 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 1 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 1 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 2 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 2 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 3 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 3 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 4 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 4 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 5 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 5 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 6 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 6 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 7 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 7 1 15985 15210
household 0 0 15985 15985
household 0 1 15985 15985
household 1 0 15985 15985
household 1 1 15985 15985
household 2 0 15985 15985
household 2 1 15985 15985
household 3 0 15985 15985
household 3 1 15985 15985
household 4 0 15985 15985
household 4 1 15985 15985
household 5 0 15985 15985
household 5 1 15985 15985
household 6 0 15985 15985
household 6 1 15985 15985
household 7 0 15985 15985
household 7 1 15985 15985


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 0 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 0 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 1 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 1 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 2 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 2 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 3 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 3 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 4 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 4 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 5 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 5 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 6 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 6 1 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 7 0 15985 15210


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  person.drop(columns=['sfcta_num_days_complete'], inplace=True)


family 7 1 15985 15210
household 0 0 15985 15985
household 0 1 15985 15985
household 1 0 15985 15985
household 1 1 15985 15985
household 2 0 15985 15985
household 2 1 15985 15985
household 3 0 15985 15985
household 3 1 15985 15985
household 4 0 15985 15985
household 4 1 15985 15985
household 5 0 15985 15985
household 5 1 15985 15985
household 6 0 15985 15985
household 6 1 15985 15985
household 7 0 15985 15985
household 7 1 15985 15985


In [8]:
def _write_frame(ws, df, start_row=3, start_col=1):
    """Write df's index labels and non-null values into worksheet ws.

    Row i of df goes to worksheet row start_row + i.  Index labels occupy the first
    df.index.nlevels columns starting at start_col; the data columns follow directly
    after them.  Null values are skipped, so whatever the template already holds in
    those cells is left as is.
    NOTE(review): start_row=3 presumably leaves room for two header rows in the
    template -- confirm against the workbook.
    """
    n_index_cols = df.index.nlevels
    for i, (idx, row) in enumerate(df.iterrows()):
        excel_row = start_row + i

        labels = idx if isinstance(idx, tuple) else (idx,)
        for j, label in enumerate(labels):
            # Cells covered by a merged range (other than its top-left cell) are
            # MergedCell objects whose value cannot be set.  Skip only those, instead
            # of hiding every possible error behind a bare except as before.
            if isinstance(ws.cell(row=excel_row, column=start_col + j), MergedCell):
                continue
            ws.cell(row=excel_row, column=start_col + j, value=label)

        for j, value in enumerate(row):
            if pd.notnull(value):
                ws.cell(row=excel_row, column=start_col + j + n_index_cols, value=value)


# Fill one sheet of the template per summary table.
wb = load_workbook(template_path)
for sheet, df in zip(['household','person','day','trip'],[hhdf,psdf,pddf,trdf]):
    _write_frame(wb[sheet], df)

# Save the updated Excel file
wb.save(output_path)
print(f"Data written successfully to {output_path}")

Data written successfully to Q:\Data\Surveys\HouseholdSurveys\MTC-SFCTA2022\Review_20241127\completeness_sensitivity_rmove_only.xlsx
