In [None]:
import importlib
from collections import Counter
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from networkx import connected_components
from psycopg.types.net import Network

from classes.base_data import BaseData

from constants import constants as c
from classes import base_landlord_data as bld
from workflows import workflow_landlords as wkfl

# Reload the project modules imported above so that source edits made after the
# kernel started take effect. Order is preserved: constants, then
# base_landlord_data, then workflow_landlords. The from-imports that follow this
# block run after the reload, so they bind names from the reloaded modules.
for _stale_module in (c, bld, wkfl):
    importlib.reload(_stale_module)

from classes.base_landlord_data import NetworkAnalysis

from constants.constants import DATA_ROOT
from workflows.workflow_landlords import WkflValidateAddresses, WkflFixAddresses, WkflPrepareProps, WkflSubsetRentals, WkflCleanTaxRecords, \
    WkflPrepareCorpLLC, WkflValidateCorpLLC, WkflTaxStringMatch, WkflNetworkAnalysis


# Summary

In [None]:
# One-shot run of WkflValidateAddresses: construct the workflow object, call its
# workflow() entry point, then call save_outputs().
wkfl_validated_addresses = WkflValidateAddresses()
wkfl_validated_addresses.workflow()
wkfl_validated_addresses.save_outputs()

In [None]:
# One-shot run of WkflFixAddresses: construct the workflow object, call its
# workflow() entry point, then save the results.
# NOTE(review): this cell called save_output() (singular); every other workflow
# cell in this notebook calls save_outputs(), so the singular form looked like a
# typo that would raise AttributeError. Confirm WkflFixAddresses exposes
# save_outputs().
wkfl_fix_addresses = WkflFixAddresses()
wkfl_fix_addresses.workflow()
wkfl_fix_addresses.save_outputs()

In [None]:
# One-shot run of WkflPrepareProps: construct, call workflow(), then
# save_outputs().
wkfl_prepare_props = WkflPrepareProps()
wkfl_prepare_props.workflow()
wkfl_prepare_props.save_outputs()

In [None]:
# One-shot run of WkflSubsetRentals: construct, call workflow(), then
# save_outputs().
wkfl_subset_rentals = WkflSubsetRentals()
wkfl_subset_rentals.workflow()
wkfl_subset_rentals.save_outputs()

In [None]:
# One-shot run of WkflCleanTaxRecords: construct, call workflow(), then
# save_outputs().
wkfl_clean_tax_records = WkflCleanTaxRecords()
wkfl_clean_tax_records.workflow()
wkfl_clean_tax_records.save_outputs()

In [None]:
# One-shot run of WkflPrepareCorpLLC: construct, call workflow(), then
# save_outputs().
wkfl_prepare_corp_llc = WkflPrepareCorpLLC()
wkfl_prepare_corp_llc.workflow()
wkfl_prepare_corp_llc.save_outputs()

In [None]:
# One-shot run of WkflValidateCorpLLC: construct, call workflow(), then
# save_outputs().
wkfl_validate_corp_llc = WkflValidateCorpLLC()
wkfl_validate_corp_llc.workflow()
wkfl_validate_corp_llc.save_outputs()

In [None]:
# One-shot run of WkflTaxStringMatch: construct, call workflow(), then
# save_outputs().
wkfl_tax_string_match = WkflTaxStringMatch()
wkfl_tax_string_match.workflow()
wkfl_tax_string_match.save_outputs()

In [None]:
# One-shot run of WkflNetworkAnalysis: construct, call workflow(), then
# save_outputs().
wkfl_network_analysis = WkflNetworkAnalysis()
wkfl_network_analysis.workflow()
wkfl_network_analysis.save_outputs()

In [None]:
# Grab the "props_networked" entry from the workflow's df_outputs for inspection.
# Needs wkfl_network_analysis to exist in the kernel; df_outputs is presumably
# filled by workflow() — confirm.
df_test = wkfl_network_analysis.df_outputs["props_networked"]

In [None]:
# Display how many rows carry each FINAL_COMPONENT value, as a two-column frame
# (value_counts() followed by reset_index()).
(
    df_test["FINAL_COMPONENT"]
    .value_counts()
    .reset_index()
)

In [None]:
# Load wkfl_props_networked_test3.csv through BaseData.get_df, passing an empty
# dict as the second argument.
# NOTE(review): the path is absolute and specific to one user's machine, so this
# cell will not run elsewhere. DATA_ROOT is imported at the top of the notebook
# and may be the intended base directory — confirm before changing.
df_networked = BaseData.get_df(
    "/Users/dpederson/Library/CloudStorage/ProtonDrive-director@landlordmapper.org-folder/data/datasets/chi2/landlord_workflow/wkfl_props_networked_test3.csv",
    {},
)

In [None]:
# Display the frame loaded in the previous cell (bare expression -> rich repr).
df_networked


# 1. WkflValidateAddresses

In [None]:
# Fresh WkflValidateAddresses instance for the step-by-step walk-through below.
# This rebinds the name also used by the one-shot cell in the Summary section.
wkfl_validated_addresses = WkflValidateAddresses()

In [None]:
# EXECUTE WORKFLOW
df_validated_props, df_validated_corps_llcs = wkfl_validated_addresses.drop_duplicates()

In [None]:
# combine_unvalidated_addrs() takes no arguments; its result is the starting set
# of unvalidated addresses for the steps that follow.
df_unvalidated = wkfl_validated_addresses.combine_unvalidated_addrs()

In [None]:
# Pass the unvalidated set plus both validated frames to remove_validated();
# the result is kept as the filtered unvalidated set.
df_unvalidated_filtered = wkfl_validated_addresses.remove_validated(
    df_unvalidated,
    df_validated_props,
    df_validated_corps_llcs,
)

In [None]:
# separate_poboxes() returns two frames from the filtered set, bound here as the
# PO-box rows and the remaining rows.
df_poboxes, df_others = wkfl_validated_addresses.separate_poboxes(df_unvalidated_filtered)

In [None]:
# Run fix_poboxes() on the PO-box frame only; the other rows are untouched here.
df_poboxes_fixed = wkfl_validated_addresses.fix_poboxes(df_poboxes)

In [None]:
# run_validators() receives the fixed PO-box frame and the non-PO-box frame and
# returns one validated frame for each, in that order.
df_validated_poboxes, df_validated_others = wkfl_validated_addresses.run_validators(df_poboxes_fixed, df_others)

In [None]:
# Hand all four validated frames to concatenate_validated(). It returns two
# frames, bound here as the newly validated set and the validated master.
# Argument order matters: poboxes, props, corps/LLCs, others.
df_validated_new, df_validated_master = wkfl_validated_addresses.concatenate_validated(
    df_validated_poboxes,
    df_validated_props,
    df_validated_corps_llcs,
    df_validated_others,
)

In [None]:
# generate_unvalidated_master() is fed the newly validated frame (not the
# unvalidated one) — presumably to work out what is still unvalidated; confirm.
df_unvalidated_master_final = wkfl_validated_addresses.generate_unvalidated_master(df_validated_new)

In [None]:
# Last step of this section: produce the final validated master from the
# concatenated validated master frame.
df_validated_master_final = wkfl_validated_addresses.generate_validated_master(df_validated_master)

# 2. WkflFixAddresses

In [None]:
# Fresh WkflFixAddresses instance for the step-by-step walk-through.
wkfl_fix_addresses = WkflFixAddresses()

# LOAD DATA
# Input frames are read from the workflow object's *_in attributes.
df_addrs_to_fix = wkfl_fix_addresses.df_addrs_to_fix_in
df_addrs_to_fix_unit = wkfl_fix_addresses.df_addrs_to_fix_unit_in
df_addrs_to_fix_missing = wkfl_fix_addresses.df_addrs_to_fix_missing_in

# EXECUTE WORKFLOW
# First step: returns one fixed frame per input, in the same order
# (unit frame first, missing frame second).
df_unit_fixed, df_missing_fixed = wkfl_fix_addresses.add_missing_units_to_formatted_address(
    df_addrs_to_fix_unit,
    df_addrs_to_fix_missing,
)

In [None]:
# Combine the addresses-to-fix frame with the two fixed frames from the previous
# cell via replace_fixed_addresses().
df_valid_addrs_updated_fixed = wkfl_fix_addresses.replace_fixed_addresses(df_addrs_to_fix, df_unit_fixed, df_missing_fixed)

In [None]:
# Last step of this section: finalize_validated_master() on the updated frame.
df_valid_addrs_updated_final = wkfl_fix_addresses.finalize_validated_master(df_valid_addrs_updated_fixed)

# 3. WkflPrepareProps

In [None]:
# Fresh WkflPrepareProps instance for the step-by-step walk-through.
wkfl_prepare_props = WkflPrepareProps()

# LOAD DATA
# Input frames are read from the workflow object's *_in attributes.
df_less_6 = wkfl_prepare_props.df_less_6_in
df_greater_7 = wkfl_prepare_props.df_greater_7_in
df_pins_results = wkfl_prepare_props.df_pins_results_in

# EXECUTE WORKFLOW
# First step takes no arguments, so it works from state held on the workflow
# object rather than from the frames loaded above.
df_props_validated = wkfl_prepare_props.merge_validated_addresses()

In [None]:
# add_property_classes() takes the greater_7 and pins_results frames and returns
# a cleaned counterpart of each, in the same order.
df_greater_7_clean, df_pins_results_clean = wkfl_prepare_props.add_property_classes(df_greater_7, df_pins_results)

In [None]:
# Merge step: the validated props frame, the untouched less_6 frame, and the two
# cleaned frames from the previous cell all go into merge_class_props().
df_merged = wkfl_prepare_props.merge_class_props(
    df_props_validated,
    df_less_6,
    df_greater_7_clean,
    df_pins_results_clean,
)

In [None]:
# Run clean_class_codes() on the merged frame.
df_merged_clean = wkfl_prepare_props.clean_class_codes(df_merged)

In [None]:
# Last step of this section: finalize_class_codes() on the cleaned merge.
df_merged_final = wkfl_prepare_props.finalize_class_codes(df_merged_clean)

# 4. WkflSubsetRentals

In [None]:
# Fresh WkflSubsetRentals instance for the step-by-step walk-through.
wkfl_subset_rentals = WkflSubsetRentals()

# LOAD DATA
# Input frames are read from the workflow object's *_in attributes.
df_props_all = wkfl_subset_rentals.df_props_all_in
df_mto = wkfl_subset_rentals.df_mto_in

# EXECUTE WORKFLOW
# First subset: subset_by_class_codes() on the full property frame.
# NOTE: df_rentals is rebound by later sections of this notebook; re-run this
# cell before the cells below if any of those sections ran in between.
df_rentals = wkfl_subset_rentals.subset_by_class_codes(df_props_all)

In [None]:
# Second subset: subset_by_taxpayer_address() gets both the class-code subset
# and the full property frame.
df_rentals_missed = wkfl_subset_rentals.subset_by_taxpayer_address(df_rentals, df_props_all)

In [None]:
# Third subset: subset_by_mto_pins() gets the MTO frame and the full property
# frame.
df_props_mto = wkfl_subset_rentals.subset_by_mto_pins(df_mto, df_props_all)


In [None]:
# Last step of this section: all three subsets go into finalize_rental_subset().
# NOTE: df_rentals_final is rebound at the end of the WkflValidateCorpLlc
# section further down this notebook.
df_rentals_final = wkfl_subset_rentals.finalize_rental_subset(df_rentals, df_rentals_missed, df_props_mto)


# 5. WkflCleanTaxRecords

In [None]:
# Fresh WkflCleanTaxRecords instance for the step-by-step walk-through.
wkfl_clean_tax_records = WkflCleanTaxRecords()

# LOAD DATA
# NOTE: this rebinds df_rentals, which the WkflSubsetRentals section above also
# assigns; from here on the name refers to this workflow's df_rentals_in.
df_rentals = wkfl_clean_tax_records.df_rentals_in

# EXECUTE WORKFLOW
# First step: clean_taxpayer_names() on the loaded rentals frame.
df_rentals_cleaned = wkfl_clean_tax_records.clean_taxpayer_names(df_rentals)

In [None]:
# Run add_common_name_address_booleans() on the name-cleaned frame.
df_rentals_bools = wkfl_clean_tax_records.add_common_name_address_booleans(df_rentals_cleaned)

In [None]:
# Run add_corp_booleans() on the output of the previous step.
df_corp_bools = wkfl_clean_tax_records.add_corp_booleans(df_rentals_bools)

In [None]:
# Last step of this section: add_columns_banks_trusts() on the frame from the
# previous step.
df_cleaned_final = wkfl_clean_tax_records.add_columns_banks_trusts(df_corp_bools)

# 6. WkflPrepareCorpLLC

In [None]:
# Fresh WkflPrepareCorpLLC instance for the step-by-step walk-through.
wkfl_prepare_corp_llc = WkflPrepareCorpLLC()

# LOAD DATA
# Input frames are read from the workflow object's *_in attributes.
df_corp = wkfl_prepare_corp_llc.df_corp_in
df_llc = wkfl_prepare_corp_llc.df_llc_in
df_props = wkfl_prepare_corp_llc.df_props_in

# EXECUTE WORKFLOW
# First step: returns one subset per input, corps first and LLCs second.
df_corp_subset, df_llc_subset = wkfl_prepare_corp_llc.subset_active_corps_llcs(df_corp, df_llc)

In [None]:
# Corp branch: set_corp_address_columns() on the corp subset.
df_corp_subset_addrs = wkfl_prepare_corp_llc.set_corp_address_columns(df_corp_subset)

In [None]:
# LLC branch, step 1: fix_llc_zip_codes() on the LLC subset (the corp branch has
# no matching zip-fix call in this notebook).
df_llc_subset_fixed_zips = wkfl_prepare_corp_llc.fix_llc_zip_codes(df_llc_subset)

In [None]:
# LLC branch, step 2: set_llc_address_columns() on the zip-fixed LLC frame.
df_llc_subset_addrs = wkfl_prepare_corp_llc.set_llc_address_columns(df_llc_subset_fixed_zips)

In [None]:
# Both branches rejoin here: extract_unique_corps_llcs() takes the corp and LLC
# address frames and returns a single frame.
df_corps_llcs = wkfl_prepare_corp_llc.extract_unique_corps_llcs(df_corp_subset_addrs, df_llc_subset_addrs)

In [None]:
# Run clean_corp_llc_names() on the combined frame; the result is reused by the
# three matching steps that follow.
df_corps_llcs_cleaned = wkfl_prepare_corp_llc.clean_corp_llc_names(df_corps_llcs)

In [None]:
# Matching pass 1: merge_on_clean_name() between the property frame and the
# cleaned corp/LLC frame.
df_merge_clean = wkfl_prepare_corp_llc.merge_on_clean_name(df_props, df_corps_llcs_cleaned)

In [None]:
# Matching pass 2: merge_on_core_name() applied to the output of pass 1, again
# against the cleaned corp/LLC frame.
df_merge_core = wkfl_prepare_corp_llc.merge_on_core_name(df_merge_clean, df_corps_llcs_cleaned)

In [None]:
# Matching pass 3a: get_string_matches() on the pass-2 output and the cleaned
# corp/LLC frame; only the matches are returned here.
df_matches = wkfl_prepare_corp_llc.get_string_matches(df_merge_core, df_corps_llcs_cleaned)

In [None]:
# Matching pass 3b: fold the string matches back into the pass-2 frame.
df_merge_string_match = wkfl_prepare_corp_llc.merge_on_string_matches(df_merge_core, df_matches)

In [None]:
# Last step of this section: subset_corps_llcs() takes the fully merged frame
# plus the corp and LLC address frames and returns two frames, corps first and
# LLCs second.
df_corps_props, df_llcs_props = wkfl_prepare_corp_llc.subset_corps_llcs(
    df_merge_string_match,
    df_corp_subset_addrs,
    df_llc_subset_addrs,
)

# 7. WkflValidateCorpLLC

In [None]:
# Fresh WkflValidateCorpLLC instance for the step-by-step walk-through.
wkfl_validate_corp_llc = WkflValidateCorpLLC()

# LOAD DATA
# Input frames are read from the workflow object's *_in attributes.
# NOTE: df_rentals is rebound here; earlier sections assign the same name from
# other workflows.
df_llc_props = wkfl_validate_corp_llc.df_llc_props_in
df_corp_props = wkfl_validate_corp_llc.df_corp_props_in
df_valid_addrs = wkfl_validate_corp_llc.df_valid_addrs_in
df_rentals = wkfl_validate_corp_llc.df_rentals_in

# EXECUTE WORKFLOW
# First step: corps are passed (and returned) first, LLCs second.
df_corp_props_cleaned, df_llc_props_cleaned = wkfl_validate_corp_llc.drop_dups_add_is_corp_llc_booleans(
    df_corp_props,
    df_llc_props,
)

In [None]:
# Corp branch: merge_corps_on_addresses() with the validated-address frame.
df_corp_merged = wkfl_validate_corp_llc.merge_corps_on_addresses(df_corp_props_cleaned, df_valid_addrs)

In [None]:
# LLC branch: merge_llcs_on_addresses() with the same validated-address frame.
df_llc_merged = wkfl_validate_corp_llc.merge_llcs_on_addresses(df_llc_props_cleaned, df_valid_addrs)

In [None]:
# clean_address_merges() takes both merged frames and returns a final version of
# each, corps first and LLCs second.
df_corp_merged_final, df_llc_merged_final = wkfl_validate_corp_llc.clean_address_merges(df_corp_merged, df_llc_merged)

In [None]:
# Join the rentals frame with the final LLC and corp frames.
# Note the argument order: LLCs come first here, unlike the preceding steps.
df_rentals_merged = wkfl_validate_corp_llc.merge_props_on_corps(
    df_llc_merged_final,
    df_corp_merged_final,
    df_rentals,
)

In [None]:
# Run clean_merge_columns() on the merged rentals frame.
df_rentals_merged_cleaned = wkfl_validate_corp_llc.clean_merge_columns(df_rentals_merged)

In [None]:
# Run set_address_matching_columns() on the cleaned merge.
df_rentals_match_addresses = wkfl_validate_corp_llc.set_address_matching_columns(df_rentals_merged_cleaned)

In [None]:
# Last step of this section: set_address_booleans().
# NOTE: this rebinds df_rentals_final, which the WkflSubsetRentals section also
# assigns.
df_rentals_final = wkfl_validate_corp_llc.set_address_booleans(df_rentals_match_addresses)

# 8. WkflTaxStringMatch

In [None]:
# Fresh WkflTaxStringMatch instance for the step-by-step walk-through.
wkfl_tax_string_match = WkflTaxStringMatch()

# LOAD DATA
# Both names are rebound again by the WkflNetworkAnalysis section below, and
# df_rentals is also assigned by earlier sections.
df_rentals = wkfl_tax_string_match.df_rentals_in
df_analysis = wkfl_tax_string_match.df_analysis_in


In [None]:
# Only step of this section: run_string_matching() on the two loaded frames.
# The result is kept in a variable; this section has no save cell.
string_match_results = wkfl_tax_string_match.run_string_matching(df_rentals, df_analysis)


# 9. WkflNetworkAnalysis

In [None]:
# Fresh WkflNetworkAnalysis instance for the step-by-step walk-through. This
# rebinds the name used by the one-shot cell in the Summary section.
wkfl_network_analysis = WkflNetworkAnalysis()

# LOAD DATA
# Rebinds df_rentals and df_analysis to this workflow's inputs.
df_rentals = wkfl_network_analysis.df_rentals_in
df_analysis = wkfl_network_analysis.df_analysis_in

In [None]:
# Run the analysis on the two loaded frames and keep whatever it returns.
results = wkfl_network_analysis.run_network_analysis(df_rentals, df_analysis)

In [None]:
# Save via the workflow object. `results` from the previous cell is not passed
# in, so this relies on state held on wkfl_network_analysis — presumably set by
# run_network_analysis(); confirm.
wkfl_network_analysis.save_outputs()
