In [27]:
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import numpy as np
from pandas.api.types import CategoricalDtype
# Render every float in displayed tables with two decimal places.
pd.options.display.float_format = '{:.2f}'.format

# Numeric coding for the grouped answers: Agree -> 1, Disagree -> 0.
# "Unknown" is coded as NaN so it is treated as a missing value.
nominal_mapping = {'Agree': 1, 'Disagree': 0, "Unknown": np.nan}


In [40]:
# Load the business data CSV and replace the long questionnaire headers
# with the short column names that the cells below refer to.
business_df = pd.read_csv('CP201A Business Data.csv', sep=',').rename(
    columns={
        "Technology (list all with commas between)": "tech_use",
        "Financial Stability": "financial_stability",
        "Family Owned": "family_owned",
        "Struggle without Technology": "tech_struggle",
        "Person Surveyed": "respondent",
        "Business Type": "type",
        "Business Length Open": "length_open",
        "Business ID": "ID",
    }
)

In [43]:
# Clean the categorical financial-stability answers.
# Statement being rated: The financial stability of this business has fully
# recovered from the COVID-19 pandemic.

# Collapse the raw answer labels into 'Agree' / 'Disagree' / 'Unknown'.
group_mapping = {
    raw_answer: grouped
    for grouped, raw_answers in (
        ('Agree', ('Strongly Agree', 'Somewhat Agree')),
        ('Disagree', ('Strongly Disagree', 'Somewhat Disagree')),
        ('Unknown', ("Don't Know/NA",)),
    )
    for raw_answer in raw_answers
}

# Add 'financial_group' (the collapsed answer) and 'financialrecovery_dv'
# (the collapsed answer passed through nominal_mapping). Labels that are
# not keys of the mapping come out of .map() as NaN.
business_df = business_df.assign(
    financial_group=lambda df: df['financial_stability'].map(group_mapping),
    financialrecovery_dv=lambda df: df['financial_group'].map(nominal_mapping),
)

In [41]:
# Tally the family-ownership answers, keeping missing values in the count.
business_df.value_counts(subset=["family_owned"], dropna=False)

family_owned
Yes             137
No               55
Don't Know        8
Name: count, dtype: int64

In [44]:
# Share of each financial_group within every family_owned category
# (rows sum to 1); the 'All' margin row is the overall distribution.
pd.crosstab(
    business_df['family_owned'],
    business_df['financial_group'],
    normalize='index',
    margins=True,
)

financial_group,Agree,Disagree,Unknown
family_owned,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Don't Know,0.25,0.25,0.5
No,0.31,0.29,0.4
Yes,0.31,0.48,0.2
All,0.31,0.42,0.27


In [42]:
# Regroup business tenure: "0-3 years" is mapped to NaN so it is excluded,
# and the two middle bands are merged into a single "4-10" category.
length_mapping = {
    "More than 10 years": "More than 10",
    "0-3 years": np.nan,
    "6-10 years": "4-10",
    "4-5 years": "4-10",
}

# Add the regrouped tenure column plus one dummy per remaining category
# (the two dummies are complements of each other).
business_df = business_df.assign(
    length_open_filtered=lambda df: df['length_open'].map(length_mapping),
    open_ten_plus_dv=lambda df: df['length_open_filtered'].map(
        {"More than 10": 1, "4-10": 0}
    ),
    open_four_ten_dv=lambda df: df['length_open_filtered'].map(
        {"More than 10": 0, "4-10": 1}
    ),
)

In [37]:
# Frequency of each value of the ten-plus-years dummy, in one 'Total' column.
pd.crosstab(business_df['open_ten_plus_dv'], "Total")

col_0,Total
open_ten_plus_dv,Unnamed: 1_level_1
0.0,49
1.0,89


In [32]:
# Share of financialrecovery_dv values within each tenure group
# (rows sum to 1); the 'All' margin row is the overall split.
pd.crosstab(
    business_df['length_open_filtered'],
    business_df['financialrecovery_dv'],
    normalize='index',
    margins=True,
)

financialrecovery_dv,0.00,1.00
length_open_filtered,Unnamed: 1_level_1,Unnamed: 2_level_1
4-10,0.5,0.5
More than 10,0.66,0.34
All,0.6,0.4


In [39]:
# Raw counts of financialrecovery_dv by the ten-plus-years dummy,
# with 'All' row and column totals.
pd.crosstab(
    business_df['open_ten_plus_dv'],
    business_df['financialrecovery_dv'],
    margins=True,
)

financialrecovery_dv,0.00,1.00,All
open_ten_plus_dv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.00,22,22,44
1.00,51,26,77
All,73,48,121
