In [1]:
import pandas as pd
import numpy as np
import altair as alt
import seaborn as sns
import re
import warnings
warnings.filterwarnings('ignore')
np.set_printoptions(precision=3)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

At this point data has been scraped from the relavent gov websites for 9% LIHTC data- want to clean and combine it


In [2]:
ohio = pd.read_csv("Ohio_LIHTC_Approved_9%_augmented.csv")
georgia = pd.read_csv("Georgia_LIHTC_Approved_9%_augmented.csv")
california = pd.read_csv("California_LIHTC_Nine_Percent_augmented.csv")
virginia = pd.read_csv("Virginia_LIHTC_Approved_9%.csv")

First I want to create a concatianted subgrouping of all four states with project name, funding type, interest rate and amount of funds. I create state names columns, do some column 
renaming, would ideally make this a function if I have time 

In [3]:
california_financing = california[["Project Name_x","Sourcing Type","Interest Rate","Financing Type","Housing Type_x","Amount of Funds"]]
california_financing["State"] = "California"
california_financing = california_financing.rename(columns={"Project Name_x":"Project Name",
                                                            "Sourcing Type":"Financing_sub_cat",
                                                            "Housing Type_x":"Project Type",
                                                           "Financing Type":"Funding Type"})

In [4]:
virginia_financing = virginia[["File Name","Financing_sub_cat","Funding Type","Interest Rate","Amount of Funds"]]
virginia_financing["State"] = "Virginia"
virginia_financing = virginia_financing.rename(columns={"File Name":"Project Name"})

In [5]:
ohio_financing = ohio[["Project Name","Project Population","Financing_sub_cat","Funding Type","Amount of Funds"]]
ohio_financing["State"] = "Ohio"
ohio_financing = ohio_financing.rename(columns={"Project Population":"Project Type"})
ohio["Funding Type"] = ohio["Funding Type"].str[:-8]

In [6]:
georgia_financing = georgia[["File Name","Financing_sub_cat","Effective Interest Rate","Funding Type","Project Type","Amount of Funds"]]
georgia_financing["State"] = "Georgia"
georgia_financing = georgia_financing.rename(columns={"Effective Interest Rate":"Interest Rate","File Name":"Project Name"})
georgia_financing["Interest Rate"] = georgia_financing["Interest Rate"] *100 

In [7]:
financing = pd.concat([california_financing, virginia_financing,ohio_financing,georgia_financing], ignore_index=True)

In [8]:
financing["Amount of Funds"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')
#overall distribution of interest rates 
financing["Amount of Funds"] = pd.to_numeric(financing["Amount of Funds"], errors='coerce')

financing = financing.replace({"Elderly": "Seniors", "HFOP": "Seniors","Families":"Family","Large Family":"Family"})


In [9]:
#label whether not a source of funding is gov grant financing 

gov_grant = ['Local Grant','State Grant','City Grant', 'Federal Grant']

def label_gov_grant(x):
    if x["Financing_sub_cat"] in gov_grant:
        return 1
    else:
        return 0 

financing["Gov_Grant"] = financing.apply(lambda x: label_gov_grant(x),axis=1)

In [10]:
#standarize funding type names 
#grants is just virginia and just three. and subsidized financing is just 9 things in virginia  
financing["Funding Type"] = financing["Funding Type"].replace({"Permanent Financing Sources": 'Permanent Financing',
                                   'Construction Financing Sources':"Construction Financing"})

### What are “typical” capital stacks?  E.g., what does a project with 2-3 permanent funding sources look like compared to a project with 4-5 v. more than 6?  I think it might be helpful just to illustrate how quickly it can get very complex.


Total count below 

In [11]:
financing_count = financing[["Financing_sub_cat","Project Name"]].groupby("Project Name").count().reset_index()


Average of about 8 funding sources per project, with the 25 percentile still needing 5 sources 

In [12]:
financing_count["Financing_sub_cat"].describe()

count   147.000
mean      7.952
std       3.919
min       1.000
25%       5.000
50%       8.000
75%      10.000
max      21.000
Name: Financing_sub_cat, dtype: float64

In [13]:
#148 unique projects 
len(financing["Project Name"].unique())

148

In [14]:
#1228 individual finance entries 
len(financing)

1232

In [15]:
financing[["Financing_sub_cat"]].value_counts()

Financing_sub_cat            
Bank Loan                        309
Local Grant                      216
Deferred Developer Fee           153
Other                            126
State Housing Credit Equity       85
Federal Housing Credit Equity     63
State Grant                       56
Tax Credit                        56
Federal Credit                    18
Federal Loan                      18
Federal Home Loan Bank            14
Fee Waivers                        9
State Funding                      9
City Grant                         8
Tax Credit Equity                  7
Public Private Partnership         7
Federal Grant                      5
Land Donation                      5
Historic Tax Credit                3
State Credit                       3
Donated Land                       2
Affordable Housing Trust           1
dtype: int64

Variation in the average number of funding sources per state. Virginia the lowest (although also smallest sample size)
at 4.3, with Georgia at 7.9, California at 9.2, and Ohio at 10.7

In [16]:
#Now to do the same analysis by state 
financing_count_state = financing[["State","Financing_sub_cat","Project Name"]].groupby(["State"]).describe().reset_index()
financing_count_state["Avg. Per State"] = financing_count_state["Project Name"]["count"]/financing_count_state["Project Name"]["unique"]

Stacked bar charts of the type of financing per project by state 

In [17]:
alt.Chart(georgia_financing).mark_bar().encode(
    x='Project Name',
    y='Amount of Funds',
    color='Financing_sub_cat',
    tooltip=['Financing_sub_cat']
)

In [18]:
#prep ohio data for stacked bar chart 
ohio_financing["Amount of Funds"] = ohio_financing["Amount of Funds"].str[:-3]#remove stuff after decimal
ohio_financing["Amount of Funds"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')#stip out nonnumeric charecters
#ohio_financing["Amount of Funds"] = ohio_financing["Amount of Funds"].astype(int)
ohio_financing["Amount of Funds"] = pd.to_numeric(ohio_financing["Amount of Funds"], errors='coerce')
ohio_graph = ohio_financing[ohio_financing["Amount of Funds"] != ""]
ohio_graph["Amount of Funds"] = ohio_graph["Amount of Funds"].astype(float)#altair wont render a stacked bar chart unless its a float

In [19]:
alt.Chart(ohio_graph).mark_bar().encode(
    x='Project Name',
    y='Amount of Funds',
    color='Financing_sub_cat',
    tooltip=['Financing_sub_cat']
)

In [20]:
#prep cali data for stacked bar chart 
california_financing["Amount of Funds"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')#stip out nonnumeric charecters
california_financing["Amount of Funds"] = pd.to_numeric(california_financing["Amount of Funds"], errors='coerce')
cali_graph = california_financing[california_financing["Amount of Funds"] != ""]
cali_graph["Amount of Funds"] = cali_graph["Amount of Funds"].astype(float)#altair wont render a stacked bar chart unless its a float


In [21]:
alt.Chart(cali_graph).mark_bar().encode(
    x='Project Name',
    y='Amount of Funds',
    color='Financing_sub_cat',
    tooltip=['Financing_sub_cat']
)

In [22]:
#prep virginia data for stacked bar chart 
virginia_financing["Amount of Funds"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')#stip out nonnumeric charecters
virginia_financing["Amount of Funds"] = pd.to_numeric(virginia_financing["Amount of Funds"], errors='coerce')
virginia_graph = virginia_financing[california_financing["Amount of Funds"] != ""]
virginia_graph["Amount of Funds"] = virginia_graph["Amount of Funds"].astype(float)#altair wont render a stacked bar chart unless its a float


In [23]:
alt.Chart(virginia_graph).mark_bar().encode(
    x='Project Name',
    y='Amount of Funds',
    color='Financing_sub_cat',
    tooltip=['Financing_sub_cat']
)

Want to break up into three groupings by number of funding sources, see if i can find differences between the groups.

First want to cut to get optimal bin sizes 

In [24]:
financing_count["bins"] = pd.qcut(financing_count["Financing_sub_cat"],3,labels=["Low Number of Financing Sources", "Medium Number of Financing Sources", "High Number of Financing Sources"])


In [25]:
#bins wont be same size cus of discrete number cuttoff, but this seems fine 
financing_count["bins"].value_counts()

Medium Number of Financing Sources    55
Low Number of Financing Sources       53
High Number of Financing Sources      39
Name: bins, dtype: int64

In [26]:
financing_count = financing_count.rename(columns={"Financing_sub_cat":"Project_Financing_Sources_Count"})

In [27]:
#merge the bin names onto the main dataset 

financing_count = financing_count.rename(columns={"Financing_sub_cat":"Project_Financing_Sources_Count"})
source_count = financing.merge(financing_count, on="Project Name")

In [28]:
#filter to create seperate data frames for each bin 
low_source = source_count[source_count["bins"] == "Low Number of Financing Sources"]
medium_source = source_count[source_count["bins"] == "Medium Number of Financing Sources"]
high_source = source_count[source_count["bins"] == "High Number of Financing Sources"]

In [29]:
low_source["Financing_sub_cat"].value_counts()

Bank Loan                        76
State Grant                      34
Local Grant                      29
Deferred Developer Fee           21
Federal Housing Credit Equity    15
State Housing Credit Equity      13
Other                            10
Federal Loan                      7
Tax Credit                        4
Federal Grant                     3
Federal Home Loan Bank            3
Public Private Partnership        3
Federal Credit                    2
Land Donation                     2
Tax Credit Equity                 2
State Funding                     1
Name: Financing_sub_cat, dtype: int64

In [30]:
medium_source["Financing_sub_cat"].value_counts()

Bank Loan                        130
Local Grant                       66
Deferred Developer Fee            60
State Housing Credit Equity       40
Federal Housing Credit Equity     36
Other                             35
Tax Credit                        21
Federal Loan                      11
State Grant                        9
Federal Credit                     8
Federal Home Loan Bank             4
Tax Credit Equity                  4
State Credit                       3
Land Donation                      2
Federal Grant                      2
Historic Tax Credit                1
Public Private Partnership         1
State Funding                      1
Fee Waivers                        1
Name: Financing_sub_cat, dtype: int64

In [31]:
high_source["Financing_sub_cat"].value_counts()

Local Grant                      121
Bank Loan                        102
Other                             81
Deferred Developer Fee            72
State Housing Credit Equity       32
Tax Credit                        31
Federal Housing Credit Equity     12
State Grant                       11
City Grant                         8
Federal Credit                     8
Fee Waivers                        8
State Funding                      7
Federal Home Loan Bank             6
Public Private Partnership         3
Historic Tax Credit                2
Donated Land                       2
Land Donation                      1
Tax Credit Equity                  1
Affordable Housing Trust           1
Name: Financing_sub_cat, dtype: int64

In [32]:
#make things floats that are strings 
source_count["Amount of Funds"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')
source_count["Amount of Funds"] = pd.to_numeric(source_count["Amount of Funds"], errors='coerce')
source_count["Interest Rate"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')
source_count["Interest Rate"] = pd.to_numeric(source_count["Interest Rate"], errors='coerce')

### What share of projects rely on deferred developer fee?

Ill do this analysis by state 

In [33]:
howmany_georgia = financing[financing["State"]=="Georgia"][["Financing_sub_cat","Project Name"]].groupby("Project Name").sum()
len(howmany_georgia[howmany_georgia['Financing_sub_cat'].str.contains("Developer")])

24

24 out of thirty projects in Georgia have defered developer fees. 

In [34]:
howmany_ohio = financing[financing["State"]=="Ohio"][["Financing_sub_cat","Project Name"]].groupby("Project Name").sum()
len(howmany_ohio[howmany_ohio['Financing_sub_cat'].str.contains("Developer")])

27

27 out of 29 projects in ohio have deferred developer fees. 

In [35]:
#NaNs were messing up the group by for california, need to drop them 
cali_def= financing[financing["State"]=="California"][["Financing_sub_cat","Project Name"]]
cali_def = cali_def.dropna()
howmany_cali = cali_def.groupby("Project Name").sum()

len(howmany_cali[howmany_cali['Financing_sub_cat'].str.contains("Developer",na=False)])

41

41 out of 61 projects in California have deferred developer fees.

In [36]:
howmany_virginia = financing[financing["State"]=="Virginia"][["Financing_sub_cat","Project Name"]].groupby("Project Name").sum()
len(howmany_virginia[howmany_virginia['Financing_sub_cat'].str.contains("Developer")])

0

Virginia has no deffered dev fees 

### "What are the typical amount for other forms of local government financing? (maybe more than just the mean – curious to see distribution).  I think the main question I want to answer is whether it is more typical to have one bigger local government grant, or is it more typical to have lots of smaller amounts. By geography"


In [37]:
locals_only = financing[financing["Financing_sub_cat"] == 'Local Grant']

In [38]:
#do some type casting 
locals_only["Amount of Funds"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')#stip out nonnumeric charecters
#ohio_financing["Amount of Funds"] = ohio_financing["Amount of Funds"].astype(int)
locals_only["Amount of Funds"] = pd.to_numeric(locals_only["Amount of Funds"], errors='coerce')

In [39]:
locals_only["Amount of Funds"].mean()

2323618.4305555555

What types of projects (senior, family, special needs) have the greatest number/share of additional government funding?

In [40]:
financing[["Gov_Grant","Project Type","Project Name"]].groupby("Project Type").describe()

Unnamed: 0_level_0,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Project Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
At-Risk,47.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Family,635.0,0.184,0.388,0.0,0.0,0.0,0.0,1.0
Seniors,243.0,0.074,0.262,0.0,0.0,0.0,0.0,1.0
Service Enriched,35.0,0.114,0.323,0.0,0.0,0.0,0.0,1.0
Special Needs,152.0,0.5,0.502,0.0,0.0,0.5,1.0,1.0


In [41]:
financing[["Gov_Grant","Project Type","Project Name","State"]].groupby(["State","Project Type"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant,Gov_Grant
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
State,Project Type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
California,At-Risk,47.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
California,Family,315.0,0.359,0.48,0.0,0.0,0.0,1.0,1.0
California,Seniors,49.0,0.265,0.446,0.0,0.0,0.0,1.0,1.0
California,Special Needs,152.0,0.5,0.502,0.0,0.0,0.5,1.0,1.0
Georgia,Family,140.0,0.014,0.119,0.0,0.0,0.0,0.0,1.0
Georgia,Seniors,97.0,0.041,0.2,0.0,0.0,0.0,0.0,1.0
Ohio,Family,180.0,0.011,0.105,0.0,0.0,0.0,0.0,1.0
Ohio,Seniors,97.0,0.01,0.102,0.0,0.0,0.0,0.0,1.0
Ohio,Service Enriched,35.0,0.114,0.323,0.0,0.0,0.0,0.0,1.0


Average amount of a “Local Grant” by state:

In [42]:
locals_only[["Amount of Funds","State"]].groupby("State").mean()

Unnamed: 0_level_0,Amount of Funds
State,Unnamed: 1_level_1
California,2529526.785
Georgia,230000.0
Ohio,520285.714
Virginia,1645634.708


### What are the typical interest rates for bank loans? (maybe more than just the mean – curious to see distribution)
Construction v. permanent financing?
Across project types?
By geographies (I’m especially interested if CA cities, Atlanta, other big cities have lower interest rates on average compared to other places, due to CRA)


In [43]:
financing = financing[financing["Interest Rate"] != 225]#would have to imagine that the 225 figure is incorrect, drop
financing["Interest Rate"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')
#overall distribution of interest rates 
financing["Interest Rate"] = pd.to_numeric(financing["Interest Rate"], errors='coerce')

financing["Interest Rate"].describe()

count   512.000
mean      3.603
std      10.023
min       0.000
25%       1.000
50%       3.000
75%       5.231
max     225.000
Name: Interest Rate, dtype: float64

Average interest rates are notably higher in Georgia than in Virginia or California (Ohio does not report). Data is thin in that Virginia did not report interest rate for construction financing so can't compare, but construction financing is significantly more expensive in Georgia and Slightly more expensive in California and permenant financing in California and Georgia. 

In [44]:
#drop zero percent interest rates 
pos_interest_financing = financing[financing["Interest Rate"] != 0]

In [45]:
pos_interest_financing = pos_interest_financing[pos_interest_financing["Project Type"]!="Service Enriched"]#there are no interest bearing loans in this category 
pos_interest_financing = pos_interest_financing[pos_interest_financing["Interest Rate"] < 8] #this drops exaclty one outlier 

In [46]:
pos_interest_financing[["Project Type","Interest Rate"]].groupby(["Project Type"]).describe()

Unnamed: 0_level_0,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Project Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
At-Risk,26.0,3.244,2.164,1.0,1.0,4.35,5.35,6.0
Family,200.0,4.092,1.625,0.42,3.0,4.0,5.653,7.75
Seniors,48.0,4.145,1.906,1.0,2.938,4.812,5.835,6.5
Special Needs,89.0,3.672,1.155,1.0,3.0,3.0,4.0,6.25


In [47]:
pos_interest_financing[["State","Interest Rate","Funding Type"]].groupby(["State","Funding Type"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
State,Funding Type,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
California,Construction Financing,133.0,3.849,1.44,1.0,3.0,4.0,5.02,6.25
California,Permanent Financing,164.0,3.777,1.578,0.42,3.0,3.0,5.5,6.5
Georgia,Construction Financing,34.0,5.283,1.296,2.0,5.048,5.5,6.0,7.75
Georgia,Permanent Financing,32.0,3.671,2.216,1.0,1.0,3.0,5.625,7.0
Virginia,Permanent Financing,66.0,2.778,1.788,0.5,1.0,2.95,4.0,6.75


Interest rates across project types

In [48]:
pos_interest_financing["Interest Rate"].replace(regex=True, inplace=True, to_replace=r'[^0-9.\-]', value=r'')
#overall distribution of interest rates 
pos_interest_financing["Interest Rate"] = pd.to_numeric(pos_interest_financing["Interest Rate"], errors='coerce')
#standardize project type names across states
pos_interest_financing = pos_interest_financing.replace({"Elderly": "Seniors", "HFOP": "Seniors","Families":"Family","Large Family":"Family"})

In [49]:
pos_interest_financing[["Project Type","Interest Rate"]].groupby(["Project Type"]).describe()

Unnamed: 0_level_0,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate,Interest Rate
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Project Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
At-Risk,26.0,3.244,2.164,1.0,1.0,4.35,5.35,6.0
Family,200.0,4.092,1.625,0.42,3.0,4.0,5.653,7.75
Seniors,48.0,4.145,1.906,1.0,2.938,4.812,5.835,6.5
Special Needs,89.0,3.672,1.155,1.0,3.0,3.0,4.0,6.25


### What share of permanent financing is debt, and does this differ by project type (senior, family, special needs) or geography (are there differences across states)?


In [50]:
#count number of projects 
num_projects = financing.drop_duplicates(subset=['Project Name'])
num_projects = num_projects.groupby(["Project Type"]).count().reset_index()[["Project Type","Project Name"]]

In [51]:
#should we keep federal loan? seems to be USDA loans with 1% interest that are meant to promote housing in rural areas 

debt = ['Bank Loan','Federal Loan']
#not_debt is not used for any analysis, just to be able to visualize all the categories in one place 

not_debt = ['Tax Credit','Federal Credit','State Credit','Federal Home Loan Bank','State Housing Credit Equity', 'Historic Tax Credit','Local Grant','State Funding',
            'State Grant','City Grant','Land Donation','Public Private Partnership', 
            'Federal Grant','Fee Waivers','Federal Housing Credit Equity','Deferred Developer Fee','Affordable Housing Trust']

#label whether not a source of funding is gov financing 
def label_debt_type(x):
    if x["Financing_sub_cat"] in debt:
        return 1
    if x["Financing_sub_cat"] in not_debt:
        return 0 
    else:
        return np.NaN

In [52]:
#count number of projects 
num_projects = financing.drop_duplicates(subset=['Project Name'])
num_projects = num_projects.groupby(["Project Type"]).count().reset_index()[["Project Type","Project Name"]]

In [53]:
financing["debt_financing"] = financing.apply(lambda x: label_debt_type(x),axis=1)

In [54]:
financing["debt_financing"].value_counts()

0.000    711
1.000    327
Name: debt_financing, dtype: int64

In [55]:
#31.5 percent of financing is debt financing 
326/(326+708)

0.3152804642166344

Does debt financing differ by state? 

In [56]:
state_debt_size = financing[["debt_financing","State","Amount of Funds"]].groupby("State").mean().reset_index()
state_debt_size

Unnamed: 0,State,debt_financing,Amount of Funds
0,California,0.337,3565371.628
1,Georgia,0.342,3140492.974
2,Ohio,0.313,2003198.191
3,Virginia,0.157,2713136.661


In [57]:
#Virinia projects, on average, use notably less debt financing 
financing.groupby(['Project Name', 'State',"Amount of Funds"]).debt_financing.mean().groupby('State').mean()

State
California   0.398
Georgia      0.301
Ohio         0.312
Virginia     0.157
Name: debt_financing, dtype: float64

Does debt financing differ by project type?

In [58]:
project_type_debt_size = financing[["debt_financing","Project Type","Amount of Funds"]].groupby("Project Type").mean().reset_index()
project_type_debt_size.merge(num_projects,on="Project Type").rename(columns={"Project Name":"Project Count"})

Unnamed: 0,Project Type,debt_financing,Amount of Funds,Project Count
0,At-Risk,0.795,2967885.621,7
1,Family,0.328,3276329.759,68
2,Seniors,0.379,2645403.917,29
3,Service Enriched,0.174,1409896.737,2
4,Special Needs,0.19,3053751.954,14


In [59]:
#if we aggregate at both project and project type level numbers change a bit, but not substantially 
project_level_project_type = financing[["debt_financing","Project Type","Amount of Funds",'Project Name']].groupby(['Project Name',"Project Type"]).mean().groupby("Project Type").mean()
project_level_project_type

Unnamed: 0_level_0,debt_financing,Amount of Funds
Project Type,Unnamed: 1_level_1,Unnamed: 2_level_1
At-Risk,0.82,3364943.831
Family,0.366,3505532.119
Seniors,0.387,3013701.172
Service Enriched,0.202,1415297.38
Special Needs,0.231,3380216.766


In [60]:
financing.groupby(['Project Name', 'State',"Amount of Funds"]).debt_financing.mean().groupby('State').mean()

State
California   0.398
Georgia      0.301
Ohio         0.312
Virginia     0.157
Name: debt_financing, dtype: float64

### Look at California deferened dev projects, see if difference between those with and without  

In [61]:
#add number of funding sources as a column 
financing_funding_count = financing.groupby("Project Name").count().reset_index()[["Project Name","Financing_sub_cat"]]
financing =financing.merge(financing_funding_count,on="Project Name")
financing = financing.rename(columns ={"Financing_sub_cat_x":"Financing_sub_cat","Financing_sub_cat_y":"Funding_count"} )

In [62]:
#need to fill nan values in Financing_sub_cats or else function will break 
financing.Financing_sub_cat = financing.Financing_sub_cat.fillna(value="Other")

In [63]:
#want to generate string of names to find which projects have def dev fees and which dont 
def_project = financing[["Project Name","Financing_sub_cat"]].groupby("Project Name").sum().reset_index()

In [64]:
#create a function to label them 
def label_def(x):
    if "Developer" in x["Financing_sub_cat"]:
        return 1
    else:
        return 0 

In [65]:
#now run function and merge results back onto financing 
def_project["def_project"] = def_project.apply(lambda x: label_def(x),axis=1)
def_project = def_project[["Project Name","def_project"]]
financing = financing.merge(def_project,on="Project Name")

In [66]:
#now filter for just california 
cali_finacing = financing[financing["State"] == "California"]

In [67]:
#keep only cols we want to compare 
#cali_finacing=cali_finacing[['Project Name', 'Financing_sub_cat', 'Interest Rate',"Funding_count"]]

In [68]:
#drop state housing equity col, group by project 
def_proj_dif1 = cali_finacing.groupby("def_project").mean().reset_index()

### If you have def developer fees, what percentage of the project do these fees represent?

In [69]:
#get total funding raised per project in cali 
total_cali_funds_per_project = cali_finacing[["Project Name","Financing_sub_cat","Amount of Funds"]].groupby("Project Name").sum().reset_index()

In [70]:
#get cost for def dev fees in cali 
cali_def_cost_per_project = cali_finacing[cali_finacing["Financing_sub_cat"]=='Deferred Developer Fee'][["Project Name","Amount of Funds"]].groupby("Project Name").sum()

In [71]:
#merge these onto each other and then divide
def_fees_cali = total_cali_funds_per_project.merge(cali_def_cost_per_project,on="Project Name")
def_fees_cali["def_fees_percentage_of_total_cost"] = def_fees_cali["Amount of Funds_y"] / def_fees_cali["Amount of Funds_x"]

In [72]:
#on average, six percent of total funds raised are deferred dev fees 
def_fees_cali["def_fees_percentage_of_total_cost"].describe()

count   41.000
mean     0.059
std      0.059
min      0.004
25%      0.025
50%      0.041
75%      0.067
max      0.296
Name: def_fees_percentage_of_total_cost, dtype: float64

### Examine Gov Grant funding 

In [73]:
#create same bins of number of funding sources as above, by for the financing dataframe 
financing["bins"] = pd.qcut(financing["Funding_count"],3,labels=["Low Number of Financing Sources", "Medium Number of Financing Sources", "High Number of Financing Sources"])


In [74]:
state_bin = financing[["bins","State","Amount of Funds","Gov_Grant"]].groupby(["bins","State"]).mean()
state_bin

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount of Funds,Gov_Grant
bins,State,Unnamed: 2_level_1,Unnamed: 3_level_1
Low Number of Financing Sources,California,4256494.997,0.231
Low Number of Financing Sources,Georgia,3281901.38,0.0
Low Number of Financing Sources,Ohio,2909201.865,0.0
Low Number of Financing Sources,Virginia,3042925.264,0.579
Medium Number of Financing Sources,California,3362591.495,0.431
Medium Number of Financing Sources,Georgia,2906372.029,0.069
Medium Number of Financing Sources,Ohio,2179641.776,0.026
Medium Number of Financing Sources,Virginia,1240936.0,0.778
High Number of Financing Sources,California,2612563.633,0.514
High Number of Financing Sources,Georgia,3211673.167,0.0


In [75]:
#same analysis as above but first aggregating by individual project before aggregating by state 
# the results will be similar, but worth see the variation 
state_bin_by_project = financing[["bins","State","Amount of Funds","Gov_Grant","Project Name"]].groupby(["bins","State","Project Name"]).mean().groupby(["bins","State"]).mean()
state_bin_by_project

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount of Funds,Gov_Grant
bins,State,Unnamed: 2_level_1,Unnamed: 3_level_1
Low Number of Financing Sources,California,4449854.312,0.219
Low Number of Financing Sources,Georgia,3273260.031,0.0
Low Number of Financing Sources,Ohio,2958827.825,0.0
Low Number of Financing Sources,Virginia,3347087.595,0.589
Medium Number of Financing Sources,California,3363724.663,0.435
Medium Number of Financing Sources,Georgia,2937567.756,0.072
Medium Number of Financing Sources,Ohio,2210445.828,0.028
Medium Number of Financing Sources,Virginia,1240936.0,0.778
High Number of Financing Sources,California,2650199.576,0.506
High Number of Financing Sources,Georgia,3211673.167,0.0


In [76]:
state_bin["key"] = state_bin.reset_index()["bins"].astype(str) + state_bin.reset_index()["State"].astype(str)

In [77]:
# Do groupby but with count
# we notice that Virginia has few medium and high, and georgia has 
# few high 
financing[["bins","State","Amount of Funds","Gov_Grant"]].groupby(["bins","State"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount of Funds,Gov_Grant
bins,State,Unnamed: 2_level_1,Unnamed: 3_level_1
Low Number of Financing Sources,California,276,277
Low Number of Financing Sources,Georgia,138,138
Low Number of Financing Sources,Ohio,65,65
Low Number of Financing Sources,Virginia,91,95
Medium Number of Financing Sources,California,109,109
Medium Number of Financing Sources,Georgia,87,87
Medium Number of Financing Sources,Ohio,76,76
Medium Number of Financing Sources,Virginia,8,9
High Number of Financing Sources,California,177,177
High Number of Financing Sources,Georgia,12,12


In [78]:
#by project 
financing[["bins","State","Amount of Funds","Gov_Grant","Project Name"]].groupby(["bins","Project Name"]).mean().groupby(["bins"]).mean()

Unnamed: 0_level_0,Amount of Funds,Gov_Grant
bins,Unnamed: 1_level_1,Unnamed: 2_level_1
Low Number of Financing Sources,3763468.228,0.25
Medium Number of Financing Sources,2821423.538,0.214
High Number of Financing Sources,2135394.496,0.256


### Can we break out projects with State Housing Credit Equity?

In [79]:
#label state housing credit equity 
financing = financing.rename(columns={"Financing_sub_cat_x":"Financing_sub_cat"})
state_housing_credit_equity = ['State Housing Credit Equity']

def label_shce(x):
    if x["Financing_sub_cat"] in state_housing_credit_equity:
        return 1
    else:
        return 0 
financing["State_Housing_Credit_Equity"] = financing.apply(lambda x: label_shce(x),axis=1)

In [80]:
financing[["State_Housing_Credit_Equity","Amount of Funds"]].groupby("State_Housing_Credit_Equity").sum()

Unnamed: 0_level_0,Amount of Funds
State_Housing_Credit_Equity,Unnamed: 1_level_1
0,3503854357.9
1,177023109.9


In [81]:
#4.8% of total funding 
177023109.90/(3503854357.90+177023109.90)

0.04809263862994163

In [82]:
financing["State_Housing_Credit_Equity"].value_counts()

0    1143
1      85
Name: State_Housing_Credit_Equity, dtype: int64

In [83]:
financing.groupby(["State",'State_Housing_Credit_Equity']).mean()[["Gov_Grant","Amount of Funds","Funding_count"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Gov_Grant,Amount of Funds,Funding_count
State,State_Housing_Credit_Equity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
California,0,0.359,3565371.628,10.03
Georgia,0,0.034,3297634.801,8.358
Georgia,1,0.0,2687100.162,8.066
Ohio,0,0.024,2124785.64,11.952
Ohio,1,0.0,400454.545,12.182
Virginia,0,0.596,2784806.807,5.439
Virginia,1,0.0,2150000.0,12.0


In [84]:
financing_type = financing[(financing["Funding Type"] != "Grants")&(financing["Funding Type"] != "Subsidized Financing")]
financing_type.groupby(["State",'State_Housing_Credit_Equity']).mean()[["Gov_Grant","Amount of Funds","Funding_count"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Gov_Grant,Amount of Funds,Funding_count
State,State_Housing_Credit_Equity,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
California,0,0.359,3565371.628,10.03
Georgia,0,0.034,3297634.801,8.358
Georgia,1,0.0,2687100.162,8.066
Ohio,0,0.024,2124785.64,11.952
Ohio,1,0.0,400454.545,12.182
Virginia,0,0.637,2963268.122,5.304
Virginia,1,0.0,2150000.0,12.0


In [85]:
financing_type.groupby(["State",'State_Housing_Credit_Equity',"Funding Type"]).mean()[["Gov_Grant","Amount of Funds","Funding_count"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Gov_Grant,Amount of Funds,Funding_count
State,State_Housing_Credit_Equity,Funding Type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
California,0,Construction Financing,0.303,4645243.841,9.911
California,0,Permanent Financing,0.43,2198114.069,10.181
Georgia,0,Construction Financing,0.037,3717147.297,8.39
Georgia,0,Permanent Financing,0.032,2931677.093,8.33
Georgia,1,Construction Financing,0.0,1451176.797,8.1
Georgia,1,Permanent Financing,0.0,3883155.032,8.032
Ohio,0,Construction Financing,0.013,2010117.555,11.961
Ohio,0,Permanent Financing,0.036,2251086.72,11.942
Ohio,1,Construction Financing,0.0,391818.182,12.182
Ohio,1,Permanent Financing,0.0,409090.909,12.182


### Below here is Various scratch work from QU, will eventually delete. 

In [86]:
financing[["Gov_Grant","Project Type"]].groupby("Project Type").mean().reset_index()[1:]

Unnamed: 0,Project Type,Gov_Grant
1,Family,0.184
2,Seniors,0.074
3,Service Enriched,0.114
4,Special Needs,0.5


In [87]:
financing["Gov_Grant"].value_counts()

0    945
1    283
Name: Gov_Grant, dtype: int64

In [88]:
financing[["Gov_Grant","Amount of Funds"]].groupby("Gov_Grant").sum()

Unnamed: 0_level_0,Amount of Funds
Gov_Grant,Unnamed: 1_level_1
0,3056441134.8
1,624436333.0


In [89]:
financing[["debt_financing","Amount of Funds"]].groupby("debt_financing").sum()

Unnamed: 0_level_0,Amount of Funds
debt_financing,Unnamed: 1_level_1
0.0,1614802253.42
1.0,1830620383.81


In [90]:
financing_type = financing[(financing["Funding Type"] != "Grants")&(financing["Funding Type"] != "Subsidized Financing")]

In [91]:
financing_type[["debt_financing","Amount of Funds","Funding Type"]].groupby(["debt_financing","Funding Type"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount of Funds
debt_financing,Funding Type,Unnamed: 2_level_1
0.0,Construction Financing,592243928.37
0.0,Permanent Financing,1009414659.05
1.0,Construction Financing,1496950545.81
1.0,Permanent Financing,333669838.0


In [92]:
financing_type[financing_type.State_Housing_Credit_Equity.notnull()].groupby('Funding Type').apply(lambda x: 
                                             np.nansum(x.State_Housing_Credit_Equity*x['Amount of Funds'])/np.nansum(x['Amount of Funds']))

Funding Type
Construction Financing   0.021
Permanent Financing      0.091
dtype: float64

In [93]:
financing_type[financing_type.Gov_Grant.notnull()].groupby('Funding Type').apply(lambda x: 
                                             np.nansum(x.Gov_Grant*x['Amount of Funds'])/np.nansum(x['Amount of Funds']))

Funding Type
Construction Financing   0.108
Permanent Financing      0.269
dtype: float64

In [94]:
financing_type[financing_type.debt_financing.notnull()].groupby('Funding Type').apply(lambda x: 
                                             np.nansum(x.debt_financing*x['Amount of Funds'])/np.nansum(x['Amount of Funds']))

Funding Type
Construction Financing   0.717
Permanent Financing      0.248
dtype: float64

In [95]:
#Construction Financing 
1496950545.810/(1496950545.810+592243928.370)

0.7165204409213971

In [96]:
#Permanent Finacing 
333669838.000/(333669838.000+1009414659.050)

0.24843547724129394

In [97]:
#grants is just virginia and just three. and subsidized financing is just 9 things in virginia  
financing["Funding Type"] = financing["Funding Type"].replace({"Permanent Financing Sources": 'Permanent Financing',
                                   'Construction Financing Sources':"Construction Financing"})

In [98]:
#total percentage of financing that is debt financing.
#should break this down by construction and perm 
1830620383.810/(1830620383.810+1614802253.420)

0.5313195437996411

In [99]:
624436333.00/(624436333.00+3056441134.80)

0.1696433359878223

In [100]:
945+283

1228

In [101]:
financing[["State_Housing_Credit_Equity","Amount of Funds"]].groupby("State_Housing_Credit_Equity").sum()

Unnamed: 0_level_0,Amount of Funds
State_Housing_Credit_Equity,Unnamed: 1_level_1
0,3503854357.9
1,177023109.9


In [102]:
177023109.90/(3503854357.90+177023109.90)

0.04809263862994163

In [103]:
financing["State_Housing_Credit_Equity"].value_counts()

0    1143
1      85
Name: State_Housing_Credit_Equity, dtype: int64

In [104]:
perm_financing = financing[financing["Funding Type"] =="Permanent Financing"]

In [105]:
perm_by_state = perm_financing[["debt_financing","Amount of Funds","State"]].groupby(["debt_financing","State"]).count()

In [106]:
perm_by_type = perm_financing[["debt_financing","Amount of Funds","Project Type"]].groupby(["debt_financing","Project Type"]).count()

In [107]:
perm_by_type

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount of Funds
debt_financing,Project Type,Unnamed: 2_level_1
0.0,At-Risk,3
0.0,Family,189
0.0,Seniors,66
0.0,Service Enriched,10
0.0,Special Needs,54
1.0,At-Risk,14
1.0,Family,79
1.0,Seniors,29
1.0,Service Enriched,2
1.0,Special Needs,8


In [108]:
perm_by_type = perm_by_type.reset_index()

In [109]:
#special needs
8/(8+54)

0.12903225806451613

In [110]:
#seniors 
19/(19+46)

0.2923076923076923

In [111]:
#family 
59/(59+143)

0.29207920792079206

In [112]:
#at risk 
14/(14+3)

0.8235294117647058

In [113]:
#special needs

#seniords 

#family 

In [114]:
perm_financing[["debt_financing","Amount of Funds"]].groupby(["debt_financing"]).count()

Unnamed: 0_level_0,Amount of Funds
debt_financing,Unnamed: 1_level_1
0.0,397
1.0,138


In [115]:
100/(100+310)

0.24390243902439024

In [116]:
perm_by_state

Unnamed: 0_level_0,Unnamed: 1_level_0,Amount of Funds
debt_financing,State,Unnamed: 2_level_1
0.0,California,155
0.0,Georgia,91
0.0,Ohio,76
0.0,Virginia,75
1.0,California,66
1.0,Georgia,34
1.0,Ohio,32
1.0,Virginia,6


In [117]:
#virginia by state 
10/(10+71)

0.12345679012345678

In [118]:
34/(34+91)

0.272

In [119]:
56/(56+148)

0.27450980392156865

In [120]:
perm_financing["State"].unique()

array(['California', 'Virginia', 'Ohio', 'Georgia'], dtype=object)

In [121]:
284339584.00/(284339584.00+758462934.71)

0.27266867781614357

In [122]:
perm_financing.groupby(["debt_financing"]).sum()

Unnamed: 0_level_0,Interest Rate,Amount of Funds,Gov_Grant,Funding_count,def_project,State_Housing_Credit_Equity
debt_financing,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,455.86,1009414659.05,179,3812,273,44
1.0,423.856,333669838.0,0,1237,96,0


In [123]:
279206433.000/(279206433.000+763596085.710)

0.2677462203921339

In [124]:
perm_financing["debt_financing"].value_counts()

0.000    397
1.000    138
Name: debt_financing, dtype: int64

In [125]:
#24.8% of all permanent financing is debt financing, which represents 27.2% of the total amount of permanent financing. 
106/(106+321)

0.24824355971896955

In [126]:
perm_financing = perm_financing[perm_financing["Project Type"]!="At-Risk"]

In [127]:
perm_financing.groupby(["debt_financing","State"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Project Name,Financing_sub_cat,Interest Rate,Funding Type,Project Type,Amount of Funds,Gov_Grant,Funding_count,def_project,bins,State_Housing_Credit_Equity
debt_financing,State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0.0,California,152,152,99,152,152,152,152,152,152,152,152
0.0,Georgia,91,91,21,91,91,91,91,91,91,91,91
0.0,Ohio,76,76,0,76,76,76,76,76,76,76,76
0.0,Virginia,75,75,73,75,0,75,75,75,75,75,75
1.0,California,52,52,48,52,52,52,52,52,52,52,52
1.0,Georgia,34,34,34,34,34,34,34,34,34,34,34
1.0,Ohio,32,32,0,32,32,32,32,32,32,32,32
1.0,Virginia,6,6,5,6,0,6,6,6,6,6,6


In [128]:
perm_financing.groupby(["debt_financing","State"]).mean().drop(columns={"State_Housing_Credit_Equity"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Interest Rate,Amount of Funds,Gov_Grant,Funding_count,def_project
debt_financing,State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,California,3.019,1996461.737,0.704,11.579,0.776
0.0,Georgia,0.655,3775201.7,0.033,8.154,0.857
0.0,Ohio,,3234454.912,0.066,11.329,0.974
0.0,Virginia,1.962,1543530.867,0.853,5.72,0.0
1.0,California,5.243,2910434.673,0.0,8.846,0.692
1.0,Georgia,3.286,1541532.294,0.0,8.529,0.735
1.0,Ohio,,1701981.406,0.0,11.656,0.938
1.0,Virginia,4.788,5956833.333,0.0,4.333,0.0


In [129]:
114/(114+313)

0.26697892271662765

In [130]:
85/(85+1143)

0.06921824104234528

In [131]:
177023109.90/(177023109.90+3503854357.90)

0.04809263862994163

Differences between perm and construction financing

In [132]:
alt.Chart(financing).mark_bar().encode(
    x='Funding Type',
    y='Amount of Funds',
    color=alt.Color('Financing_sub_cat', scale=alt.Scale(scheme="pastel2")),
    tooltip=['Financing_sub_cat']
    
)