In [1]:
from datetime import datetime
import numpy as np

In [2]:
# Mount Google Drive inside the Colab VM.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
cd gdrive/MyDrive/SPARSe

/content/gdrive/MyDrive/SPARSe


In [4]:
!pip install SQLAlchemy==1.3.18 PyYAML==6.0 psycopg2-binary==2.9.3

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import pandas as pd
# Show every column when a DataFrame is displayed (None = no column limit).
pd.options.display.max_columns = None

In [6]:
# Load the donations table; the path is relative to the current working directory.
DONATIONS_CSV = "./data/donations.csv"
donations = pd.read_csv(DONATIONS_CSV)

In [7]:
# Load the projects table; the path is relative to the current working directory.
PROJECTS_CSV = "./data/projects.csv"
projects = pd.read_csv(PROJECTS_CSV)

In [44]:
# Number of rows in the projects table.
projects.shape[0]

664098

In [8]:
# Left join: every project is kept; a project with no matching donation gets
# a single row with NaN in the donation columns.
projects_donations_df = pd.merge(
    projects,
    donations,
    how='left',
    on='projectid',
)

In [10]:
# Parse the posting date with the vectorised parser instead of a per-row
# datetime.strptime call. Same '%Y-%m-%d' format, but this cell can now be
# re-run safely when the column has already been converted to datetimes.
projects_donations_df['date_posted'] = pd.to_datetime(
    projects_donations_df['date_posted'], format='%Y-%m-%d'
)

In [11]:
# Each project's funding deadline is four calendar months after its posting date.
funding_window = pd.DateOffset(months=4)
projects_donations_df['deadline'] = projects_donations_df.date_posted + funding_window

In [12]:
# Keep only donation rows whose timestamp is on or before the project's deadline.
# NOTE(review): donation_timestamp is read from CSV without explicit date parsing,
# so this comparison relies on pandas coercing it against the datetime deadline
# column -- confirm the dtype / consider parsing it explicitly.
donated_by_deadline = (
    projects_donations_df['donation_timestamp'] <= projects_donations_df['deadline']
)
projects_donations_filtered_df = projects_donations_df.loc[donated_by_deadline]

In [13]:
# Row counts before and after the deadline filter.
rows_before = len(projects_donations_df)
rows_after = len(projects_donations_filtered_df)
print(f"{rows_before} {rows_after}")

3237045 2894222


In [14]:
# Total amount donated to each project within the deadline window,
# one row per projectid with the sum in a 'funded_amt' column.
funded_amt_df = (
    projects_donations_filtered_df
    .groupby('projectid', as_index=False)['donation_to_project']
    .sum()
    .rename(columns={'donation_to_project': 'funded_amt'})
)

In [15]:
# Attach the funded amount to every project. Projects with no in-window donation
# have no row in funded_amt_df, so their NaN is replaced by 0.0.
# Note the fill applies to all three selected columns, not just funded_amt.
funding_columns = ['projectid', 'funded_amt', 'total_price_excluding_optional_support']
projects_with_funded_amt = projects.merge(funded_amt_df, on='projectid', how='left')
funding_frac_df = projects_with_funded_amt[funding_columns].fillna(0.0)

In [16]:
# Display the per-project funded amount next to the project's price.
funding_frac_df

Unnamed: 0,projectid,funded_amt,total_price_excluding_optional_support
0,316ed8fb3b81402ff6ac8f721bb31192,0.0,555.81
1,90de744e368a7e4883223ca49318ae30,0.0,296.47
2,32943bb1063267de6ed19fc0ceb4b9a7,0.0,430.89
3,bb18f409abda2f264d5acda8cab577a9,0.0,576.07
4,24761b686e18e5eace634607acbcc19f,0.0,408.40
...,...,...,...
664093,a7236ea96c812895cafc5d700d779147,254.0,231.00
664094,e02da37beb332eb66c2d2ba989c597ad,1241.0,1129.00
664095,82e536f14eadf2671a70e03416f695a3,125.0,125.00
664096,e139df754a873a62d93daa56acbf8040,125.0,125.00


In [17]:
# Fraction of the project price (excluding optional support) covered by
# donations received within the deadline window.
funding_frac_df['funding_frac'] = funding_frac_df['funded_amt'].div(
    funding_frac_df['total_price_excluding_optional_support']
)

In [34]:
# Fill only the funding_frac column so that no other column can be silently
# overwritten with 1.0. A NaN fraction presumably comes from a 0 / 0 division
# (zero price and zero donations); such rows are treated as fully funded.
funding_frac_df = funding_frac_df.fillna({'funding_frac': 1.0})

In [33]:
# A project counts as fully funded when donations cover at least its price.
# A missing funding fraction is treated as 1.0 here, the same value the
# notebook's fill cell uses, so this flag no longer depends on whether that
# cell was executed before or after this one.
funding_frac_df["is_fully_funded"] = funding_frac_df.funding_frac.fillna(1.0) >= 1.0

In [36]:
# List the columns available in the projects table.
projects.columns

Index(['projectid', 'teacher_acctid', 'schoolid', 'school_ncesid',
       'school_latitude', 'school_longitude', 'school_city', 'school_state',
       'school_zip', 'school_metro', 'school_district', 'school_county',
       'school_charter', 'school_magnet', 'school_year_round', 'school_nlns',
       'school_kipp', 'school_charter_ready_promise', 'teacher_prefix',
       'teacher_teach_for_america', 'teacher_ny_teaching_fellow',
       'primary_focus_subject', 'primary_focus_area',
       'secondary_focus_subject', 'secondary_focus_area', 'resource_type',
       'poverty_level', 'grade_level', 'fulfillment_labor_materials',
       'total_price_excluding_optional_support',
       'total_price_including_optional_support', 'students_reached',
       'eligible_double_your_impact_match', 'eligible_almost_home_match',
       'date_posted'],
      dtype='object')

In [37]:
# Sanity check: the funding table should have exactly one row per project.
(projects.shape[0], funding_frac_df.shape[0])

(664098, 664098)

In [38]:
# Join the funding outcome back onto the full project attributes.
# validate="one_to_one" makes pandas raise if projectid is duplicated on either
# side, which would otherwise silently multiply rows and inflate the counts below.
# Note: 'total_price_excluding_optional_support' exists in both frames, so the
# result carries it twice with pandas' default _x / _y suffixes.
projects_with_funding_frac = funding_frac_df.merge(
    projects, on="projectid", validate="one_to_one"
)

In [40]:
# Project counts by match eligibility and funding outcome.
match_outcome_keys = ["eligible_double_your_impact_match", "is_fully_funded"]
projects_with_funding_frac.groupby(match_outcome_keys).size()

eligible_double_your_impact_match  is_fully_funded
f                                  False              232607
                                   True               253139
t                                  False               65538
                                   True               112814
dtype: int64

In [47]:
# Compute the share from the data instead of from counts copied out of the
# groupby output, so it stays correct whenever upstream cells are re-run.
# The mean of the boolean is_fully_funded flag is the fully-funded proportion.
not_match_eligible = projects_with_funding_frac.eligible_double_your_impact_match == "f"
prop_funded_not_eligible = projects_with_funding_frac.loc[not_match_eligible, "is_fully_funded"].mean()
print(f"Proportion fully funded in 4 months, not eligible for double your impact match: {prop_funded_not_eligible}")

Proportion fully funded in 4 months, not eligible for double your impact match: 0.5211345023942554


In [48]:
# Compute the share from the data instead of from counts copied out of the
# groupby output, so it stays correct whenever upstream cells are re-run.
# The mean of the boolean is_fully_funded flag is the fully-funded proportion.
match_eligible = projects_with_funding_frac.eligible_double_your_impact_match == "t"
prop_funded_eligible = projects_with_funding_frac.loc[match_eligible, "is_fully_funded"].mean()
print(f"Proportion fully funded in 4 months, eligible for double your impact match: {prop_funded_eligible}")

Proportion fully funded in 4 months, eligible for double your impact match: 0.6325356598187853


In [49]:
# Project counts by school poverty level and funding outcome.
poverty_outcome_keys = ["poverty_level", "is_fully_funded"]
projects_with_funding_frac.groupby(poverty_outcome_keys).size()

poverty_level     is_fully_funded
high poverty      False               83983
                  True                89578
highest poverty   False              161460
                  True               222029
low poverty       False                7828
                  True                 8883
moderate poverty  False               44874
                  True                45463
dtype: int64

In [50]:
# Derived from the data rather than from hand-copied counts, so the figure
# cannot go stale when upstream cells change. Mean of a boolean = proportion True.
is_highest_poverty = projects_with_funding_frac.poverty_level == "highest poverty"
prop_funded_highest_poverty = projects_with_funding_frac.loc[is_highest_poverty, "is_fully_funded"].mean()
print(f"Proportion fully funded in 4 months, highest poverty: {prop_funded_highest_poverty}")

Proportion fully funded in 4 months, highest poverty: 0.578970974395615


In [51]:
# Derived from the data rather than from hand-copied counts, so the figure
# cannot go stale when upstream cells change. Mean of a boolean = proportion True.
is_high_poverty = projects_with_funding_frac.poverty_level == "high poverty"
prop_funded_high_poverty = projects_with_funding_frac.loc[is_high_poverty, "is_fully_funded"].mean()
print(f"Proportion fully funded in 4 months, high poverty: {prop_funded_high_poverty}")

Proportion fully funded in 4 months, high poverty: 0.5161182523723647


In [52]:
# Derived from the data rather than from hand-copied counts, so the figure
# cannot go stale when upstream cells change. Mean of a boolean = proportion True.
is_moderate_poverty = projects_with_funding_frac.poverty_level == "moderate poverty"
prop_funded_moderate_poverty = projects_with_funding_frac.loc[is_moderate_poverty, "is_fully_funded"].mean()
print(f"Proportion fully funded in 4 months, moderate poverty: {prop_funded_moderate_poverty}")

Proportion fully funded in 4 months, moderate poverty: 0.5032600152761327


In [53]:
# Derived from the data rather than from hand-copied counts, so the figure
# cannot go stale when upstream cells change. Mean of a boolean = proportion True.
is_low_poverty = projects_with_funding_frac.poverty_level == "low poverty"
prop_funded_low_poverty = projects_with_funding_frac.loc[is_low_poverty, "is_fully_funded"].mean()
print(f"Proportion fully funded in 4 months, low poverty: {prop_funded_low_poverty}")

Proportion fully funded in 4 months, low poverty: 0.5315660343486326
