In [1]:
import pandas as pd
import requests
import csv

In [2]:
df = pd.read_csv("data/College_Control_Towns.csv")

# Filter by states with more than 20 college founding experiments
counts = df["State"].value_counts()
states_include = counts[counts > 20].index
df = df[df["State"].isin(states_include)]

In [12]:
df['Experiment'] = list(zip(df['College_Abbrev'], df['Year_Founded']))
df2 = df[["Experiment", "County", "State", "Treatment"]]
df2.head()

Unnamed: 0,Experiment,County,State,Treatment
0,"(AlAMU, 1875)",Madison,Alabama,1
1,"(AlAMU, 1875)",Montgomery,Alabama,0
2,"(AlAMU, 1875)",Macon,Alabama,0
3,"(AubU, 1856)",Lee,Alabama,1
4,"(AubU, 1856)",Lauderdale,Alabama,0


In [5]:
fips = pd.read_csv("state_and_county_fips_master.csv")
states = {"AL":"Alabama","AK":"Alaska","AZ":"Arizona","AR":"Arkansas","CA":"California","CO":"Colorado","CT":"Connecticut","DE":"Delaware","FL":"Florida","GA":"Georgia","HI":"Hawaii","ID":"Idaho","IL":"Illinois","IN":"Indiana","IA":"Iowa","KS":"Kansas","KY":"Kentucky","LA":"Louisiana","ME":"Maine","MD":"Maryland","MA":"Massachusetts","MI":"Michigan","MN":"Minnesota","MS":"Mississippi","MO":"Missouri","MT":"Montana","NE":"Nebraska","NV":"Nevada","NH":"New Hampshire","NJ":"New Jersey","NM":"New Mexico","NY":"New York","NC":"North Carolina","ND":"North Dakota","OH":"Ohio","OK":"Oklahoma","OR":"Oregon","PA":"Pennsylvania","RI":"Rhode Island","SC":"South Carolina","SD":"South Dakota","TN":"Tennessee","TX":"Texas","UT":"Utah","VT":"Vermont","VA":"Virginia","WA":"Washington","WV":"West Virginia","WI":"Wisconsin","WY":"Wyoming"}
fips["county_match"] = fips["name"].str.replace(' County', '')
fips["state_match"] = [states[x] if x in states else None for x in fips["state"]]
fips["fips"] = [str(x) for x in fips["fips"]]

In [6]:
combined = df2.join(fips.set_index(["county_match", "state_match"]), on = ("County", "State"), how = "left")
combined = combined[["Experiment", "Treatment", "fips", "State"]]
combined.head()

Unnamed: 0,Experiment,Treatment,fips,State
0,"(AlAMU, 1875)",1,1089,Alabama
1,"(AlAMU, 1875)",0,1101,Alabama
2,"(AlAMU, 1875)",0,1087,Alabama
3,"(AubU, 1856)",1,1081,Alabama
4,"(AubU, 1856)",0,1077,Alabama


In [7]:
matched = combined.groupby(["Experiment", "Treatment"])["fips"].agg(['unique']).reset_index()
matched = matched.rename(columns = {"unique" : "fips"})
matched["College"] = [tupule[0] for tupule in matched["Experiment"]]
matched["Year"] = [tupule[1] for tupule in matched["Experiment"]]
matched = matched.drop(["Experiment"], axis = 1)
matched.head()

Unnamed: 0,Treatment,fips,College,Year
0,0,[39133],AkronU,1870
1,1,[39153],AkronU,1870
2,0,"[1101, 1087]",AlAMU,1875
3,1,[1089],AlAMU,1875
4,0,"[28003, 28017, 28093]",AlcSU,1871


In [9]:
combined.to_csv('data/intermediate/matched_colleges.csv',)  

In [77]:
key = ("4a0f98ca07a7da0e0df171eabffd7452c7b473bc") 

In [134]:
base_url = "http://api.census.gov/data/timeseries/poverty/saipe"
cols = "NAME,GEOID,SAEMHI_PT,SAEPOVRTALL_PT"
year = "2018"

In [126]:
data_url = f"{base_url}?get={cols}&for=county:{fips[2:5]}&in=state:{fips[0:2]}&time={year}&key={key}"
data_url

'http://api.census.gov/data/timeseries/poverty/saipe?get=NAME,SAEMHI_PT,SAEPOVRTALL_PT&for=county:133&in=state:39&time=2018&key=4a0f98ca07a7da0e0df171eabffd7452c7b473bc'

In [132]:
response = requests.get(data_url)
response.status_code

200

In [147]:
all_fips = [list(set(item)) for item in matched["fips"]]
all_fips = [element for sublist in all_fips for element in sublist]
all_fips = set(all_fips)

In [150]:
for fip in all_fips:
    if not isinstance(fip, str):
        print(fip)
        continue
    data_url = f"{base_url}?get={cols}&for=county:{fip[2:5]}&in=state:{fip[0:2]}&time={year}&key={key}"
    response = requests.get(data_url)
    if response.status_code != 200:
        print(response.status_code)
        continue
    data = response.json()
    with open("income_data.txt", "a", newline="") as writefile:
        writer = csv.writer(writefile, quoting=csv.QUOTE_ALL, delimiter=",")
        writer.writerows(data)

KeyboardInterrupt: 

In [32]:
df['Experiment'] = list(zip(df['College_Abbrev'], df['Year_Founded']))
df3 = df[["Experiment", "Town", "County", "State", "Treatment"]].copy()
df3.head()

Unnamed: 0,Experiment,Town,County,State,Treatment
0,"(AlAMU, 1875)",Huntsville,Madison,Alabama,1
1,"(AlAMU, 1875)",Montgomery,Montgomery,Alabama,0
2,"(AlAMU, 1875)",Tuskeegee,Macon,Alabama,0
3,"(AubU, 1856)",Auburn,Lee,Alabama,1
4,"(AubU, 1856)",Florence,Lauderdale,Alabama,0


In [10]:
import osmnx as ox

In [33]:
df3["Search_Place"] = df3["Town"] + ', ' + df3["State"]
df3.shape

(712, 6)

In [34]:
df3["Graph"] = None

for i in range(3):
    try:
        g = ox.graph_from_place(df3["Search_Place"].iloc[i], simplify = True, network_type = "drive")
    except Exception as e:
        g = None
    df3["Graph"].iloc[i] = g

KeyboardInterrupt: 

In [None]:
from torch_geometric.nn import GCNConv

model = GCNConv(in_features=16, out_features=32)


In [None]:
class GCN(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_features, hidden_features)
        self.conv2 = GCNConv(hidden_features, out_features)
        self.clf = nn.Linear(out_features, 2)  # Assuming 2 classes (city/rural)

    def forward(self, x, edge_index):
        # Pass through GCN layers as before (refer to previous examples for details)
        x = self.clf(x)
        x = F.log_softmax(x, dim=1)
        return x