## Closest University
---
In this Jupyter notebook, we associate each city with the single nearest Trilogy campus using Google's distance matrix, and then Vincenty's distance formula.

If time column has a value, then pick shortest time, otherwise default to vincenty distance value in column ["Distance"]

### Dependencies

In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time 
import sqlite3
from sqlalchemy import create_engine
import pymysql
pymysql.install_as_MySQLdb()

### Read the City Pair Combinations

In [2]:
conn = sqlite3.connect("Opportunity_Map.db")
distance_values = pd.read_sql("select * from FullDistances", conn)
distance_values.head()

Unnamed: 0,Asian Population,Black Population,City,City Lat,City Lng,CityState,Distance,Distance_Text,Education Associates,Education Bachelors,...,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,University,Uni CityState,White Population,Physical_Campus
0,0.0,53.0,CARRSVILLE,36.73739,-76.840511,"CARRSVILLE, VA",65.400534,79.1 mi,157.0,113.0,...,93.0,VA,5665.0,1 hour 34 mins,37.548987,-77.453429,UR,"RICHMOND, VA",1549.0,False
1,0.0,53.0,CARRSVILLE,36.73739,-76.840511,"CARRSVILLE, VA",135.829465,176 mi,157.0,113.0,...,93.0,VA,10258.0,2 hours 51 mins,35.904912,-79.046913,UNC,"CHAPEL HILL, NC",1549.0,False
2,0.0,53.0,CARRSVILLE,36.73739,-76.840511,"CARRSVILLE, VA",65.400534,79.1 mi,157.0,113.0,...,93.0,VA,5665.0,1 hour 34 mins,37.548987,-77.453429,UR,"RICHMOND, VA",1549.0,False
3,0.0,53.0,CARRSVILLE,36.73739,-76.840511,"CARRSVILLE, VA",149.564762,185 mi,157.0,113.0,...,93.0,VA,11045.0,3 hours 4 mins,38.899715,-77.048599,GW,"WASHINGTON, DC",1549.0,False
4,0.0,613.0,CARSON,37.016961,-77.395057,"CARSON, VA",36.830103,41.3 mi,109.0,91.0,...,20.0,VA,2610.0,44 mins,37.548987,-77.453429,UR,"RICHMOND, VA",680.0,False


In [3]:
distance_values.dtypes

Asian Population                          float64
Black Population                          float64
City                                       object
City Lat                                  float64
City Lng                                  float64
CityState                                  object
Distance                                  float64
Distance_Text                              object
Education Associates                      float64
Education Bachelors                       float64
Education Doctorate                       float64
Education GED                             float64
Education High School                     float64
Education Masters                         float64
Education None                            float64
Education Professional                    float64
Employment Female Computer Engineering    float64
Employment Labor Force                    float64
Employment Male Computer Engineering      float64
Employment Unemployed                     float64


In [4]:
distance_values[distance_values["CityState"] == "HOUSTON, TX"]

Unnamed: 0,Asian Population,Black Population,City,City Lat,City Lng,CityState,Distance,Distance_Text,Education Associates,Education Bachelors,...,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,University,Uni CityState,White Population,Physical_Campus
9845,230549.0,684416.0,HOUSTON,29.775734,-95.414548,"HOUSTON, TX",5.801359,8.9 mi,100014.0,349920.0,...,633609.0,TX,1147.0,19 mins,29.719949,-95.342233,UT,"HOUSTON, TX",1775897.0,True
9846,230549.0,684416.0,HOUSTON,29.775734,-95.414548,"HOUSTON, TX",143.373805,162 mi,100014.0,349920.0,...,633609.0,TX,9329.0,2 hours 35 mins,30.284918,-97.734057,UT,"AUSTIN, TX",1775897.0,False
9847,230549.0,684416.0,HOUSTON,29.775734,-95.414548,"HOUSTON, TX",186.906597,193 mi,100014.0,349920.0,...,633609.0,TX,10612.0,2 hours 57 mins,29.4241,-98.4936,UTSA,"SAN ANTONIO, TX",1775897.0,False


In [5]:
distance_values[distance_values["CityState"] == "ABBOTT, TX"]

Unnamed: 0,Asian Population,Black Population,City,City Lat,City Lng,CityState,Distance,Distance_Text,Education Associates,Education Bachelors,...,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,University,Uni CityState,White Population,Physical_Campus
9142,0.0,9.0,ABBOTT,31.881728,-97.08541,"ABBOTT, TX",181.440631,208 mi,90.0,120.0,...,109.0,TX,11491.0,3 hours 12 mins,29.719949,-95.342233,UT,"HOUSTON, TX",985.0,False
9143,0.0,9.0,ABBOTT,31.881728,-97.08541,"ABBOTT, TX",68.416138,80.3 mi,90.0,120.0,...,109.0,TX,4534.0,1 hour 16 mins,32.841218,-96.784518,SMU,"DALLAS, TX",985.0,False
9144,0.0,9.0,ABBOTT,31.881728,-97.08541,"ABBOTT, TX",116.535657,124 mi,90.0,120.0,...,109.0,TX,6637.0,1 hour 51 mins,30.284918,-97.734057,UT,"AUSTIN, TX",985.0,False
9145,0.0,9.0,ABBOTT,31.881728,-97.08541,"ABBOTT, TX",188.928462,204 mi,90.0,120.0,...,109.0,TX,10994.0,3 hours 3 mins,29.4241,-98.4936,UTSA,"SAN ANTONIO, TX",985.0,False


In [6]:
test = distance_values[distance_values["Time"].isnull()]

In [7]:
test.head()

Unnamed: 0,Asian Population,Black Population,City,City Lat,City Lng,CityState,Distance,Distance_Text,Education Associates,Education Bachelors,...,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,University,Uni CityState,White Population,Physical_Campus
1087,8.0,667.0,MC KENNEY,36.98943,-77.72029,"MC KENNEY, VA",105.133375,,158.0,173.0,...,401.0,VA,,,35.904912,-79.046913,UNC,"CHAPEL HILL, NC",1559.0,False
1732,63.0,5226.0,SMITHFIELD,37.001261,-76.655932,"SMITHFIELD, VA",57.950293,,1036.0,2281.0,...,2434.0,VA,,,37.548987,-77.453429,UR,"RICHMOND, VA",11609.0,False
1883,0.0,0.0,TANGIER,37.829055,-75.984124,"TANGIER, VA",82.810104,,10.0,24.0,...,76.0,VA,,,37.548987,-77.453429,UR,"RICHMOND, VA",487.0,False
1884,0.0,0.0,TANGIER,37.829055,-75.984124,"TANGIER, VA",82.810104,,10.0,24.0,...,76.0,VA,,,37.548987,-77.453429,UR,"RICHMOND, VA",487.0,False
1885,0.0,0.0,TANGIER,37.829055,-75.984124,"TANGIER, VA",93.780761,,10.0,24.0,...,76.0,VA,,,38.899715,-77.048599,GW,"WASHINGTON, DC",487.0,False


In [8]:
test_2 = test.loc[test.groupby("CityState")["Distance"].idxmin()]

In [9]:
test_2.head()

Unnamed: 0,Asian Population,Black Population,City,City Lat,City Lng,CityState,Distance,Distance_Text,Education Associates,Education Bachelors,...,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,University,Uni CityState,White Population,Physical_Campus
63043,0.0,0.0,ABBYVILLE,38.006359,-98.206324,"ABBYVILLE, KS",208.322033,,18.0,10.0,...,26.0,KS,,,39.1141,-94.6275,KU,"KANSAS CITY, KS",330.0,False
68253,708.0,541.0,ABERDEEN,45.482335,-98.53254,"ABERDEEN, SD",260.911653,,2120.0,4344.0,...,3242.0,SD,,,44.975354,-93.233074,MINN,"MINNEAPOLIS, MN",28656.0,False
48927,5.0,53.0,ABERNATHY,33.857882,-101.881538,"ABERNATHY, TX",302.981336,,101.0,214.0,...,695.0,TX,,,32.841218,-96.784518,SMU,"DALLAS, TX",3452.0,False
53744,0.0,0.0,ABIQUIU,36.282896,-106.411988,"ABIQUIU, NM",247.078038,,0.0,142.0,...,78.0,NM,,,39.676617,-104.961896,DU,"DENVER, CO",524.0,False
52540,22.0,381.0,ABITA SPRINGS,30.484687,-89.953028,"ABITA SPRINGS, LA",327.007194,,188.0,1144.0,...,816.0,LA,,,29.719949,-95.342233,UT,"HOUSTON, TX",7301.0,False


In [10]:
test_2["CityState"].nunique()

5927

### Identify the Single Closest University for Each City

In [11]:
closest_location_google = distance_values[distance_values["Time"].notnull()].loc[distance_values.groupby("CityState")["Time"].idxmin()][["CityState", "University", "Distance"]]

In [12]:
#closest_location = crow_distances.loc[crow_distances.groupby("CityState")["crow_distance"].idxmin()][["CityState", "University", "crow_distance"]]

In [13]:
closest_location_google_pd = pd.DataFrame(closest_location_google)
closest_location_google_pd.head(10)

Unnamed: 0,CityState,University,Distance
14746.0,"AARONSBURG, PA",PENN,140.339859
42505.0,"ABBEVILLE, AL",GTECH,157.537626
27582.0,"ABBEVILLE, GA",GTECH,138.382298
10872.0,"ABBEVILLE, LA",UT,189.538012
37355.0,"ABBEVILLE, MS",VAND,187.693841
6132.0,"ABBEVILLE, SC",GTECH,116.574634
19110.0,"ABBOT, ME",UNH,158.938804
32687.0,"ABBOTSFORD, WI",MINN,144.634102
9143.0,"ABBOTT, TX",SMU,68.416138
14749.0,"ABBOTTSTOWN, PA",PENN,94.93974


In [14]:
closest_location_google_pd.nunique()

CityState     21355
University       35
Distance      21355
dtype: int64

In [15]:
vincentys_one = distance_values[distance_values["Time"].isnull()]

In [16]:
closest_location_vincentys = vincentys_one.loc[vincentys_one.groupby("CityState")["Distance"].idxmin()][["CityState", "University", "Distance"]]

In [17]:
closest_location_vincentys_pd = pd.DataFrame(closest_location_vincentys)
closest_location_vincentys_pd.head(10)

Unnamed: 0,CityState,University,Distance
63043,"ABBYVILLE, KS",KU,208.322033
68253,"ABERDEEN, SD",MINN,260.911653
48927,"ABERNATHY, TX",SMU,302.981336
53744,"ABIQUIU, NM",DU,247.078038
52540,"ABITA SPRINGS, LA",UT,327.007194
67892,"ABSARAKA, ND",MINN,243.281935
64947,"ABSAROKEE, MT",UTAH,348.736084
48930,"ACKERLY, TX",UT,281.986828
68556,"ACKERMAN, MS",VAND,235.433623
52544,"ACME, LA",UT,239.030077


In [18]:
frames = [closest_location_google_pd, closest_location_vincentys_pd]
closest_location_pd = pd.concat(frames, join='outer')

In [19]:
closest_location_pd.head()

Unnamed: 0,CityState,University,Distance
14746.0,"AARONSBURG, PA",PENN,140.339859
42505.0,"ABBEVILLE, AL",GTECH,157.537626
27582.0,"ABBEVILLE, GA",GTECH,138.382298
10872.0,"ABBEVILLE, LA",UT,189.538012
37355.0,"ABBEVILLE, MS",VAND,187.693841


In [20]:
closest_location_pd.count()

CityState     27282
University    27282
Distance      27282
dtype: int64

In [21]:
closest_location_pd

Unnamed: 0,CityState,University,Distance
14746.0,"AARONSBURG, PA",PENN,140.339859
42505.0,"ABBEVILLE, AL",GTECH,157.537626
27582.0,"ABBEVILLE, GA",GTECH,138.382298
10872.0,"ABBEVILLE, LA",UT,189.538012
37355.0,"ABBEVILLE, MS",VAND,187.693841
6132.0,"ABBEVILLE, SC",GTECH,116.574634
19110.0,"ABBOT, ME",UNH,158.938804
32687.0,"ABBOTSFORD, WI",MINN,144.634102
9143.0,"ABBOTT, TX",SMU,68.416138
14749.0,"ABBOTTSTOWN, PA",PENN,94.939740


In [22]:
closest_location_pd = closest_location_pd[closest_location_pd["CityState"].notnull()].reset_index()

In [23]:
closest_location_pd = closest_location_pd[["CityState", "University", "Distance"]]

In [24]:
closest_location_pd.count()

CityState     27282
University    27282
Distance      27282
dtype: int64

In [25]:
closest_location_pd["Closest"] = True

In [26]:
closest_location_pd.count()

CityState     27282
University    27282
Distance      27282
Closest       27282
dtype: int64

In [27]:
distance_values.count()

Asian Population                          70093
Black Population                          70093
City                                      70093
City Lat                                  70093
City Lng                                  70093
CityState                                 70093
Distance                                  70093
Distance_Text                             47801
Education Associates                      70093
Education Bachelors                       70093
Education Doctorate                       70093
Education GED                             70093
Education High School                     70093
Education Masters                         70093
Education None                            70093
Education Professional                    70093
Employment Female Computer Engineering    70093
Employment Labor Force                    70093
Employment Male Computer Engineering      70093
Employment Unemployed                     70093
Google_URL                              

In [28]:
closest_location_pd.dtypes

CityState      object
University     object
Distance      float64
Closest          bool
dtype: object

In [29]:
distance_values.dtypes

Asian Population                          float64
Black Population                          float64
City                                       object
City Lat                                  float64
City Lng                                  float64
CityState                                  object
Distance                                  float64
Distance_Text                              object
Education Associates                      float64
Education Bachelors                       float64
Education Doctorate                       float64
Education GED                             float64
Education High School                     float64
Education Masters                         float64
Education None                            float64
Education Professional                    float64
Employment Female Computer Engineering    float64
Employment Labor Force                    float64
Employment Male Computer Engineering      float64
Employment Unemployed                     float64


In [30]:
# Rejoin with the original dataframe to determine which university met the condition
minimum_distances = pd.merge(closest_location_pd, distance_values, on=["CityState", "Distance", "University"], how="left")

In [31]:
minimum_distances.count()

CityState                                 29119
University                                29119
Distance                                  29119
Closest                                   29119
Asian Population                          29119
Black Population                          29119
City                                      29119
City Lat                                  29119
City Lng                                  29119
Distance_Text                             22731
Education Associates                      29119
Education Bachelors                       29119
Education Doctorate                       29119
Education GED                             29119
Education High School                     29119
Education Masters                         29119
Education None                            29119
Education Professional                    29119
Employment Female Computer Engineering    29119
Employment Labor Force                    29119
Employment Male Computer Engineering    

### Run Test Cases

In [32]:
minimum_distances.head(200)

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
0,"AARONSBURG, PA",PENN,140.339859,True,0.0,0.0,AARONSBURG,40.895701,-77.392432,165 mi,...,1058.0,202.0,PA,10568.0,2 hours 56 mins,40.440600,-79.995900,"PITTSBURGH, PA",1058.0,FALSE
1,"ABBEVILLE, AL",GTECH,157.537626,True,0.0,2274.0,ABBEVILLE,31.595148,-85.208852,184 mi,...,6470.0,1363.0,AL,10990.0,3 hours 3 mins,33.775618,-84.396285,"ATLANTA, GA",4064.0,FALSE
2,"ABBEVILLE, GA",GTECH,138.382298,True,41.0,1903.0,ABBEVILLE,31.976256,-83.339665,158 mi,...,4719.0,689.0,GA,9364.0,2 hours 36 mins,33.775618,-84.396285,"ATLANTA, GA",2676.0,FALSE
3,"ABBEVILLE, LA",UT,189.538012,True,1059.0,6457.0,ABBEVILLE,29.894612,-92.193173,231 mi,...,26078.0,5164.0,LA,13180.0,3 hours 40 mins,29.719949,-95.342233,"HOUSTON, TX",17932.0,FALSE
4,"ABBEVILLE, MS",VAND,187.693841,True,19.0,1502.0,ABBEVILLE,34.492325,-89.443056,254 mi,...,2974.0,428.0,MS,14485.0,4 hours 1 min,36.144703,-86.802655,"NASHVILLE, TN",1432.0,FALSE
5,"ABBEVILLE, SC",GTECH,116.574634,True,0.0,4410.0,ABBEVILLE,34.182407,-82.425607,148 mi,...,13005.0,3382.0,SC,9006.0,2 hours 30 mins,33.775618,-84.396285,"ATLANTA, GA",8475.0,FALSE
6,"ABBOT, ME",UNH,158.938804,True,0.0,0.0,ABBOT,45.229183,-69.596521,207 mi,...,698.0,81.0,ME,13019.0,3 hours 37 mins,43.138948,-70.937025,"DURHAM, NH",683.0,FALSE
7,"ABBOTSFORD, WI",MINN,144.634102,True,12.0,0.0,ABBOTSFORD,44.967530,-90.282268,152 mi,...,2943.0,366.0,WI,8781.0,2 hours 26 mins,44.975354,-93.233074,"MINNEAPOLIS, MN",2780.0,FALSE
8,"ABBOTT, TX",SMU,68.416138,True,0.0,9.0,ABBOTT,31.881728,-97.085410,80.3 mi,...,1023.0,109.0,TX,4534.0,1 hour 16 mins,32.841218,-96.784518,"DALLAS, TX",985.0,FALSE
9,"ABBOTTSTOWN, PA",PENN,94.939740,True,15.0,17.0,ABBOTTSTOWN,39.895240,-76.978971,117 mi,...,3881.0,399.0,PA,7723.0,2 hours 9 mins,39.952219,-75.193214,"PHILADELPHIA, PA",3789.0,FALSE


In [33]:
minimum_distances["CityState"].nunique()

27238

In [34]:
minimum_distances[minimum_distances["Closest"] == False]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus


In [35]:
minimum_distances[(minimum_distances["CityState"] == "HOUSTON, TX")]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
9660,"HOUSTON, TX",UT,5.801359,True,230549.0,684416.0,HOUSTON,29.775734,-95.414548,8.9 mi,...,3061887.0,633609.0,TX,1147.0,19 mins,29.719949,-95.342233,"HOUSTON, TX",1775897.0,True


In [36]:
minimum_distances[(minimum_distances["CityState"] == "COLLEGE STATION, TX")]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
4243,"COLLEGE STATION, TX",UT,82.671703,True,9757.0,7634.0,COLLEGE STATION,30.585239,-96.299284,97.0 mi,...,112074.0,32506.0,TX,5642.0,1 hour 34 mins,29.719949,-95.342233,"HOUSTON, TX",89674.0,False


In [37]:
minimum_distances[(minimum_distances["CityState"] == "BIRMINGHAM, AL")]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
1783,"BIRMINGHAM, AL",GTECH,139.819523,True,11585.0,217799.0,BIRMINGHAM,33.505136,-86.800174,152 mi,...,497450.0,92560.0,AL,8444.0,2 hours 21 mins,33.775618,-84.396285,"ATLANTA, GA",251913.0,False


In [38]:
minimum_distances[(minimum_distances["CityState"] == "NEW ORLEANS, LA")]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
26757,"NEW ORLEANS, LA",UT,317.843215,True,11942.0,229241.0,NEW ORLEANS,29.972238,-90.056357,,...,415247.0,102960.0,LA,,,29.719949,-95.342233,"HOUSTON, TX",160847.0,False


In [39]:
minimum_distances[(minimum_distances["CityState"] == "MADISON, WI")]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
12172,"MADISON, WI",UWM,4.632362,True,21903.0,20773.0,MADISON,43.070701,-89.321318,8.3 mi,...,284782.0,50586.0,WI,1190.0,20 mins,43.0766,-89.4125,"MADISON, WI",223464.0,True


In [40]:
minimum_distances["CityState"].nunique()

27238

In [41]:
minimum_distances[minimum_distances["CityState"] == "ZWOLLE, LA"]

Unnamed: 0,CityState,University,Distance,Closest,Asian Population,Black Population,City,City Lat,City Lng,Distance_Text,...,Population,Poverty,State,Time,Time_Text,Uni Lat,Uni Lng,Uni CityState,White Population,Physical_Campus
22729,"ZWOLLE, LA",UT,167.971661,True,36.0,1075.0,ZWOLLE,31.650584,-93.618811,203 mi,...,5721.0,1207.0,LA,12123.0,3 hours 22 mins,29.719949,-95.342233,"HOUSTON, TX",3151.0,False


### Export Data

In [42]:
minimum_distances.to_csv("Outputs/03-Minimum_Distances_Matrix.csv")

In [43]:
minimum_distances.to_sql("Minimum_Distances", conn, if_exists="replace", index=False)

  chunksize=chunksize, dtype=dtype)


In [44]:
# Check Tables in SQLite
cur = conn.cursor() 
res = cur.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;")
for name in res:
    print(name[0])

City_Census
FullDistances
Minimum_Distances
Rules_Added
Zip_Census


### Export to Cloud SQL

In [136]:
engine = create_engine('mysql+mysqldb://trilogy:DataRocks@35.227.28.228/mapping_data?unix_socket=/cloudsql/sql-projects:us-east1:opportunity-db')
conn = engine.connect()

minimum_distances.to_sql("Minimum_Distances", conn, if_exists="replace", index=False)