In [3]:
import os
import pandas as pd
from config import gkey
import requests

In [4]:
# Load the raw player table from the CSV shipped in the "nba-players-stats"
# folder; the path is built with os.path.join so it works on any OS.
file_path = os.path.join("nba-players-stats", "player_data.csv")
stats_df = pd.read_csv(filepath_or_buffer=file_path)
# Last expression of the cell: render the frame as the cell output.
stats_df


Unnamed: 0,name,year_start,year_end,position,height,weight,birth_date,college
0,Alaa Abdelnaby,1991,1995,F-C,6-10,240.0,"June 24, 1968",Duke University
1,Zaid Abdul-Aziz,1969,1978,C-F,6-9,235.0,"April 7, 1946",Iowa State University
2,Kareem Abdul-Jabbar,1970,1989,C,7-2,225.0,"April 16, 1947","University of California, Los Angeles"
3,Mahmoud Abdul-Rauf,1991,2001,G,6-1,162.0,"March 9, 1969",Louisiana State University
4,Tariq Abdul-Wahad,1998,2003,F,6-6,223.0,"November 3, 1974",San Jose State University
...,...,...,...,...,...,...,...,...
4545,Ante Zizic,2018,2018,F-C,6-11,250.0,"January 4, 1997",
4546,Jim Zoet,1983,1983,C,7-1,240.0,"December 20, 1953",Kent State University
4547,Bill Zopf,1971,1971,G,6-1,170.0,"June 7, 1948",Duquesne University
4548,Ivica Zubac,2017,2018,C,7-1,265.0,"March 18, 1997",


In [5]:
# Keep just the two columns needed for mapping (player name and college) and
# discard every row where either value is missing -- a player without a
# college cannot be geocoded or placed on the map.
college_df = stats_df.loc[:, ["name", "college"]].dropna(how="any")
college_df


Unnamed: 0,name,college
0,Alaa Abdelnaby,Duke University
1,Zaid Abdul-Aziz,Iowa State University
2,Kareem Abdul-Jabbar,"University of California, Los Angeles"
3,Mahmoud Abdul-Rauf,Louisiana State University
4,Tariq Abdul-Wahad,San Jose State University
...,...,...
4542,Derrick Zimmerman,Mississippi State University
4543,Stephen Zimmerman,"University of Nevada, Las Vegas"
4546,Jim Zoet,Kent State University
4547,Bill Zopf,Duquesne University


In [9]:
# Look up the latitude/longitude of every college with the Google Geocoding
# API and store them in two new columns, 'lat' and 'lng'.
#
# Many players share a college, so each *distinct* college is geocoded once
# and the result is mapped back onto the rows, instead of issuing one HTTP
# request per player.
base_url = "https://maps.googleapis.com/maps/api/geocode/json"


def _geocode_college(college_name):
    """Return ``(lat, lng)`` for a college name.

    Returns ``(nan, nan)`` when the API has no result for the name or when the
    request itself fails, so a single bad lookup never aborts the whole run.
    """
    # sets parameters for url
    params = {
        "address": "{0}".format(college_name),
        "key": gkey
    }
    try:
        # A timeout keeps one stalled connection from hanging the cell forever.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        # Path to the coordinates inside the API's JSON payload.
        location = response.json()['results'][0]["geometry"]["location"]
        return location["lat"], location["lng"]
    except (IndexError, KeyError):
        # Empty 'results' list (or unexpected payload): nothing was found.
        print("Can't find coordinates of school... skipping")
    except (requests.RequestException, ValueError) as error:
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print("Geocoding request failed for {0!r} ({1})... skipping".format(
            college_name, type(error).__name__))
    return float("nan"), float("nan")


lat_by_college = {}
lng_by_college = {}
for college_name in college_df["college"].unique():
    lat_by_college[college_name], lng_by_college[college_name] = _geocode_college(college_name)

# assign() builds a new frame, so no column is written into a possible view of
# stats_df. Rows whose college could not be geocoded hold NaN, which the later
# dropna() step removes.
college_df = college_df.assign(
    lat=college_df["college"].map(lat_by_college),
    lng=college_df["college"].map(lng_by_college),
)


Can't find coordinates of school... skipping


In [82]:
college_df

Unnamed: 0,name,college,lat,lng
0,Alaa Abdelnaby,Duke University,36.0014,-78.9382
1,Zaid Abdul-Aziz,Iowa State University,42.0266,-93.6465
2,Kareem Abdul-Jabbar,"University of California, Los Angeles",34.0689,-118.445
3,Mahmoud Abdul-Rauf,Louisiana State University,30.4133,-91.18
4,Tariq Abdul-Wahad,San Jose State University,37.3352,-121.881
...,...,...,...,...
4542,Derrick Zimmerman,Mississippi State University,33.4552,-88.7944
4543,Stephen Zimmerman,"University of Nevada, Las Vegas",36.1085,-115.143
4546,Jim Zoet,Kent State University,41.1456,-81.3393
4547,Bill Zopf,Duquesne University,40.4368,-79.9892


In [83]:
# Save the geocoded frame to "Output csv's/colleges.csv" and read it straight
# back, so later cells can work from the file without re-running the (slow,
# API-billed) geocoding above.
#
# The file is written to the same path it is read from. Previously it was
# written to a file literally named "Output csv's, colleges.csv" in the working
# directory, so the read below either failed or picked up a stale file.
output_data_file = os.path.join("Output csv's", "colleges.csv")
# Make sure the output folder exists before writing into it.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# The index is written too, so it comes back as an "Unnamed: 0" column on read.
college_df.to_csv(output_data_file)
# saves csv to df so I don't have to run above cells every time
file_path2 = os.path.join("Output csv's", "colleges.csv")
collegecsv_df = pd.read_csv(file_path2)


In [94]:
# Keep only the rows in which every column has a value. Rows with a missing
# lat/lng are colleges the geocoder could not locate, and they cannot be mapped.
collegecsv_df = collegecsv_df[collegecsv_df.notna().all(axis=1)]
collegecsv_df

Unnamed: 0.1,Unnamed: 0,name,college,lat,lng
0,0,Alaa Abdelnaby,Duke University,36.001426,-78.938229
1,1,Zaid Abdul-Aziz,Iowa State University,42.026619,-93.646465
2,2,Kareem Abdul-Jabbar,"University of California, Los Angeles",34.068921,-118.445181
3,3,Mahmoud Abdul-Rauf,Louisiana State University,30.413258,-91.180002
4,4,Tariq Abdul-Wahad,San Jose State University,37.335187,-121.881072
...,...,...,...,...,...
4243,4542,Derrick Zimmerman,Mississippi State University,33.455174,-88.794377
4244,4543,Stephen Zimmerman,"University of Nevada, Las Vegas",36.108520,-115.143171
4245,4546,Jim Zoet,Kent State University,41.145559,-81.339288
4246,4547,Bill Zopf,Duquesne University,40.436791,-79.989232


In [95]:
# gmaps renders Google Maps layers inside the notebook
import gmaps

# Authenticate gmaps with the same API key used for geocoding
gmaps.configure(api_key=gkey)

# Heat map of the colleges attended by every player left in collegecsv_df:
# one point per player row, so popular colleges show up hotter.
fig = gmaps.figure()
heatmap_layer = gmaps.heatmap_layer(
    collegecsv_df[["lat", "lng"]]
)
fig.add_layer(heatmap_layer)
fig


Figure(layout=FigureLayout(height='420px'))

In [99]:
# Load the player/VORP table produced by the "nba hometown data" notebook.
# Per that notebook it is already sorted from highest to lowest VORP.
file_path4 = os.path.join("Output csv's", "vorp_sorted.csv")
vorp_sorted_df = pd.read_csv(filepath_or_buffer=file_path4)
vorp_sorted_df


Unnamed: 0,Player,VORP
0,Russell Westbrook,12.4
1,James Harden,9.0
2,LeBron James,7.3
3,Giannis Antetokounmpo,6.9
4,Jimmy Butler,6.3
...,...,...
481,Brandon Ingram,-1.1
482,Isaiah Whitehead,-1.2
483,Malcolm Delaney,-1.2
484,Domantas Sabonis,-1.2


In [107]:
# Keep the first 244 rows of the VORP-sorted table, i.e. the 244 highest-VORP
# players. 244 was chosen because some of these players are lost in the merge
# performed in the next cell.
top_vorp_df = vorp_sorted_df.head(244)
top_vorp_df

Unnamed: 0,Player,VORP
0,Russell Westbrook,12.4
1,James Harden,9.0
2,LeBron James,7.3
3,Giannis Antetokounmpo,6.9
4,Jimmy Butler,6.3
...,...,...
239,Raul Neto,0.1
240,Marco Belinelli,0.1
241,Roy Hibbert,0.1
242,Luis Scola,0.1


In [108]:
# Attach college and coordinates to each top-VORP player by matching the VORP
# table's "Player" column against the college table's "name" column. The
# default inner join drops players that have no match in collegecsv_df.
merge_df = top_vorp_df.merge(
    collegecsv_df,
    left_on="Player",
    right_on="name",
)
merge_df


Unnamed: 0.1,Player,VORP,Unnamed: 0,name,college,lat,lng
0,Russell Westbrook,12.4,4290,Russell Westbrook,"University of California, Los Angeles",34.068921,-118.445181
1,James Harden,9.0,1627,James Harden,Arizona State University,33.424240,-111.928053
2,Jimmy Butler,6.3,577,Jimmy Butler,Marquette University,43.038834,-87.928567
3,Kawhi Leonard,6.2,2378,Kawhi Leonard,San Diego State University,32.775989,-117.071253
4,Stephen Curry,6.2,868,Stephen Curry,Davidson College,35.500802,-80.844673
...,...,...,...,...,...,...,...
195,Alan Williams,0.1,4352,Alan Williams,"University of California, Santa Barbara",34.413963,-119.848947
196,Tarik Black,0.1,334,Tarik Black,University of Kansas,38.954344,-95.255796
197,Harrison Barnes,0.1,200,Harrison Barnes,University of North Carolina,35.904912,-79.046913
198,Roy Hibbert,0.1,1750,Roy Hibbert,Georgetown University,38.907609,-77.072258


In [109]:
# gmaps renders Google Maps layers inside the notebook
import gmaps

# Authenticate gmaps with the API key
gmaps.configure(api_key=gkey)

# Heat map of the colleges attended by the top-VORP players in merge_df:
# one point per merged player row.
fig = gmaps.figure()
heatmap_layer = gmaps.heatmap_layer(
    merge_df[["lat", "lng"]]
)
fig.add_layer(heatmap_layer)
fig


Figure(layout=FigureLayout(height='420px'))