In [2]:
import requests
import pandas as pd
import os

from api_key import geoapify_key

In [3]:
# Create stadium_df
stadiums_path = os.path.join("..", "Resources", "stadiums.csv")
stadium_df = pd.read_csv(stadiums_path)

# add a team_id column
stadium_df["team_id"] = list(range(1, (len(stadium_df)+1)))
stadium_df = stadium_df[["team_id", "Team", "League", "Division", "Lat", "Long"]]
stadium_df

Unnamed: 0,team_id,Team,League,Division,Lat,Long
0,1,Dallas Mavericks,NBA,West,32.790556,-96.810278
1,2,Orlando Magic,NBA,East,28.539167,-81.383611
2,3,San Antonio Spurs,NBA,West,29.426944,-98.437500
3,4,Denver Nuggets,NBA,West,39.748920,-105.008400
4,5,Brooklyn Nets,NBA,East,40.682661,-73.975225
...,...,...,...,...,...,...
148,149,FC Dallas,MLS,Western Conference,33.154444,-96.835278
149,150,FC Cincinnati,MLS,Eastern Conference,39.111389,-84.522222
150,151,New York City FC,MLS,Eastern Conference,40.829167,-73.926389
151,152,Charlotte FC,MLS,Eastern Conference,35.225833,-80.852778


In [4]:
# Create league_df from the unique leagues in stadium_df
league_ids = list(range(1,6))

league_df = pd.DataFrame({
    "league_id": league_ids,
    "league": stadium_df["League"].unique()
})

# Export league_df
league_df.to_csv(os.path.join("..", "Resources", "league.csv"), index=False)
league_df

Unnamed: 0,league_id,league
0,1,NBA
1,2,NFL
2,3,NHL
3,4,MLB
4,5,MLS


In [5]:
# Add league_id to each row of stadium_df
stadium_df["league_id"] = ""
for index, row in stadium_df.iterrows():
    if row["League"] == "NBA":
        stadium_df.loc[index,"league_id"] = 1
    elif row["League"] == "NFL":
        stadium_df.loc[index,"league_id"] = 2
    elif row["League"] == "NHL":
        stadium_df.loc[index,"league_id"] = 3
    elif row["League"] == "MLB":
        stadium_df.loc[index,"league_id"] = 4
    elif row["League"] == "MLS":
        stadium_df.loc[index,"league_id"] = 5

## Drop League and Division columns
stadium_df = stadium_df[['team_id', 'Team', 'Lat', 'Long', 'league_id']]
stadium_df.head()

Unnamed: 0,team_id,Team,Lat,Long,league_id
0,1,Dallas Mavericks,32.790556,-96.810278,1
1,2,Orlando Magic,28.539167,-81.383611,1
2,3,San Antonio Spurs,29.426944,-98.4375,1
3,4,Denver Nuggets,39.74892,-105.0084,1
4,5,Brooklyn Nets,40.682661,-73.975225,1


In [6]:
# Create venue_df

# Set up constants for API call
base_url = "https://api.geoapify.com/v2/places"
params = {
    "apiKey": geoapify_key,
    "categories": "sport.stadium"
}

venue_id_dict = {}
current_venue_id = 1

# Iterate through stadium_df and stadium data for each team
for index, row in stadium_df.iterrows():
    # set lon and lat variable
    lon = row['Long']
    lat = row['Lat']
    
    # update params dict
    params["bias"] = f"proximity:{lon},{lat}"
    
    # API call
    response_features = requests.get(base_url, params=params).json()['features']
    
    # make sure data was returned
    if len(response_features) > 0:
        
        # grab the first feature that has a 'name' property
        for feature in response_features:
            if "name" in feature['properties'].keys():
                stadium = feature['properties']
                break
        
        # add venue to venue_id_dict if not already there
        if stadium["name"] not in venue_id_dict.keys():
            venue_id_dict[stadium["name"]] = current_venue_id
            current_venue_id +=1
        
        # add data to stadium_df
        stadium_df.loc[index, "venue_id"] = venue_id_dict[stadium["name"]]
        stadium_df.loc[index, "Venue Name"] = stadium['name']
        if "city" in stadium.keys():
            stadium_df.loc[index, "Venue City"] = stadium['city']
        if "state" in stadium.keys():
            stadium_df.loc[index, "Venue State"] = stadium['state']
        if "country_code" in stadium.keys():
            stadium_df.loc[index, "Venue Country"] = stadium['country_code']
        if "address_line2" in stadium.keys():
            stadium_df.loc[index, "Venue Address"] = stadium['address_line2']
        stadium_df.loc[index, "Venue Lat"]= stadium['lat']
        stadium_df.loc[index, "Venue Lon"]= stadium['lon']
        if "capacity" in stadium['datasource']['raw'].keys():
            capacity = stadium['datasource']['raw']['capacity']
            if type(capacity) == str:
                stadium_df.loc[index, "Venue Capacity"] = int(capacity.replace(",","").replace(".",""))
            else:
                stadium_df.loc[index, "Venue Capacity"] = capacity
        print(f"New stadium added: {stadium['name']}")
    else:
        print("NO RESULTS:" + stadium_df.loc[index,"Team"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stadium_df.loc[index, "venue_id"] = venue_id_dict[stadium["name"]]


New stadium added: American Airlines Center
New stadium added: Amway Center
New stadium added: AT&T Center
New stadium added: Ball Arena
New stadium added: Barclays Center
New stadium added: Capital One Arena
New stadium added: Chase Center
New stadium added: Crypto.com Arena
New stadium added: Crypto.com Arena
New stadium added: FedExForum
New stadium added: Fiserv Forum
New stadium added: Footprint Center
New stadium added: Kaseya Center
New stadium added: Gainbridge Fieldhouse
New stadium added: Golden 1 Center
New stadium added: Little Caesars Arena
New stadium added: Madison Square Garden
New stadium added: Moda Center
New stadium added: Paycom Center
New stadium added: Rocket Mortgage FieldHouse
New stadium added: Scotiabank Arena
New stadium added: Smoothie King Center
New stadium added: Spectrum Center
New stadium added: State Farm Arena
New stadium added: Target Center
New stadium added: TD Garden
New stadium added: Toyota Center
New stadium added: United Center
New stadium ad

In [7]:
stadium_df['venue_id'] = stadium_df['venue_id'].astype(int)
stadium_df.head()

Unnamed: 0,team_id,Team,Lat,Long,league_id,venue_id,Venue Name,Venue City,Venue State,Venue Country,Venue Address,Venue Lat,Venue Lon,Venue Capacity
0,1,Dallas Mavericks,32.790556,-96.810278,1,1,American Airlines Center,Dallas,Texas,us,"2500 Victory Avenue, Dallas, TX 75219, United ...",32.790508,-96.810272,
1,2,Orlando Magic,28.539167,-81.383611,1,2,Amway Center,Orlando,Florida,us,"Hughey Avenue, Orlando, FL 32801, United State...",28.539274,-81.384007,
2,3,San Antonio Spurs,29.426944,-98.4375,1,3,AT&T Center,San Antonio,Texas,us,"San Antonio, TX 78219, United States of America",29.42705,-98.437507,
3,4,Denver Nuggets,39.74892,-105.0084,1,4,Ball Arena,Denver,Colorado,us,"1000 Chopper Circle, Denver, CO 80204, United ...",39.748684,-105.007544,
4,5,Brooklyn Nets,40.682661,-73.975225,1,5,Barclays Center,New York,New York,us,"620 Atlantic Avenue, New York, NY 11217, Unite...",40.682511,-73.975252,


In [8]:
# Split the stadium_df into team_df and venue_df
venue_cols = ["venue_id", "Venue Name", "Venue City", "Venue State", "Venue Country", "Venue Address", "Venue Lat", "Venue Lon", "Venue Capacity"]
venue_df = stadium_df[venue_cols]
venue_col_names = {
    "Venue Name":"venue_name",
    "Venue City":"venue_city",
    "Venue State":"venue_state",
    "Venue Country":"venue_country",
    "Venue Address":"venue_address",
    "Venue Lat":"venue_lat",
    "Venue Lon":"venue_lon",
    "Venue Capacity":"venue_capacity"
}
venue_df = venue_df.rename(columns=venue_col_names)

# Place holder column
venue_df['next_event_id'] = 0

# Drop duplicates from Venue
venue_df = venue_df.drop_duplicates()

venue_df.head()

Unnamed: 0,venue_id,venue_name,venue_city,venue_state,venue_country,venue_address,venue_lat,venue_lon,venue_capacity,next_event_id
0,1,American Airlines Center,Dallas,Texas,us,"2500 Victory Avenue, Dallas, TX 75219, United ...",32.790508,-96.810272,,0
1,2,Amway Center,Orlando,Florida,us,"Hughey Avenue, Orlando, FL 32801, United State...",28.539274,-81.384007,,0
2,3,AT&T Center,San Antonio,Texas,us,"San Antonio, TX 78219, United States of America",29.42705,-98.437507,,0
3,4,Ball Arena,Denver,Colorado,us,"1000 Chopper Circle, Denver, CO 80204, United ...",39.748684,-105.007544,,0
4,5,Barclays Center,New York,New York,us,"620 Atlantic Avenue, New York, NY 11217, Unite...",40.682511,-73.975252,,0


In [9]:
team_df = stadium_df[["team_id", "Team", "venue_id", "league_id"]]
team_df["league_id"] = team_df["league_id"].astype(int)

team_df = team_df.rename(columns={
    "Team":"team"
})

team_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_df["league_id"] = team_df["league_id"].astype(int)


Unnamed: 0,team_id,team,venue_id,league_id
0,1,Dallas Mavericks,1,1
1,2,Orlando Magic,2,1
2,3,San Antonio Spurs,3,1
3,4,Denver Nuggets,4,1
4,5,Brooklyn Nets,5,1


In [10]:
# Austin FC Venue
venue_df[venue_df["venue_name"] == "North Building"]
venue_df.loc[142, 'venue_name'] = "Q2 Stadium"

In [11]:
venue_df.to_csv(os.path.join("..", "Resources", "venue.csv"), index=False)
team_df.to_csv(os.path.join("..", "Resources", "team.csv"), index=False)