# NBA + NCAAB Player Data Collection

In this notebook, we'll be diving into how we can collect player data from the NBA and the NCAA using a package called [sportsreference](https://github.com/roclark/sportsreference). We'll be extracting the past 10 years of player data in the NCAA, then extracting the past 10 years of player data in the NBA, and finally combining the two to see all the different players that have played in both leagues in the past 10 years.

In [1]:
# Standard imports
import numpy as np
import pandas as pd
import json
import time
import os.path
from os import path
import math
import datetime
import unidecode
import requests
from bs4 import BeautifulSoup

## Part 1: Getting all the players in the NCAA from the past 10 years, and all their corresponding college stats

In [2]:
# Modules from sportsrefernece.ncaab for college basketball
from sportsreference.ncaab.boxscore import Boxscore as NCAAB_Boxscore
from sportsreference.ncaab.conferences import Conferences as NCAAB_Conferences
from sportsreference.ncaab.rankings import Rankings as NCAAB_Rankings
from sportsreference.ncaab.roster import Player as NCAAB_Player
from sportsreference.ncaab.roster import Roster as NCAAB_Roster
from sportsreference.ncaab.schedule import Schedule as NCAAB_Schedule
from sportsreference.ncaab.teams import Teams as NCAAB_Teams

In [3]:
# Getting the past 10 years
past_10_years = ['201' + str(end) for end in range(10)]

# Storing player_ids for ease of usage
all_ids = set()

In [None]:
# Finding all the different player_ids for each of the different teams in the NCAA over the past 10 years
for year in past_10_years:
    teams = NCAAB_Teams(year)
    for team in teams:
        player_ids = NCAAB_Roster(team.abbreviation, year, True).players.keys()
        print("Finished with " + team.abbreviation + " in " + year + ". Moving on to the next year.")
        all_ids.update(player_ids)

In [None]:
player_ids = {'player_ids': list(all_ids)}

In [None]:
# Saving all player_ids to local file.
with open('player_ids.json', 'w') as outfile:
    json.dump(player_ids, outfile)
    outfile.close()

### Now that we've obtained all the different player_ids from the past 10 years in the NCAA, we can create a dataframe from all of the players' career averages.

In [4]:
with open('player_ids.json', 'r') as file:
    data = json.loads(file.read())
    player_ids = data['player_ids']

In [5]:
len(player_ids)

20599

In [6]:
def clean_height(df):
    def convert_height_to_int(string):
        """
        Converts height data in format 6-8 (6 feet 8 inches) to only inches (80).
        """
        return int(string[0]) * 12 + int(string[2])
    
    df['height'] = df['height'].apply(convert_height_to_int)
    return df

In [7]:
def construct_data(player_ids, num_players, interval_length, only_career=False, serialized=False):
    """
    Constructs the dataframe of player's data over the years they've played, with options to only return career
    average stats or return serialized data.
    
    Calculates total time to run and prints time taken between given interval length.
    """
    # Return dataframe if it already exists as a csv.
    if serialized and str(path.exists('ncaa_data.csv')):
        return pd.read_csv('ncaa_data.csv')
    
    # Begin recording time
    start_time = time.time()
    last_interval = start_time
    
    # Create basic df
    combined_stats = NCAAB_Player(player_ids[0]).dataframe
    
    # Add player data over time to dataframe.
    for i in range(1, num_players):
        player_id = player_ids[i]
       
        # Getting yearly and career average stats for each player.
        try:
            stats = NCAAB_Player(player_id).dataframe # Remove .loc['Career'] to also get yearly stats
            combined_stats = combined_stats.append(stats)
        except AttributeError:
            print("Error here")
            print(player_id)
            pass
        
        # Printing interval breakpoints
        if i % interval_length == 0:
            print("Currently at: " + str(i))
            curr_time = time.time()
            print("Taken " + str(curr_time - last_interval) + " seconds since the last breakpoint.")
            last_interval = curr_time

    end_time = time.time()
    print("Taken " + str(end_time - start_time) + " seconds to run the above code.")

    # Formatting dataframe
    combined_stats = combined_stats.rename_axis('row_type').reset_index()
    cols = ['player_id']  + [col for col in combined_stats if col != 'player_id']
    combined_stats = combined_stats[cols]
    clean_stats = clean_height(combined_stats)
    
    # Store data in csv
    clean_stats.to_csv('ncaa_data.csv')
    
    return clean_stats

In [8]:
per_id = 0.5 # Takes around 0.5 seconds to query per player_id
duration = len(player_ids) * per_id / 60 / 60
hours = math.floor(duration)
minutes = round(duration % 1 * 60)
print("""Constructing the player data for all the NCAAB players in the past 10 years will take around

{} hours and {} minutes

to finish! Long time-- watch a movie or something and let this run in the background.""".format(hours, minutes))

Constructing the player data for all the NCAAB players in the past 10 years will take around

2 hours and 52 minutes

to finish! Long time-- watch a movie or something and let this run in the background.


In [9]:
data = construct_data(player_ids, 50, 5, serialized=True)
data

Unnamed: 0.1,Unnamed: 0,player_id,row_type,assist_percentage,assists,block_percentage,blocks,box_plus_minus,conference,defensive_box_plus_minus,...,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,usage_percentage,weight,win_shares,win_shares_per_40_minutes
0,0,raymar-morgan-1,2006-07,,24,,13,,big-ten,,...,0.538,18.5,69.0,233,0.498,116,,230,2.8,0.148
1,1,raymar-morgan-1,2007-08,12.3,58,1.4,19,,big-ten,,...,0.6,16.5,83.0,302,0.586,177,26.2,230,5.6,0.224
2,2,raymar-morgan-1,2008-09,11.2,41,0.8,7,,big-ten,,...,0.568,16.4,62.0,230,0.552,127,23.6,230,3.7,0.19
3,3,raymar-morgan-1,2009-10,12.9,63,2.9,25,,big-ten,,...,0.569,16.2,69.0,274,0.544,149,22.0,230,4.3,0.174
4,4,raymar-morgan-1,Career,12.2,186,1.8,64,,,,...,0.571,16.8,283.0,1039,0.548,569,24.0,230,16.4,0.186
5,5,erik-stevenson-1,2018-19,18.2,78,1.0,7,3.4,aac,3.0,...,0.455,18.0,58.0,82,0.415,34,19.5,210,1.6,0.077
6,6,erik-stevenson-1,2019-20,18.1,39,1.5,6,10.5,aac,4.9,...,0.549,9.9,21.0,77,0.481,37,22.7,210,2.5,0.232
7,7,erik-stevenson-1,Career,18.2,117,1.2,13,5.9,,3.7,...,0.494,14.8,79.0,159,0.447,71,20.6,210,4.1,0.131
8,8,kevin-booze-1,2014-15,30.3,85,0.0,0,-5.2,southland,-3.9,...,0.51,23.7,55.0,71,0.338,24,21.4,170,1.1,0.083
9,9,kevin-booze-1,2015-16,37.6,114,0.2,1,-3.6,southland,-3.3,...,0.526,21.9,67.0,125,0.472,59,23.1,170,1.5,0.096


### TODO: Deal with quantitative variables (conference, position, team_abbreviation). First two may be dealt with one hot encoding, but last one is difficult. 

### Idea: Funnel players into two groups, determined by whether or not they play for a historically well-drafted school.

### TODO: Fix NaN values for people with career averages of NaN (look at raymar-morgan-1)

# Part 2: Getting all the players in the NBA rookies that have come from the NCAA from the past 9 years, and all their corresponding rookie year stats

For this part, we did this two different ways, one mapping all the different NCAAB players to their possible NBA counterparts, and the other mapping all the NBA players to their possible NCAAB counterparts (there's no guarentee someone who played in the NCAA played in the NBA, or vice versa). Both options are listed, though we'll probably use the second one more because it's the cleaner, quicker way (less queries to make as there's less NBA players than NCAA players).

In [10]:
# Modules from sportsrefernece.ncaab for college basketball
from sportsreference.nba.boxscore import Boxscore as NBA_Boxscore
from sportsreference.nba.roster import Player as NBA_Player
from sportsreference.nba.roster import Roster as NBA_Roster
from sportsreference.nba.schedule import Schedule as NBA_Schedule
from sportsreference.nba.teams import Teams as NBA_Teams

## First way, taking all the different NCAA player ids, and seeing if these players exist in the NBA. Not as quick, as this way is many-to-one.

In [11]:
def convert_ncaa_nba_name(name):
    """
    Converts the format of the NCAA player_id to the NBA player_id.
    """
    splitted = name.split("-")
    first = splitted[0]
    last, num = splitted[len(splitted) - 2:] # To take into account middle names and hyphened names.
    return last[:5] + first[:2] + '0' + num

In [41]:
def get_nba_players(player_ids, num_players=0):
    """
    Gets the num_players number of players from the given NCAA player_ids
    from 2010 onwards.
    """
    players = []
    ncaa_ids = []
    if not num_players:
        num_players = len(player_ids)
    for num in range(len(player_ids)):
        nba_name = convert_ncaa_nba_name(player_ids[num])
        nba_player = NBA_Player(nba_name)
        
        try:
            nba_player.dataframe
        except TypeError:
            print("NCAA player", player_ids[num], "didn't make it to the NBA from 2009 to today.")
            continue
        
        date_str = nba_player._most_recent_season[:-3]
        try:
            date_obj = datetime.datetime.strptime(date_str, '%Y').date()
        except ValueError:
            print("Not enough data on NBA player", nba_player.player_id, ". Most likely not recent enough player to have data supported.")
            continue
        
        date_2010 = datetime.datetime.strptime('2010', '%Y').date()
        if date_obj >= date_2010:
            print("Found NBA player", nba_player.player_id)
            players.append(nba_player)
            ncaa_ids.append(player_ids[num])
    return players, ncaa_ids


In [13]:
# Timing how long it takes to retrive 'num' number of players
start_time = time.time()
current_nba_players, ncaa_ids = get_nba_players(player_ids, 100)
end_time = time.time()
print("Took", end_time - start_time, "seconds.")
current_nba_players, ncaa_ids

NCAA player garrett-golday-1 didn't make it to the NBA from 2009 to today.
NCAA player josh-armstrong-1 didn't make it to the NBA from 2009 to today.
NCAA player xairius-larry-1 didn't make it to the NBA from 2009 to today.
NCAA player cameron-miles-1 didn't make it to the NBA from 2009 to today.
NCAA player devon-friend-1 didn't make it to the NBA from 2009 to today.
NCAA player tyler-foster-2 didn't make it to the NBA from 2009 to today.
NCAA player mikhail-mclean-1 didn't make it to the NBA from 2009 to today.
NCAA player eric-horn-2 didn't make it to the NBA from 2009 to today.
NCAA player shane-reybold-1 didn't make it to the NBA from 2009 to today.
NCAA player josh-cuthbertson-1 didn't make it to the NBA from 2009 to today.
NCAA player zjori-bosha-1 didn't make it to the NBA from 2009 to today.
NCAA player questin-shropshire-1 didn't make it to the NBA from 2009 to today.
NCAA player robert-mitchell-1 didn't make it to the NBA from 2009 to today.
NCAA player griffin-hoffmann-1 di

([<sportsreference.nba.roster.Player at 0x10dbb9c88>,
  <sportsreference.nba.roster.Player at 0x118e88fd0>,
  <sportsreference.nba.roster.Player at 0x118e89d30>],
 ['nik-stauskas-1', 'lance-stephenson-1', 'jamie-jones-2'])

## Other way, from NBA to NCAA, where we get all the NBA Players from the past 10 years, and then their respective NBA data. Doing it this way because there are less NBA players than NCAA players (less requests)

### Very difficult to convert nba name to ncaa name. Lots of verification, for cases where this doesn't work, we will just exclude their data. 

### I.e. Frank Mason III, Troy Brown Jr., P.J. Tucker. May need to expand this function and fully encapsulate all the different cases

In [14]:
def convert_nba_ncaa_name(name):
    """
    Converts the format of the NBA player_id to the NCAA player_id.
    """
    return name.lower().replace(" ", "-") + "-1"

In [129]:
test = pd.DataFrame(columns=['name'] + list(NCAAB_Player('stephen-curry-1').dataframe.columns))
test

Unnamed: 0,name,assist_percentage,assists,block_percentage,blocks,box_plus_minus,conference,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,...,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,usage_percentage,weight,win_shares,win_shares_per_40_minutes


In [162]:
NCAAB_Player('stephen-curry-1').dataframe.loc['Career'].reset_index()

Unnamed: 0,level_0,assist_percentage,assists,block_percentage,blocks,box_plus_minus,conference,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,...,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,usage_percentage,weight,win_shares,win_shares_per_40_minutes
0,Career,40.2,388,0.6,28,,,,,392,...,0.62,12.9,314,862,0.53,457,38.3,185,27.3,0.322


In [170]:
ditc = NCAAB_Player('stephen-curry-1').dataframe.loc['Career'].iloc[0].to_dict()
ditc['name'] = 'Stephen Curry'
bleep = test.append(ditc, ignore_index=True)
bleep

Unnamed: 0,name,assist_percentage,assists,block_percentage,blocks,box_plus_minus,conference,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,...,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,usage_percentage,weight,win_shares,win_shares_per_40_minutes
0,Stephen Curry,40.2,388,0.6,28,,,,,392,...,0.62,12.9,314,862,0.53,457,38.3,185,27.3,0.322


In [180]:
ditc.dataframe

AttributeError: 'dict' object has no attribute 'dataframe'

# Set columns to NBA and NCAAB so that the tables can merge properly

In [178]:
# peep = NBA_Player('curryst01').dataframe.iloc[0].to_dict()
peep = {}
peep['blargity'] = 'hielo'
peep['name'] = 'Stephen Curry'
bloop = test.append(peep, ignore_index=True)
bloop

Unnamed: 0,name,assist_percentage,assists,block_percentage,blocks,box_plus_minus,conference,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,...,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,usage_percentage,weight,win_shares,win_shares_per_40_minutes,blargity
0,Stephen Curry,,,,,,,,,,...,,,,,,,,,,hielo


In [None]:
bleep.merge()

In [None]:
pd.concat()

In [175]:
bleep.merge(bloop)

ValueError: You are trying to merge on object and float64 columns. If you wish to proceed you should use pd.concat

In [150]:
NCAAB_Player('stephen-curry-1').dataframe.shape

(4, 51)

In [151]:
NBA_Player('curryst01').dataframe.shape

(12, 89)

In [124]:
change = test.append(curry.dataframe.iloc[0])
change

Unnamed: 0,name,assist_percentage,assists,block_percentage,blocks,box_plus_minus,conference,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,...,shooting_fouls_drawn,shooting_guard_percentage,shots_blocked,small_forward_percentage,take_fouls,three_point_shot_percentage_from_corner,three_pointers_assisted_percentage,two_pointers_assisted_percentage,value_over_replacement_player,win_shares_per_48_minutes
"(2009-10,)",,24.6,472,0.5,19,1.0,,-0.7,12.0,308,...,,0.0,,0.0,,0.574,0.741,0.307,2.2,0.077


In [101]:
def get_nba_ncaa_10_years(set_players, one_loop=True):
    """
    Getting the college basketball data for all NBA Players in the past 10 years.
    """
    past_10_ncaab_stats = pd.DataFrame(columns=['name'] + list(NCAAB_Player('stephen-curry-1').dataframe.columns))
    past_10_nba_rookie_stats = pd.DataFrame(columns=['name'] + list(NBA_Player('curryst01').dataframe.columns))
#     player_ids = []
    seen = set() # To keep track of seen NBA players
    teams = NBA_Teams()
    
    for team in teams:
        for year in past_10_years:
            
            start = time.time() # For time measuring purposes
            
            for player in NBA_Roster(team.abbreviation, year).players:
                if player in seen:
                    continue
                seen.add(player)
                unaccented_name = unidecode.unidecode(player.name) # We use this because maybe some of NBA players played in the NCAA with an accented name
                ncaab_player_id = convert_nba_ncaa_name(unaccented_name) 
                try:
                    college_stats = NCAAB_Player(ncaab_player_id)
                except TypeError: # Player doesn't exist
                    continue
                if ncaab_player_id in set_players:
                    nba_dict = player.dataframe.iloc[0].to_dict()
                    nba_dict['name'] = player.name
                    college_dict = college_stats.dataframe.loc['Career'].to_dict()
                    college_dict['name'] = player.name
                
                    past_10_nba_rookie_stats = past_10_nba_rookie_stats.append(nba_dict, ignore_index=True)
                    past_10_ncaab_stats = past_10_ncaab_stats.append(college_dict, ignore_index=True)

            if one_loop:
                end = time.time()
                print("One iteration for one team and one year would take", end - start, "seconds to run.")
                print("Would take", (end - start) * 300 / 60 / 60, "hours to find all players that played in the NBA in the past 10 years and their respective college stats.")
                return past_10_nba_rookie_stats, past_10_ncaab_stats
    
    combined = 
    return past_10_nba_rookie_stats, past_10_ncaab_stats, combined


In [91]:
rookie, college, combined = get_nba_ncaa_10_years(set(player_ids), one_loop=False)

One iteration for one team and one year would take 55.666030168533325 seconds to run.
Would take 4.638835847377777 hours to find all players that played in the NBA in the past 10 years and their respective college stats.


Unnamed: 0,player_id
"(2010-11,)",bledser01
"(2016-17,)",brogdma01
"(2013-14,)",canaais01
"(2018-19,)",colsobo01
"(2015-16,)",connapa01
"(2013-14,)",dellama01
"(2018-19,)",divindo01
"(2018-19,)",duvaltr01
"(2014-15,)",fraziti01
"(2012-13,)",hensojo01


In [108]:
rookie.columns

Index(['and_ones', 'assist_percentage', 'assists', 'block_percentage',
       'blocking_fouls', 'blocks', 'box_plus_minus', 'center_percentage',
       'defensive_box_plus_minus', 'defensive_rebound_percentage',
       'defensive_rebounds', 'defensive_win_shares', 'dunks',
       'effective_field_goal_percentage', 'field_goal_attempts',
       'field_goal_perc_sixteen_foot_plus_two_pointers',
       'field_goal_perc_ten_to_sixteen_feet',
       'field_goal_perc_three_to_ten_feet',
       'field_goal_perc_zero_to_three_feet', 'field_goal_percentage',
       'field_goals', 'free_throw_attempt_rate', 'free_throw_attempts',
       'free_throw_percentage', 'free_throws', 'games_played', 'games_started',
       'half_court_heaves', 'half_court_heaves_made', 'height',
       'lost_ball_turnovers', 'minutes_played', 'nationality',
       'net_plus_minus', 'offensive_box_plus_minus', 'offensive_fouls',
       'offensive_rebound_percentage', 'offensive_rebounds',
       'offensive_win_shares', '

In [97]:
rookie_data = rookie.to_csv("Rookie_Data.csv")

In [98]:
college_data = college.to_csv("College_Data.csv")

Unnamed: 0.1,Unnamed: 0,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,...,turnovers,two_point_attempts,two_point_percentage,two_pointers,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes
0,"('2010-11',)",,24.2,289,1.1,,26,-2.0,0,0.6,...,194,370,0.47,174,0.253,17.9,0.0,205,0.3,0.008
1,"('2016-17',)",,24.3,317,0.5,,12,-0.5,0,-1.1,...,113,442,0.48,212,0.415,18.5,0.8,229,4.1,0.1
2,"('2013-14',)",,13.2,22,1.2,,4,-3.4,0,-2.1,...,21,32,0.406,13,0.308,21.0,-0.1,201,0.2,0.031
3,"('2018-19',)",,4.0,3,0.9,,1,-4.3,0,-1.0,...,3,18,0.444,8,0.875,19.6,-0.1,224,0.2,0.085
4,"('2015-16',)",,9.9,10,0.0,,0,-8.2,0,-2.4,...,10,28,0.286,8,0.5,18.7,-0.2,209,-0.2,-0.076
5,"('2013-14',)",,22.1,187,0.3,,5,-2.1,0,-1.7,...,60,141,0.461,65,0.477,13.2,0.0,200,2.1,0.078
6,"('2018-19',)",,9.7,31,1.2,,6,-1.8,0,0.1,...,19,41,0.683,28,0.679,15.1,0.0,203,0.6,0.069
7,"('2018-19',)",,59.0,2,0.0,,0,9.7,0,-4.1,...,0,2,0.5,1,1.0,20.9,0.0,189,0.1,0.537
8,"('2014-15',)",,39.4,60,0.0,,0,-7.3,0,-2.4,...,25,44,0.364,16,0.188,17.5,-0.3,170,-0.1,-0.017
9,"('2012-13',)",,6.3,31,3.7,,42,-2.9,0,-0.2,...,47,333,0.486,162,0.611,22.6,-0.2,219,1.9,0.109


# Extraneous for now

### There is a variety of different ways players get into the NBA.

### Cases:

1. From NCAA to NBA Draft
2. From NCAA to going undrafted and signed separately
3. From overseas to NBA Draft
4. From overseas to undrafted and signed separately

Problems with finding NBA players that came from the NCAA over the past ten years with this package:
1. URL formatting (hard to determine a perfect one-to-one mapping from NCAA player to NBA player, if the player has indeed played in both leagues.)

In [17]:
curry = NBA_Player('curryst01')
curry

<sportsreference.nba.roster.Player at 0x11d4c4748>

In [20]:
# Removing empty/not useful qualitative columns
new_curry = curry.dataframe.drop(columns=[
    'and_ones', 
    'blocking_fouls', 
    'lost_ball_turnovers', 
    'net_plus_minus', 
    'offensive_fouls',
    'on_court_plus_minus',
    'other_turnovers',
    'passing_turnovers',
    'player_id',
    'points_generated_by_assists',
    'shooting_fouls',
    'shooting_fouls_drawn',
    'shots_blocked',
    'take_fouls',
    'team_abbreviation'
])

# Evaluating runtimes for different methods of data extraction

In [44]:
start_time = time.time()
maindata = []
times = []
midpoint = time.time()
for i in range(100):
    name = player_ids[i]
    realname = name
    name = name.lower()
    name = name.replace(" ", "-")
    website_url = requests.get('https://www.sports-reference.com/cbb/players/'+ name +'.html')
    soup = BeautifulSoup(website_url.content,'lxml')
    soup.prettify()
    My_table = soup.find('table',{'id':'players_per_game'})
    if My_table is not None:
        maindata = maindata + [realname]
        tabledata = My_table.findAll('td')
        data = []
        for cell in tabledata:
            data.append(cell.get_text())
        maindata = maindata +data[-28:]
    if i % 10 == 0:
        midendpoint = time.time()
        times.append(midendpoint - midpoint)
        print(midendpoint - midpoint)
        midpoint = midendpoint
        
end_time = time.time()
end_time - start_time

0.38002777099609375
3.728515148162842
7.556318044662476
5.162214040756226
5.096082925796509
5.424991130828857
5.260880947113037
5.241074085235596
5.042371034622192
6.61317777633667


53.03249406814575

In [45]:
more_times = []
for i in range(10):
    current = time.time()
    construct_data(player_ids, 10, 1)
    end = time.time()
    more_times.append(end - current)

Currently at: 1
Taken 0.9880430698394775 seconds since the last breakpoint.
Currently at: 2
Taken 0.4531288146972656 seconds since the last breakpoint.
Currently at: 3
Taken 0.4620842933654785 seconds since the last breakpoint.
Currently at: 4
Taken 0.41640186309814453 seconds since the last breakpoint.
Currently at: 5
Taken 0.39745616912841797 seconds since the last breakpoint.
Currently at: 6
Taken 0.411693811416626 seconds since the last breakpoint.
Currently at: 7
Taken 0.6019840240478516 seconds since the last breakpoint.
Currently at: 8
Taken 0.5324478149414062 seconds since the last breakpoint.
Currently at: 9
Taken 0.43987321853637695 seconds since the last breakpoint.
Taken 4.703129053115845 seconds to run the above code.
Currently at: 1
Taken 0.9887392520904541 seconds since the last breakpoint.
Currently at: 2
Taken 0.4690537452697754 seconds since the last breakpoint.
Currently at: 3
Taken 0.4631178379058838 seconds since the last breakpoint.
Currently at: 4
Taken 0.4180061

In [46]:
times

[0.38002777099609375,
 3.728515148162842,
 7.556318044662476,
 5.162214040756226,
 5.096082925796509,
 5.424991130828857,
 5.260880947113037,
 5.241074085235596,
 5.042371034622192,
 6.61317777633667]

In [47]:
more_times

[4.722679853439331,
 4.949055910110474,
 4.979331970214844,
 4.570301294326782,
 4.459066152572632,
 4.52507209777832,
 4.517408132553101,
 5.924187898635864,
 6.417668104171753,
 6.589348793029785]

In [48]:
np.mean(times)

4.95056529045105

In [49]:
np.mean(more_times)

5.165412020683289