# College Basketball Scraping Project

In [None]:
# All websites
import json
import bs4
import requests
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
import time

# ESPN website
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Kenpom Site
from kenpompy.utils import login
import kenpompy.summary as kps
import kenpompy.team as kpt
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## NCAA Net

Data comes from the NCAA website and includes each team's NET ranking as well as its records by quadrant and by game location (road, neutral, home).

In [None]:
scraped_data = {}
url = 'https://www.ncaa.com/rankings/basketball-men/d1/ncaa-mens-basketball-net-rankings'

# HTTP request headers sent with the page request.
headers = {
    'user-agent': 'myuseragent',
}

# The dict must go through `headers=`; passing it as `params=` would append
# `?user-agent=myuseragent` to the URL instead of setting the User-Agent header.
# A timeout keeps the cell from hanging forever if the site does not answer.
r = requests.get(url, headers=headers, timeout=30)
print(f'Status code for NCAA Site - {r.status_code}')

Parse the HTML to extract the rankings table and build a dataframe.

In [None]:
# Parse the downloaded page and locate the first <table> on it.
soup = bs4.BeautifulSoup(r.text, "html.parser")
table = soup.find('table')
if table is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise RuntimeError('No <table> found in the NCAA response; '
                       f'status code was {r.status_code}')

# Output column -> position of the <td> holding that value in each table row.
net_td_index = {
    'rank': 0,
    'team': 2,
    'road': 5,
    'neutral': 6,
    'home': 7,
    'quad1': 8,
    'quad2': 9,
    'quad3': 10,
    'quad4': 11,
}

# One list per output column, filled row by row below.
net = {column: [] for column in net_td_index}

data = table.find('tbody')
rows = data.find_all('tr')

for row in rows:
    # Look up the row's cells once and index into them for every column.
    cells = row.find_all('td')
    for column, td_index in net_td_index.items():
        net[column].append(cells[td_index].text)

df = pd.DataFrame.from_dict(net)

Have to fix several school names for consistency.

In [None]:
# NCAA spelling -> spelling used for the same school elsewhere in this notebook.
# Names that are not listed here are left unchanged by `replace`.
ncaa_name_fixes = {
    "Saint Mary's (CA)": "Saint Mary's",
    "UConn": "Connecticut",
    "Fla. Atlantic": "Florida Atlantic",
    "Miami (FL)": "Miami FL",
    "NC State": "N.C. State",
    "Southern California": "USC",
    "Col. of Charleston": "Charleston",
    "Sam Houston": "Sam Houston St.",
    "Southern Miss.": "Southern Miss",
    "St. John's (NY)": "St. John's",
    "LMU (CA)": "Loyola Marymount",
    "Middle Tenn.": "Middle Tennessee",
    "Eastern Wash.": "Eastern Washington",
    "SFA": "Stephen F. Austin",
    "Ole Miss": "Mississippi",
    "Southern Ill.": "Southern Illinois",
    "Seattle U": "Seattle",
    "South Fla.": "South Florida",
    "UNCW": "UNC Wilmington",
    "California Baptist": "Cal Baptist",
    "Western Ky.": "Western Kentucky",
    "Northern Ky.": "Northern Kentucky",
    "Eastern Ky.": "Eastern Kentucky",
    "N.C. Central": "North Carolina Central",
    "App State": "Appalachian St.",
    "FGCU": "Florida Gulf Coast",
    "A&M-Corpus Christi": "Texas A&M Corpus Chris",
    "Grambling": "Grambling St.",
    "Gardner-Webb": "Gardner Webb",
    "St. Thomas (MN)": "St. Thomas",
    "Ga. Southern": "Georgia Southern",
    "Queens (NC)": "Queens",
    "Northern Colo.": "Northern Colorado",
    "Northern Ariz.": "Northern Arizona",
    "UNI": "Northern Iowa",
    "UMES": "Maryland Eastern Shore",
    "SIUE": "SIU Edwardsville",
    "NIU": "Northern Illinois",
    "ETSU": "East Tennessee St.",
    "Western Caro.": "Western Carolina",
    "Western Ill.": "Western Illinois",
    "North Ala.": "North Alabama",
    "Alcorn": "Alcorn St.",
    "UTRGV": "UT Rio Grande Valley",
    "UT Martin": "Tennessee Martin",
    "Southeast Mo. St.": "Southeast Missouri St.",
    "Nicholls": "Nicholls St.",
    "Southern Ind.": "Southern Indiana",
    "Army West Point": "Army",
    "Southern U.": "Southern",
    "Boston U.": "Boston University",
    "Southeastern La.": "Southeastern Louisiana",
    "Prairie View": "Prairie View A&M",
    "UIC": "Illinois Chicago",
    "Miami (OH)": "Miami OH",
    "ULM": "Louisiana Monroe",
    "CSU Bakersfield": "Cal St. Bakersfield",
    "Kansas City": "UMKC",
    "Charleston So.": "Charleston Southern",
    "Tex. A&M-Commerce": "Texas A&M Commerce",
    "Eastern Mich.": "Eastern Michigan",
    "Omaha": "Nebraska Omaha",
    "Loyola Maryland": "Loyola MD",
    "Central Mich.": "Central Michigan",
    "Western Mich.": "Western Michigan",
    "N.C. A&T": "North Carolina A&T",
    "Ark.-Pine Bluff": "Arkansas Pine Bluff",
    "CSUN": "Cal St. Northridge",
    "UIW": "Incarnate Word",
    "Central Ark.": "Central Arkansas",
    "McNeese": "McNeese St.",
    "Central Conn. St.": "Central Connecticut",
    "Eastern Ill.": "Eastern Illinois",
    "Saint Francis (PA)": "St. Francis PA",
    "UAlbany": "Albany",
    "Mississippi Val.": "Mississippi Valley St.",
    "Lamar University": "Lamar",
    "St. Francis Brooklyn": "St. Francis NY",
    "Bethune-Cookman": "Bethune Cookman",
    "FDU": "Fairleigh Dickinson",
}

df['team'] = df['team'].replace(ncaa_name_fixes)

Save to a csv and make sure data looks correct.

In [None]:
# Write the NET table to disk without the index column, then show a short
# preview followed by the completion banner.
df.to_csv(path_or_buf='net.csv', index=False)
print(df.head(), '\n\nNCAA Net extraction completed', sep='\n')

## ESPN BPI and Resume

### BPI
Data is from ESPN website and will include BPI rankings.

In [None]:
# Location of the chromedriver executable on this machine.
path = r'C:\chromedriver/chromedriver'
service = Service(executable_path = path)
# Selenium 4 takes the driver location through a Service object; the
# `executable_path` keyword of webdriver.Chrome was deprecated and later removed.
browser = webdriver.Chrome(service = service)
browser.get('https://www.espn.com/mens-college-basketball/bpi')

# Snapshot of the page as first loaded, before any "load more" clicks.
html = browser.page_source.encode('utf-8')

page_num = 0

# Keep clicking the "load more" link for as long as its container is on the page.
# find_elements returns an empty list when nothing matches, which ends the loop
# without relying on an exception.
while browser.find_elements(By.CLASS_NAME, "loadMore"):
    try:
        browser.find_element(By.CLASS_NAME, "loadMore__link").click()
    except Exception as e:
        # Stop paging, but report the actual cause instead of presenting every
        # failure as a normal end of page.
        print(f"\n\nStopped after {page_num} page(s): {e!r}")
        break
    page_num += 1
    print("getting page number "+str(page_num))
    time.sleep(1)
else:
    print("\n\nEnd of Page")

# NOTE: the browser is intentionally left open because the parsing cell reads
# browser.page_source; call browser.quit() once the page has been parsed.

Parse through html to find data and create dataframe.

In [None]:
# Parse the page as currently rendered in the browser and collect its tables.
soup = bs4.BeautifulSoup(browser.page_source, "html.parser")
table = soup.find_all('table')

# The first table supplies the team names, the second the metric columns.
table_names, table_data = table[0], table[1]

names = table_names.find('tbody')
names_rows = names.find_all('tr')

data = table_data.find('tbody')
rows = data.find_all('tr')

BPI_data = {
    # Team name is the first cell of every row in the names table.
    'team': [name_row.find_all('td')[0].text for name_row in names_rows],
    'BPI': [],
    'rank': [],
    'off_BPI': [],
    'def_BPI': [],
}

# Metric column -> position of its <td> within a row of the data table.
bpi_td_index = {'BPI': 1, 'rank': 2, 'off_BPI': 4, 'def_BPI': 5}

for row in rows:
    cells = row.find_all('td')
    for column, td_index in bpi_td_index.items():
        BPI_data[column].append(cells[td_index].text)

df = pd.DataFrame.from_dict(BPI_data)

Continue to be consistent with school names.

In [None]:
# ESPN display name (school + mascot) -> short school name used for the same
# school elsewhere in this notebook. Names not listed are left unchanged.
espn_team_names = {
    'Houston Cougars': 'Houston',
    'Tennessee Volunteers': 'Tennessee',
    'Alabama Crimson Tide': 'Alabama',
    'UCLA Bruins': 'UCLA',
    'UConn Huskies': 'Connecticut',
    'Purdue Boilermakers': 'Purdue',
    'Texas Longhorns': 'Texas',
    'Baylor Bears': 'Baylor',
    'Gonzaga Bulldogs': 'Gonzaga',
    'Kansas Jayhawks': 'Kansas',
    'Creighton Bluejays': 'Creighton',
    'Arizona Wildcats': 'Arizona',
    'Arkansas Razorbacks': 'Arkansas',
    'Marquette Golden Eagles': 'Marquette',
    "Saint Mary's Gaels": "Saint Mary's",
    'Kentucky Wildcats': 'Kentucky',
    'Indiana Hoosiers': 'Indiana',
    'Maryland Terrapins': 'Maryland',
    'Texas A&M Aggies': 'Texas A&M',
    'Xavier Musketeers': 'Xavier',
    'Rutgers Scarlet Knights': 'Rutgers',
    'West Virginia Mountaineers': 'West Virginia',
    'TCU Horned Frogs': 'TCU',
    'Virginia Cavaliers': 'Virginia',
    'San Diego State Aztecs': 'San Diego St.',
    'Kansas State Wildcats': 'Kansas St.',
    'Duke Blue Devils': 'Duke',
    'Iowa State Cyclones': 'Iowa St.',
    'Auburn Tigers': 'Auburn',
    'Iowa Hawkeyes': 'Iowa',
    'Illinois Fighting Illini': 'Illinois',
    'Providence Friars': 'Providence',
    'Miami Hurricanes': 'Miami FL',
    'Michigan State Spartans': 'Michigan St.',
    'Liberty Flames': 'Liberty',
    'North Carolina Tar Heels': 'North Carolina',
    'Oklahoma State Cowboys': 'Oklahoma St.',
    'Mississippi State Bulldogs': 'Mississippi St.',
    'Memphis Tigers': 'Memphis',
    'Ohio State Buckeyes': 'Ohio St.',
    'Florida Atlantic Owls': 'Florida Atlantic',
    'NC State Wolfpack': 'N.C. State',
    'Oregon Ducks': 'Oregon',
    'Boise State Broncos': 'Boise St.',
    'Northwestern Wildcats': 'Northwestern',
    'Texas Tech Red Raiders': 'Texas Tech',
    'Dayton Flyers': 'Dayton',
    'Sam Houston Bearkats': 'Sam Houston St.',
    'Florida Gators': 'Florida',
    'USC Trojans': 'USC',
    'Yale Bulldogs': 'Yale',
    'Utah State Aggies': 'Utah St.',
    'Penn State Nittany Lions': 'Penn St.',
    'Iona Gaels': 'Iona',
    'Bradley Braves': 'Bradley',
    'Michigan Wolverines': 'Michigan',
    'Clemson Tigers': 'Clemson',
    'UCF Knights': 'UCF',
    'Cincinnati Bearcats': 'Cincinnati',
    'Oklahoma Sooners': 'Oklahoma',
    'Charleston Cougars': 'Charleston',
    'Missouri Tigers': 'Missouri',
    'Virginia Tech Hokies': 'Virginia Tech',
    'Pittsburgh Panthers': 'Pittsburgh',
    'VCU Rams': 'VCU',
    'UAB Blazers': 'UAB',
    'Nevada Wolf Pack': 'Nevada',
    'Drake Bulldogs': 'Drake',
    'Wisconsin Badgers': 'Wisconsin',
    'Washington State Cougars': 'Washington St.',
    'Arizona State Sun Devils': 'Arizona St.',
    'Villanova Wildcats': 'Villanova',
    'Utah Utes': 'Utah',
    'Marshall Thundering Herd': 'Marshall',
    'New Mexico Lobos': 'New Mexico',
    'Kent State Golden Flashes': 'Kent St.',
    'James Madison Dukes': 'James Madison',
    'Seton Hall Pirates': 'Seton Hall',
    'Wake Forest Demon Deacons': 'Wake Forest',
    'Toledo Rockets': 'Toledo',
    'Colorado Buffaloes': 'Colorado',
    'North Texas Mean Green': 'North Texas',
    'Hofstra Pride': 'Hofstra',
    'Stanford Cardinal': 'Stanford',
    'Vanderbilt Commodores': 'Vanderbilt',
    'Oral Roberts Golden Eagles': 'Oral Roberts',
    "St. John's Red Storm": "St. John's",
    'Indiana State Sycamores': 'Indiana St.',
    'UNLV Rebels': 'UNLV',
    'Furman Paladins': 'Furman',
    'Saint Louis Billikens': 'Saint Louis',
    'Tulane Green Wave': 'Tulane',
    'BYU Cougars': 'BYU',
    'UC Irvine Anteaters': 'UC Irvine',
    'Utah Valley Wolverines': 'Utah Valley',
    'Santa Clara Broncos': 'Santa Clara',
    'UMass Lowell River Hawks': 'UMass Lowell',
    'Colgate Raiders': 'Colgate',
    'Grand Canyon Lopes': 'Grand Canyon',
    'Akron Zips': 'Akron',
    'Duquesne Dukes': 'Duquesne',
    'Ole Miss Rebels': 'Mississippi',
    "Louisiana Ragin' Cajuns": 'Louisiana',
    'Stephen F. Austin Lumberjacks': 'Stephen F. Austin',
    'UNC Greensboro Spartans': 'UNC Greensboro',
    'Southern Miss Golden Eagles': 'Southern Miss',
    'Syracuse Orange': 'Syracuse',
    'Youngstown State Penguins': 'Youngstown St.',
    'Vermont Catamounts': 'Vermont',
    'Montana State Bobcats': 'Montana St.',
    'Southern Utah Thunderbirds': 'Southern Utah',
    'Temple Owls': 'Temple',
    'Nebraska Cornhuskers': 'Nebraska',
    'Washington Huskies': 'Washington',
    'Wichita State Shockers': 'Wichita St.',
    'South Alabama Jaguars': 'South Alabama',
    'Belmont Bruins': 'Belmont',
    'Princeton Tigers': 'Princeton',
    'Loyola Marymount Lions': 'Loyola Marymount',
    'Butler Bulldogs': 'Butler',
    'San José State Spartans': 'San Jose St.',
    'Eastern Washington Eagles': 'Eastern Washington',
    'Southern Illinois Salukis': 'Southern Illinois',
    'Pennsylvania Quakers': 'Penn',
    'Charlotte 49ers': 'Charlotte',
    'Troy Trojans': 'Troy',
    'Towson Tigers': 'Towson',
    'Ohio Bobcats': 'Ohio',
    'George Mason Patriots': 'George Mason',
    "Hawai'i Rainbow Warriors": 'Hawaii',
    'San Francisco Dons': 'San Francisco',
    'Colorado State Rams': 'Colorado St.',
    'Cornell Big Red': 'Cornell',
    'UC Santa Barbara Gauchos': 'UC Santa Barbara',
    'LSU Tigers': 'LSU',
    'Middle Tennessee Blue Raiders': 'Middle Tennessee',
    'UNC Asheville Bulldogs': 'UNC Asheville',
    'Kennesaw State Owls': 'Kennesaw St.',
    'South Florida Bulls': 'South Florida',
    'Longwood Lancers': 'Longwood',
    'Fordham Rams': 'Fordham',
    'Georgia Bulldogs': 'Georgia',
    'UC Riverside Highlanders': 'UC Riverside',
    'Missouri State Bears': 'Missouri St.',
    'Cal State Fullerton Titans': 'Cal St. Fullerton',
    'California Baptist Lancers': 'Cal Baptist',
    'Richmond Spiders': 'Richmond',
    'Notre Dame Fighting Irish': 'Notre Dame',
    'Bryant Bulldogs': 'Bryant',
    'Seattle U Redhawks': 'Seattle',
    'Tarleton Texans': 'Tarleton St.',
    'Ball State Cardinals': 'Ball St.',
    'Chattanooga Mocs': 'Chattanooga',
    'North Carolina Central Eagles': 'North Carolina Central',
    "Gardner-Webb Runnin' Bulldogs": 'Gardner Webb',
    'Samford Bulldogs': 'Samford',
    'DePaul Blue Demons': 'DePaul',
    'Northern Kentucky Norse': 'Northern Kentucky',
    'Radford Highlanders': 'Radford',
    'UNC Wilmington Seahawks': 'UNC Wilmington',
    'Abilene Christian Wildcats': 'Abilene Christian',
    'Appalachian State Mountaineers': 'Appalachian St.',
    'Davidson Wildcats': 'Davidson',
    'Boston College Eagles': 'Boston College',
    'South Dakota State Jackrabbits': 'South Dakota St.',
    'Eastern Kentucky Colonels': 'Eastern Kentucky',
    'Wright State Raiders': 'Wright St.',
    'Florida Gulf Coast Eagles': 'Florida Gulf Coast',
    'Lipscomb Bisons': 'Lipscomb',
    'UC Davis Aggies': 'UC Davis',
    'Stetson Hatters': 'Stetson',
    'Long Beach State Beach': 'Long Beach St.',
    'Western Kentucky Hilltoppers': 'Western Kentucky',
    'Louisiana Tech Bulldogs': 'Louisiana Tech',
    'Old Dominion Monarchs': 'Old Dominion',
    'Rider Broncs': 'Rider',
    'Navy Midshipmen': 'Navy',
    'Brown Bears': 'Brown',
    'Utah Tech Trailblazers': 'Utah Tech',
    'Quinnipiac Bobcats': 'Quinnipiac',
    'UMass Minutemen': 'Massachusetts',
    'Drexel Dragons': 'Drexel',
    'Norfolk State Spartans': 'Norfolk St.',
    'St. Bonaventure Bonnies': 'St. Bonaventure',
    'Montana Grizzlies': 'Montana',
    'New Mexico State Aggies': 'New Mexico St.',
    'Siena Saints': 'Siena',
    'Harvard Crimson': 'Harvard',
    'Georgia Tech Yellow Jackets': 'Georgia Tech',
    'Cleveland State Vikings': 'Cleveland St.',
    'Fresno State Bulldogs': 'Fresno St.',
    'Detroit Mercy Titans': 'Detroit Mercy',
    'Portland Pilots': 'Portland',
    'Florida State Seminoles': 'Florida St.',
    'SMU Mustangs': 'SMU',
    "Saint Joseph's Hawks": "Saint Joseph's",
    'Northern Iowa Panthers': 'Northern Iowa',
    'Purdue Fort Wayne Mastodons': 'Purdue Fort Wayne',
    'Buffalo Bulls': 'Buffalo',
    'Air Force Falcons': 'Air Force',
    'St. Thomas - Minnesota Tommies': 'St. Thomas',
    'Milwaukee Panthers': 'Milwaukee',
    'Grambling Tigers': 'Grambling St.',
    'Georgia Southern Eagles': 'Georgia Southern',
    'East Carolina Pirates': 'East Carolina',
    'Wyoming Cowboys': 'Wyoming',
    'Georgetown Hoyas': 'Georgetown',
    'Portland State Vikings': 'Portland St.',
    'George Washington Colonials': 'George Washington',
    'Mercer Bears': 'Mercer',
    'Delaware Blue Hens': 'Delaware',
    'Murray State Racers': 'Murray St.',
    'Western Carolina Catamounts': 'Western Carolina',
    'Wofford Terriers': 'Wofford',
    'Rice Owls': 'Rice',
    'Texas A&M-Corpus Christi Islanders': 'Texas A&M Corpus Chris',
    'La Salle Explorers': 'La Salle',
    'Minnesota Golden Gophers': 'Minnesota',
    'Jacksonville Dolphins': 'Jacksonville',
    'UTEP Miners': 'UTEP',
    'UMBC Retrievers': 'UMBC',
    'Queens University Royals': 'Queens',
    'Campbell Fighting Camels': 'Campbell',
    'Weber State Wildcats': 'Weber St.',
    'Robert Morris Colonials': 'Robert Morris',
    'Rhode Island Rams': 'Rhode Island',
    'Fairfield Stags': 'Fairfield',
    'Northwestern State Demons': 'Northwestern St.',
    'Pepperdine Waves': 'Pepperdine',
    'Army Black Knights': 'Army',
    'Oregon State Beavers': 'Oregon St.',
    'North Florida Ospreys': 'North Florida',
    'South Carolina Gamecocks': 'South Carolina',
    'Pacific Tigers': 'Pacific',
    'Morehead State Eagles': 'Morehead St.',
    'Sacramento State Hornets': 'Sacramento St.',
    'Maryland-Eastern Shore Hawks': 'Maryland Eastern Shore',
    'Bellarmine Knights': 'Bellarmine',
    'Nicholls Colonels': 'Nicholls St.',
    # North Dakota State (Bison) and North Dakota (Fighting Hawks) are
    # different schools; each must map to its own short name.
    'North Dakota State Bison': 'North Dakota St.',
    'Boston University Terriers': 'Boston University',
    'Texas State Bobcats': 'Texas St.',
    'Winthrop Eagles': 'Winthrop',
    'UT Arlington Mavericks': 'UT Arlington',
    'Niagara Purple Eagles': 'Niagara',
    'SIU Edwardsville Cougars': 'SIU Edwardsville',
    'Loyola Chicago Ramblers': 'Loyola Chicago',
    'Jacksonville State Gamecocks': 'Jacksonville St.',
    'Southern Jaguars': 'Southern',
    'UT Martin Skyhawks': 'Tennessee Martin',
    'Northern Arizona Lumberjacks': 'Northern Arizona',
    'Coastal Carolina Chanticleers': 'Coastal Carolina',
    'North Alabama Lions': 'North Alabama',
    'San Diego Toreros': 'San Diego',
    'Howard Bison': 'Howard',
    'Canisius Golden Griffins': 'Canisius',
    'East Tennessee State Buccaneers': 'East Tennessee St.',
    'Lafayette Leopards': 'Lafayette',
    'Oakland Golden Grizzlies': 'Oakland',
    'South Carolina Upstate Spartans': 'USC Upstate',
    'Dartmouth Big Green': 'Dartmouth',
    'UT Rio Grande Valley Vaqueros': 'UT Rio Grande Valley',
    'Southeast Missouri State Redhawks': 'Southeast Missouri St.',
    'Maine Black Bears': 'Maine',
    'American University Eagles': 'American',
    'Florida International Panthers': 'FIU',
    'Morgan State Bears': 'Morgan St.',
    'Lehigh Mountain Hawks': 'Lehigh',
    'Illinois State Redbirds': 'Illinois St.',
    'Northern Colorado Bears': 'Northern Colorado',
    'Bowling Green Falcons': 'Bowling Green',
    'Bucknell Bison': 'Bucknell',
    'Alcorn State Braves': 'Alcorn St.',
    'Miami (OH) Redhawks': 'Miami OH',
    'SE Louisiana Lions': 'Southeastern Louisiana',
    'Manhattan Jaspers': 'Manhattan',
    'New Hampshire Wildcats': 'New Hampshire',
    'UL Monroe Warhawks': 'Louisiana Monroe',
    'Georgia State Panthers': 'Georgia St.',
    "Mount St. Mary's Mountaineers": "Mount St. Mary's",
    'Wagner Seahawks': 'Wagner',
    'Southern Indiana Screaming Eagles': 'Southern Indiana',
    'Prairie View A&M Panthers': 'Prairie View A&M',
    'Idaho State Bengals': 'Idaho St.',
    'Western Illinois Leathernecks': 'Western Illinois',
    'Northern Illinois Huskies': 'Northern Illinois',
    'Valparaiso Beacons': 'Valparaiso',
    'Merrimack Warriors': 'Merrimack',
    'Chicago State Cougars': 'Chicago St.',
    'Tennessee Tech Golden Eagles': 'Tennessee Tech',
    'Charleston Southern Buccaneers': 'Charleston Southern',
    'Marist Red Foxes': 'Marist',
    'Idaho Vandals': 'Idaho',
    'Fairleigh Dickinson Knights': 'Fairleigh Dickinson',
    'Sacred Heart Pioneers': 'Sacred Heart',
    'Louisville Cardinals': 'Louisville',
    'High Point Panthers': 'High Point',
    'Binghamton Bearcats': 'Binghamton',
    'California Golden Bears': 'California',
    'North Carolina A&T Aggies': 'North Carolina A&T',
    'Tennessee State Tigers': 'Tennessee St.',
    'North Dakota Fighting Hawks': 'North Dakota',
    'Arkansas State Red Wolves': 'Arkansas St.',
    'UC San Diego Tritons': 'UC San Diego',
    'Texas Southern Tigers': 'Texas Southern',
    'William & Mary Tribe': 'William & Mary',
    'UIC Flames': 'Illinois Chicago',
    "Saint Peter's Peacocks": "Saint Peter's",
    'St. Francis (PA) Red Flash': 'St. Francis PA',
    'Cal State Bakersfield Roadrunners': 'Cal St. Bakersfield',
    'Jackson State Tigers': 'Jackson St.',
    'Northeastern Huskies': 'Northeastern',
    'Texas A&M-Commerce Lions': 'Texas A&M Commerce',
    'Kansas City Roos': 'UMKC',
    'Loyola Maryland Greyhounds': 'Loyola MD',
    'South Dakota Coyotes': 'South Dakota',
    'The Citadel Bulldogs': 'The Citadel',
    'Stonehill Skyhawks': 'Stonehill',
    'Tulsa Golden Hurricane': 'Tulsa',
    'NJIT Highlanders': 'NJIT',
    'Western Michigan Broncos': 'Western Michigan',
    'Cal State Northridge Matadors': 'Cal St. Northridge',
    'Denver Pioneers': 'Denver',
    'Stony Brook Seawolves': 'Stony Brook',
    'Eastern Michigan Eagles': 'Eastern Michigan',
    'Austin Peay Governors': 'Austin Peay',
    'Little Rock Trojans': 'Little Rock',
    'UTSA Roadrunners': 'UTSA',
    'Central Connecticut Blue Devils': 'Central Connecticut',
    'Alabama A&M Bulldogs': 'Alabama A&M',
    'Elon Phoenix': 'Elon',
    'Central Michigan Chippewas': 'Central Michigan',
    'Omaha Mavericks': 'Nebraska Omaha',
    'Coppin State Eagles': 'Coppin St.',
    'Columbia Lions': 'Columbia',
    'Central Arkansas Bears': 'Central Arkansas',
    'Cal Poly Mustangs': 'Cal Poly',
    'Arkansas-Pine Bluff Golden Lions': 'Arkansas Pine Bluff',
    'Eastern Illinois Panthers': 'Eastern Illinois',
    'South Carolina State Bulldogs': 'South Carolina St.',
    'Holy Cross Crusaders': 'Holy Cross',
    'McNeese Cowboys': 'McNeese St.',
    'Albany Great Danes': 'Albany',
    'Lindenwood Lions': 'Lindenwood',
    'Hampton Pirates': 'Hampton',
    'Presbyterian Blue Hose': 'Presbyterian',
    'St. Francis Brooklyn Terriers': 'St. Francis NY',
    'VMI Keydets': 'VMI',
    'Incarnate Word Cardinals': 'Incarnate Word',
    'Bethune-Cookman Wildcats': 'Bethune Cookman',
    'Alabama State Hornets': 'Alabama St.',
    'Houston Christian Huskies': 'Houston Christian',
    'Evansville Purple Aces': 'Evansville',
    'New Orleans Privateers': 'New Orleans',
    'Monmouth Hawks': 'Monmouth',
    'Delaware State Hornets': 'Delaware St.',
    'Lamar Cardinals': 'Lamar',
    'Mississippi Valley State Delta Devils': 'Mississippi Valley St.',
    'Florida A&M Rattlers': 'Florida A&M',
    'Hartford Hawks': 'Hartford',
    'IUPUI Jaguars': 'IUPUI',
    'Long Island University Sharks': 'LIU',
    'Green Bay Phoenix': 'Green Bay',
}

df['team'] = df['team'].replace(espn_team_names)

Save to a CSV and check that the data looks correct.

In [None]:
# Write the BPI table to disk without the index column, then show a short
# preview followed by the completion banner.
df.to_csv(path_or_buf='BPI.csv', index=False)
print(df.head(), '\n\nESPN BPI extraction completed', sep='\n')

### Resume
Data is from ESPN website and will include resume/strength of schedule rankings.

In [None]:
# Location of the chromedriver executable on this machine.
path = r'C:\chromedriver/chromedriver'
service = Service(executable_path = path)
# Selenium 4 takes the driver location through a Service object; the
# `executable_path` keyword of webdriver.Chrome was deprecated and later removed.
browser = webdriver.Chrome(service = service)
browser.get('https://www.espn.com/mens-college-basketball/bpi/_/view/resume')

# Snapshot of the page as first loaded, before any "load more" clicks.
html = browser.page_source.encode('utf-8')

page_num = 0

# Keep clicking the "load more" link for as long as its container is on the page.
# find_elements returns an empty list when nothing matches, which ends the loop
# without relying on an exception.
while browser.find_elements(By.CLASS_NAME, "loadMore"):
    try:
        browser.find_element(By.CLASS_NAME, "loadMore__link").click()
    except Exception as e:
        # Stop paging, but report the actual cause instead of presenting every
        # failure as a normal end of page.
        print(f"\n\nStopped after {page_num} page(s): {e!r}")
        break
    page_num += 1
    print("getting page number "+str(page_num))
    time.sleep(1)
else:
    print("\n\nEnd of Page")

# NOTE: the browser is intentionally left open because the parsing cell reads
# browser.page_source; call browser.quit() once the page has been parsed.

Parse through data and create the dataframe.

In [None]:
# Parse the page as currently rendered in the browser and collect its tables.
soup = bs4.BeautifulSoup(browser.page_source, "html.parser")
tables = soup.find_all('table')

# The first table supplies the team names, the second the resume columns.
table_names, table_data = tables[0], tables[1]

names = table_names.find('tbody')
names_rows = names.find_all('tr')

data = table_data.find('tbody')
rows = data.find_all('tr')

resume_data = {
    # Team name is the first cell of every row in the names table.
    'team': [name_row.find_all('td')[0].text for name_row in names_rows],
    'SOR': [],
    'BPI_wins': [],
    'SOS': [],
    'NC_SOS': [],
}

# Resume column -> position of its <td> within a row of the data table.
resume_td_index = {'SOR': 1, 'BPI_wins': 4, 'SOS': 5, 'NC_SOS': 6}

for row in rows:
    cells = row.find_all('td')
    for column, td_index in resume_td_index.items():
        resume_data[column].append(cells[td_index].text)

df2 = pd.DataFrame.from_dict(resume_data)

Continue consistency for school names.

In [None]:
df2['team'] = df2.team.replace({'Houston Cougars':'Houston',
 'Tennessee Volunteers':'Tennessee',
 'Alabama Crimson Tide':'Alabama',
 'UCLA Bruins':'UCLA',
 'UConn Huskies':'Connecticut',
 'Purdue Boilermakers':'Purdue',
 'Texas Longhorns':'Texas',
 'Baylor Bears':'Baylor',
 'Gonzaga Bulldogs':'Gonzaga',
 'Kansas Jayhawks':'Kansas',
 'Creighton Bluejays':'Creighton',
 'Arizona Wildcats':'Arizona',
 'Arkansas Razorbacks':'Arkansas',
 'Marquette Golden Eagles':'Marquette',
 "Saint Mary's Gaels":"Saint Mary's",
 'Kentucky Wildcats':'Kentucky',
 'Indiana Hoosiers':'Indiana',
 'Maryland Terrapins':'Maryland',
 'Texas A&M Aggies':'Texas A&M',
 'Xavier Musketeers':'Xavier',
 'Rutgers Scarlet Knights':'Rutgers',
 'West Virginia Mountaineers':'West Virginia',
 'TCU Horned Frogs':'TCU',
 'Virginia Cavaliers':'Virginia',
 'San Diego State Aztecs':'San Diego St.',
 'Kansas State Wildcats':'Kansas St.',
 'Duke Blue Devils':'Duke',
 'Iowa State Cyclones':'Iowa St.',
 'Auburn Tigers':'Auburn',
 'Iowa Hawkeyes':'Iowa',
 'Illinois Fighting Illini':'Illinois',
 'Providence Friars':'Providence',
 'Miami Hurricanes':'Miami FL',
 'Michigan State Spartans':'Michigan St.',
 'Liberty Flames':'Liberty',
 'North Carolina Tar Heels':'North Carolina',
 'Oklahoma State Cowboys':'Oklahoma St.',
 'Mississippi State Bulldogs':'Mississippi St.',
 'Memphis Tigers':'Memphis',
 'Ohio State Buckeyes':'Ohio St.',
 'Florida Atlantic Owls':'Florida Atlantic',
 'NC State Wolfpack':'N.C. State',
 'Oregon Ducks':'Oregon',
 'Boise State Broncos':'Boise St.',
 'Northwestern Wildcats':'Northwestern',
 'Texas Tech Red Raiders':'Texas Tech',
 'Dayton Flyers':'Dayton',
 'Sam Houston Bearkats':'Sam Houston St.',
 'Florida Gators':'Florida',
 'USC Trojans':'USC',
 'Yale Bulldogs':'Yale',
 'Utah State Aggies':'Utah St.',
 'Penn State Nittany Lions':'Penn St.',
 'Iona Gaels':'Iona',
 'Bradley Braves':'Bradley',
 'Michigan Wolverines':'Michigan',
 'Clemson Tigers':'Clemson',
 'UCF Knights':'UCF',
 'Cincinnati Bearcats':'Cincinnati',
 'Oklahoma Sooners':'Oklahoma',
 'Charleston Cougars':'Charleston',
 'Missouri Tigers':'Missouri',
 'Virginia Tech Hokies':'Virginia Tech',
 'Pittsburgh Panthers':'Pittsburgh',
 'VCU Rams':'VCU',
 'UAB Blazers':'UAB',
 'Nevada Wolf Pack':'Nevada',
 'Drake Bulldogs':'Drake',
 'Wisconsin Badgers':'Wisconsin',
 'Washington State Cougars':'Washington St.',
 'Arizona State Sun Devils':'Arizona St.',
 'Villanova Wildcats':'Villanova',
 'Utah Utes':'Utah',
 'Marshall Thundering Herd':'Marshall',
 'New Mexico Lobos':'New Mexico',
 'Kent State Golden Flashes':'Kent St.',
 'James Madison Dukes':'James Madison',
 'Seton Hall Pirates':'Seton Hall',
 'Wake Forest Demon Deacons':'Wake Forest',
 'Toledo Rockets':'Toledo',
 'Colorado Buffaloes':'Colorado',
 'North Texas Mean Green':'North Texas',
 'Hofstra Pride':'Hofstra',
 'Stanford Cardinal':'Stanford',
 'Vanderbilt Commodores':'Vanderbilt',
 'Oral Roberts Golden Eagles':'Oral Roberts',
 "St. John's Red Storm":"St. John's",
 'Indiana State Sycamores':'Indiana St.',
 'UNLV Rebels':'UNLV',
 'Furman Paladins':'Furman',
 'Saint Louis Billikens':'Saint Louis',
 'Tulane Green Wave':'Tulane',
 'BYU Cougars':'BYU',
 'UC Irvine Anteaters':'UC Irvine',
 'Utah Valley Wolverines':'Utah Valley',
 'Santa Clara Broncos':'Santa Clara',
 'UMass Lowell River Hawks':'UMass Lowell',
 'Colgate Raiders':'Colgate',
 'Grand Canyon Lopes':'Grand Canyon',
 'Akron Zips':'Akron',
 'Duquesne Dukes':'Duquesne',
 'Ole Miss Rebels':'Mississippi',
 "Louisiana Ragin' Cajuns":'Louisiana',
 'Stephen F. Austin Lumberjacks':'Stephen F. Austin',
 'UNC Greensboro Spartans':'UNC Greensboro',
 'Southern Miss Golden Eagles':'Southern Miss',
 'Syracuse Orange':'Syracuse',
 'Youngstown State Penguins':'Youngstown St.',
 'Vermont Catamounts':'Vermont',
 'Montana State Bobcats':'Montana St.',
 'Southern Utah Thunderbirds':'Southern Utah',
 'Temple Owls':'Temple',
 'Nebraska Cornhuskers':'Nebraska',
 'Washington Huskies':'Washington',
 'Wichita State Shockers':'Wichita St.',
 'South Alabama Jaguars':'South Alabama',
 'Belmont Bruins':'Belmont',
 'Princeton Tigers':'Princeton',
 'Loyola Marymount Lions':'Loyola Marymount',
 'Butler Bulldogs':'Butler',
 'San José State Spartans':'San Jose St.',
 'Eastern Washington Eagles':'Eastern Washington',
 'Southern Illinois Salukis':'Southern Illinois',
 'Pennsylvania Quakers':'Penn',
 'Charlotte 49ers':'Charlotte',
 'Troy Trojans':'Troy',
 'Towson Tigers':'Towson',
 'Ohio Bobcats':'Ohio',
 'George Mason Patriots':'George Mason',
 "Hawai'i Rainbow Warriors":'Hawaii',
 'San Francisco Dons':'San Francisco',
 'Colorado State Rams':'Colorado St.',
 'Cornell Big Red':'Cornell',
 'UC Santa Barbara Gauchos':'UC Santa Barbara',
 'LSU Tigers':'LSU',
 'Middle Tennessee Blue Raiders':'Middle Tennessee',
 'UNC Asheville Bulldogs':'UNC Asheville',
 'Kennesaw State Owls':'Kennesaw St.',
 'South Florida Bulls':'South Florida',
 'Longwood Lancers':'Longwood',
 'Fordham Rams':'Fordham',
 'Georgia Bulldogs':'Georgia',
 'UC Riverside Highlanders':'UC Riverside',
 'Missouri State Bears':'Missouri St.',
 'Cal State Fullerton Titans':'Cal St. Fullerton',
 'California Baptist Lancers':'Cal Baptist',
 'Richmond Spiders':'Richmond',
 'Notre Dame Fighting Irish':'Notre Dame',
 'Bryant Bulldogs':'Bryant',
 'Seattle U Redhawks':'Seattle',
 'Tarleton Texans':'Tarleton St.',
 'Ball State Cardinals':'Ball St.',
 'Chattanooga Mocs':'Chattanooga',
 'North Carolina Central Eagles':'North Carolina Central',
 "Gardner-Webb Runnin' Bulldogs":'Gardner Webb',
 'Samford Bulldogs':'Samford',
 'DePaul Blue Demons':'DePaul',
 'Northern Kentucky Norse':'Northern Kentucky',
 'Radford Highlanders':'Radford',
 'UNC Wilmington Seahawks':'UNC Wilmington',
 'Abilene Christian Wildcats':'Abilene Christian',
 'Appalachian State Mountaineers':'Appalachian St.',
 'Davidson Wildcats':'Davidson',
 'Boston College Eagles':'Boston College',
 'South Dakota State Jackrabbits':'South Dakota St.',
 'Eastern Kentucky Colonels':'Eastern Kentucky',
 'Wright State Raiders':'Wright St.',
 'Florida Gulf Coast Eagles':'Florida Gulf Coast',
 'Lipscomb Bisons':'Lipscomb',
 'UC Davis Aggies':'UC Davis',
 'Stetson Hatters':'Stetson',
 'Long Beach State Beach':'Long Beach St.',
 'Western Kentucky Hilltoppers':'Western Kentucky',
 'Louisiana Tech Bulldogs':'Louisiana Tech',
 'Old Dominion Monarchs':'Old Dominion',
 'Rider Broncs':'Rider',
 'Navy Midshipmen':'Navy',
 'Brown Bears':'Brown',
 'Utah Tech Trailblazers':'Utah Tech',
 'Quinnipiac Bobcats':'Quinnipiac',
 'UMass Minutemen':'Massachusetts',
 'Drexel Dragons':'Drexel',
 'Norfolk State Spartans':'Norfolk St.',
 'St. Bonaventure Bonnies':'St. Bonaventure',
 'Montana Grizzlies':'Montana',
 'New Mexico State Aggies':'New Mexico St.',
 'Siena Saints':'Siena',
 'Harvard Crimson':'Harvard',
 'Georgia Tech Yellow Jackets':'Georgia Tech',
 'Cleveland State Vikings':'Cleveland St.',
 'Fresno State Bulldogs':'Fresno St.',
 'Detroit Mercy Titans':'Detroit Mercy',
 'Portland Pilots':'Portland',
 'Florida State Seminoles':'Florida St.',
 'SMU Mustangs':'SMU',
 "Saint Joseph's Hawks":"Saint Joseph's",
 'Northern Iowa Panthers':'Northern Iowa',
 'Purdue Fort Wayne Mastodons':'Purdue Fort Wayne',
 'Buffalo Bulls':'Buffalo',
 'Air Force Falcons':'Air Force',
 'St. Thomas - Minnesota Tommies':'St. Thomas',
 'Milwaukee Panthers':'Milwaukee',
 'Grambling Tigers':'Grambling St.',
 'Georgia Southern Eagles':'Georgia Southern',
 'East Carolina Pirates':'East Carolina',
 'Wyoming Cowboys':'Wyoming',
 'Georgetown Hoyas':'Georgetown',
 'Portland State Vikings':'Portland St.',
 'George Washington Colonials':'George Washington',
 'Mercer Bears':'Mercer',
 'Delaware Blue Hens':'Delaware',
 'Murray State Racers':'Murray St.',
 'Western Carolina Catamounts':'Western Carolina',
 'Wofford Terriers':'Wofford',
 'Rice Owls':'Rice',
 'Texas A&M-Corpus Christi Islanders':'Texas A&M Corpus Chris',
 'La Salle Explorers':'La Salle',
 'Minnesota Golden Gophers':'Minnesota',
 'Jacksonville Dolphins':'Jacksonville',
 'UTEP Miners':'UTEP',
 'UMBC Retrievers':'UMBC',
 'Queens University Royals':'Queens',
 'Campbell Fighting Camels':'Campbell',
 'Weber State Wildcats':'Weber St.',
 'Robert Morris Colonials':'Robert Morris',
 'Rhode Island Rams':'Rhode Island',
 'Fairfield Stags':'Fairfield',
 'Northwestern State Demons':'Northwestern St.',
 'Pepperdine Waves':'Pepperdine',
 'Army Black Knights':'Army',
 'Oregon State Beavers':'Oregon St.',
 'North Florida Ospreys':'North Florida',
 'South Carolina Gamecocks':'South Carolina',
 'Pacific Tigers':'Pacific',
 'Morehead State Eagles':'Morehead St.',
 'Sacramento State Hornets':'Sacramento St.',
 'Maryland-Eastern Shore Hawks':'Maryland Eastern Shore',
 'Bellarmine Knights':'Bellarmine',
 'Nicholls Colonels':'Nicholls St.',
 'North Dakota State Bison':'North Dakota',
 'Boston University Terriers':'Boston University',
 'Texas State Bobcats':'Texas St.',
 'Winthrop Eagles':'Winthrop',
 'UT Arlington Mavericks':'UT Arlington',
 'Niagara Purple Eagles':'Niagara',
 'SIU Edwardsville Cougars':'SIU Edwardsville',
 'Loyola Chicago Ramblers':'Loyola Chicago',
 'Jacksonville State Gamecocks':'Jacksonville St.',
 'Southern Jaguars':'Southern',
 'UT Martin Skyhawks':'Tennessee Martin',
 'Northern Arizona Lumberjacks':'Northern Arizona',
 'Coastal Carolina Chanticleers':'Coastal Carolina',
 'North Alabama Lions':'North Alabama',
 'San Diego Toreros':'San Diego',
 'Howard Bison':'Howard',
 'Canisius Golden Griffins':'Canisius',
 'East Tennessee State Buccaneers':'East Tennessee St.',
 'Lafayette Leopards':'Lafayette',
 'Oakland Golden Grizzlies':'Oakland',
 'South Carolina Upstate Spartans':'USC Upstate',
 'Dartmouth Big Green':'Dartmouth',
 'UT Rio Grande Valley Vaqueros':'UT Rio Grande Valley',
 'Southeast Missouri State Redhawks':'Southeast Missouri St.',
 'Maine Black Bears':'Maine',
 'American University Eagles':'American',
 'Florida International Panthers':'FIU',
 'Morgan State Bears':'Morgan St.',
 'Lehigh Mountain Hawks':'Lehigh',
 'Illinois State Redbirds':'Illinois St.',
 'Northern Colorado Bears':'Northern Colorado',
 'Bowling Green Falcons':'Bowling Green',
 'Bucknell Bison':'Bucknell',
 'Alcorn State Braves':'Alcorn St.',
 'Miami (OH) Redhawks':'Miami OH',
 'SE Louisiana Lions':'Southeastern Louisiana',
 'Manhattan Jaspers':'Manhattan',
 'New Hampshire Wildcats':'New Hampshire',
 'UL Monroe Warhawks':'Louisiana Monroe',
 'Georgia State Panthers':'Georgia St.',
 "Mount St. Mary's Mountaineers":"Mount St. Mary's",
 'Wagner Seahawks':'Wagner',
 'Southern Indiana Screaming Eagles':'Southern Indiana',
 'Prairie View A&M Panthers':'Prairie View A&M',
 'Idaho State Bengals':'Idaho St.',
 'Western Illinois Leathernecks':'Western Illinois',
 'Northern Illinois Huskies':'Northern Illinois',
 'Valparaiso Beacons':'Valparaiso',
 'Merrimack Warriors':'Merrimack',
 'Chicago State Cougars':'Chicago St.',
 'Tennessee Tech Golden Eagles':'Tennessee Tech',
 'Charleston Southern Buccaneers':'Charleston Southern',
 'Marist Red Foxes':'Marist',
 'Idaho Vandals':'Idaho',
 'Fairleigh Dickinson Knights':'Fairleigh Dickinson',
 'Sacred Heart Pioneers':'Sacred Heart',
 'Louisville Cardinals':'Louisville',
 'High Point Panthers':'High Point',
 'Binghamton Bearcats':'Binghamton',
 'California Golden Bears':'California',
 'North Carolina A&T Aggies':'North Carolina A&T',
 'Tennessee State Tigers':'Tennessee St.',
 'North Dakota Fighting Hawks':'North Dakota St.',
 'Arkansas State Red Wolves':'Arkansas St.',
 'UC San Diego Tritons':'UC San Diego',
 'Texas Southern Tigers':'Texas Southern',
 'William & Mary Tribe':'William & Mary',
 'UIC Flames':'Illinois Chicago',
 "Saint Peter's Peacocks":"Saint Peter's",
 'St. Francis (PA) Red Flash':'St. Francis PA',
 'Cal State Bakersfield Roadrunners':'Cal St. Bakersfield',
 'Jackson State Tigers':'Jackson St.',
 'Northeastern Huskies':'Northeastern',
 'Texas A&M-Commerce Lions':'Texas A&M Commerce',
 'Kansas City Roos':'UMKC',
 'Loyola Maryland Greyhounds':'Loyola MD',
 'South Dakota Coyotes':'South Dakota',
 'The Citadel Bulldogs':'The Citadel',
 'Stonehill Skyhawks':'Stonehill',
 'Tulsa Golden Hurricane':'Tulsa',
 'NJIT Highlanders':'NJIT',
 'Western Michigan Broncos':'Western Michigan',
 'Cal State Northridge Matadors':'Cal St. Northridge',
 'Denver Pioneers':'Denver',
 'Stony Brook Seawolves':'Stony Brook',
 'Eastern Michigan Eagles':'Eastern Michigan',
 'Austin Peay Governors':'Austin Peay',
 'Little Rock Trojans':'Little Rock',
 'UTSA Roadrunners':'UTSA',
 'Central Connecticut Blue Devils':'Central Connecticut',
 'Alabama A&M Bulldogs':'Alabama A&M',
 'Elon Phoenix':'Elon',
 'Central Michigan Chippewas':'Central Michigan',
 'Omaha Mavericks':'Nebraska Omaha',
 'Coppin State Eagles':'Coppin St.',
 'Columbia Lions':'Columbia',
 'Central Arkansas Bears':'Central Arkansas',
 'Cal Poly Mustangs':'Cal Poly',
 'Arkansas-Pine Bluff Golden Lions':'Arkansas Pine Bluff',
 'Eastern Illinois Panthers':'Eastern Illinois',
 'South Carolina State Bulldogs':'South Carolina St.',
 'Holy Cross Crusaders':'Holy Cross',
 'McNeese Cowboys':'McNeese St.',
 'Albany Great Danes':'Albany',
 'Lindenwood Lions':'Lindenwood',
 'Hampton Pirates':'Hampton',
 'Presbyterian Blue Hose':'Presbyterian',
 'St. Francis Brooklyn Terriers':'St. Francis NY',
 'VMI Keydets':'VMI',
 'Incarnate Word Cardinals':'Incarnate Word',
 'Bethune-Cookman Wildcats':'Bethune Cookman',
 'Alabama State Hornets':'Alabama St.',
 'Houston Christian Huskies':'Houston Christian',
 'Evansville Purple Aces':'Evansville',
 'New Orleans Privateers':'New Orleans',
 'Monmouth Hawks':'Monmouth',
 'Delaware State Hornets':'Delaware St.',
 'Lamar Cardinals':'Lamar',
 'Mississippi Valley State Delta Devils':'Mississippi Valley St.',
 'Florida A&M Rattlers':'Florida A&M',
 'Hartford Hawks':'Hartford',
 'IUPUI Jaguars':'IUPUI',
 'Long Island University Sharks':'LIU',
 'Green Bay Phoenix':'Green Bay',
})

In [None]:
# Persist the ESPN resume table (index column omitted), then print its first
# rows and a completion banner as a quick visual check.
resume_csv_path = 'Resume.csv'
df2.to_csv(resume_csv_path, index=False)

resume_preview = df2.head()
print(resume_preview)
print('\n\nESPN Resume extraction completed')

## Kenpom Efficiency

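A KenPom login is required to use the `kenpompy` module. The credentials are read from a local `kenpom_log.json` file containing `email` and `password` keys.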

In [None]:
# Load the KenPom credentials from the local JSON file; only the parsing needs
# the file handle, so the lookups happen after the file has been closed.
with open('kenpom_log.json') as credentials_file:
    credentials = json.load(credentials_file)

email, password = credentials['email'], credentials['password']

# `browser` is the logged-in object that every kenpompy call below receives.
browser = login(email, password)

Get efficiency stats and rankings from the KenPom website. Save them to a CSV and preview the first rows to check that the data looks correct.

In [None]:
# Raw (unadjusted) and possession-length columns are not used by this project.
unused_columns = [
    'Tempo-Raw', 'Tempo-Raw.Rank',
    'Avg. Poss Length-Offense', 'Avg. Poss Length-Offense.Rank',
    'Avg. Poss Length-Defense', 'Avg. Poss Length-Defense.Rank',
    'Off. Efficiency-Raw', 'Off. Efficiency-Raw.Rank',
    'Def. Efficiency-Raw', 'Def. Efficiency-Raw.Rank',
]

# Short column names for the adjusted metrics that are kept.
short_names = {
    'Conference': 'Conf',
    'Tempo-Adj': 'AdjT',
    'Tempo-Adj.Rank': 'AdjT_Rank',
    'Off. Efficiency-Adj': 'AdjO',
    'Off. Efficiency-Adj.Rank': 'AdjO_Rank',
    'Def. Efficiency-Adj': 'AdjD',
    'Def. Efficiency-Adj.Rank': 'AdjD_Rank',
}

eff_stats = (
    kps.get_efficiency(browser)
    .reset_index(drop=True)
    .drop(columns=unused_columns)
    .rename(columns=short_names)
    .astype({'AdjO': float, 'AdjD': float, 'AdjT': float})
    # Efficiency margin = adjusted offense minus adjusted defense.
    .assign(AdjEM=lambda frame: frame['AdjO'] - frame['AdjD'])
    .sort_values(by=['AdjEM'], ascending=False)
    .reset_index(drop=True)
    # method='first' breaks ties by row order, so every team gets a distinct rank.
    .assign(
        AdjEM_Rank=lambda frame: frame['AdjEM']
        .rank(method='first', ascending=False)
        .astype(int)
    )
)

eff_stats.to_csv('kenpom_efficiency.csv', index=False)
print(eff_stats.head())
print('\n\nKenpom Efficiency extraction completed')

## Kenpom Team Schedules

Get each team's schedule from the KenPom website and combine them into one dataframe.

In [None]:
def _clean_schedule(schedule, team):
    """Reshape one team's raw schedule into the layout used by this project.

    Parameters
    ----------
    schedule : pd.DataFrame
        Frame returned by ``kpt.get_schedule``. It must provide the columns
        'Date', 'Opponent Name', 'Location', 'Conference', 'Record' and
        'Result'; 'Result' is split on ', ' into an outcome and a score, and
        the score is split on '-' into winning and losing points.
    team : str
        Name written to the 'Team' column of the output.

    Returns
    -------
    pd.DataFrame
        Columns 'Date' ('%m-%d' text), 'Team', 'Opponent Name', 'Location',
        'Conference' ('*' or empty), 'Results', 'team_score' and
        'opponent_score' (integers, 0 when the row has no result).

    Raises
    ------
    ValueError
        If a score is not numeric or a date does not match '%a %b %d'.
    """
    # Work on a private copy so the caller's frame is never modified.
    schedule = schedule.copy()

    # reindex guarantees both columns exist even when no row contains ', '.
    result_parts = (
        schedule['Result']
        .str.split(pat=', ', n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    schedule['Results'] = result_parts[0]
    schedule['Score'] = result_parts[1]
    schedule = schedule.drop(columns='Result')

    # Only single-character outcomes are kept; anything else is marked missing
    # and removed, together with every other row that has a missing value.
    is_outcome = schedule['Results'].str.len() == 1
    schedule['Results'] = schedule['Results'].where(is_outcome)
    schedule = schedule.dropna().copy()

    # Rows whose 'Record' is shorter than three characters get blank result
    # fields (presumably games not yet played -- confirm against the site).
    no_record = schedule['Record'].str.len() < 3
    schedule.loc[no_record, ['Results', 'Score']] = ''

    schedule = schedule[
        ['Date', 'Opponent Name', 'Location', 'Conference', 'Results', 'Score']
    ].copy()

    # Any non-empty conference value is normalised to a '*' flag.
    schedule.loc[schedule['Conference'].str.len() > 0, 'Conference'] = '*'

    # The score lists the winner's points first, so map the two halves onto
    # team / opponent according to the outcome; other rows stay blank.
    score_parts = (
        schedule['Score']
        .str.split(pat='-', n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    winner_points, loser_points = score_parts[0], score_parts[1]
    won = schedule['Results'] == 'W'
    lost = schedule['Results'] == 'L'
    team_points = winner_points.where(won, loser_points.where(lost, ''))
    opponent_points = loser_points.where(won, winner_points.where(lost, ''))

    # Blank scores become 0 so both columns can be stored as integers.
    schedule['team_score'] = team_points.replace('', '0').astype(int)
    schedule['opponent_score'] = opponent_points.replace('', '0').astype(int)
    schedule['Team'] = team

    # The source dates carry no year; keep month-day text for the next cell.
    schedule['Date'] = pd.to_datetime(
        schedule['Date'], format='%a %b %d'
    ).dt.strftime('%m-%d')

    return schedule[['Date', 'Team', 'Opponent Name', 'Location', 'Conference',
                     'Results', 'team_score', 'opponent_score']]


teams = kpt.get_valid_teams(browser)
team_schedules = []
num = 0

for num, team in enumerate(teams, start=1):
    schedule = _clean_schedule(kpt.get_schedule(browser, team=team), team)
    team_schedules.append(schedule)

    # Pause between requests (presumably to avoid overloading the site).
    time.sleep(4)
    print(f'{num}-{team} Schedule Done')

# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop re-copies it every iteration; concatenate once instead. Row labels are
# kept as-is, matching what the per-team append produced.
full_schedule = pd.concat(team_schedules) if team_schedules else pd.DataFrame()

Fix the date column by adding the correct season year, then save to a CSV. Inspect the shape and the last rows to verify the data.

In [None]:
# The season spans two calendar years: games in the second half of the year
# belong to the start year, games in the first half to the end year.
SEASON_START_YEAR = 2022
SEASON_END_YEAR = 2023

# 'Date' holds 'MM-DD' text, so parsing it yields the placeholder year 1900.
month_day = pd.to_datetime(full_schedule['Date'], format='%m-%d')
months = month_day.dt.month.to_numpy()
days = month_day.dt.day.to_numpy()

# Exhaustive split: the previous `> 6` / `< 6` masks left June at year 1900.
years = np.where(months > 6, SEASON_START_YEAR, SEASON_END_YEAR)

# Rebuild the dates from their components. Plain arrays are used because
# full_schedule may carry repeated row labels from the per-team frames, and
# positional assignment avoids aligning on them.
full_schedule['Date'] = pd.to_datetime(
    pd.DataFrame({'year': years, 'month': months, 'day': days})
).to_numpy()

full_schedule.to_csv('team_schedules.csv', index=False)
print(full_schedule.shape)
print(full_schedule.tail())
print('\n\nTeam Schedules extraction completed')

In [None]:
# Final banner marking the end of the whole extraction run
# (spelling fixed: "Extration" -> "Extraction").
print('\n\nData Extraction process completed')