# Web-Scrapping 2021 Division 1 NCAA basketball data 

### Ryan Standridge

URL to data: https://www.ncaa.com/rankings/basketball-men/d1/ncaa-mens-basketball-net-rankings

In [1]:
# Import modules as needed
import requests # pip install requests -- loads in the webpage to scrape
from bs4 import BeautifulSoup as bs

import pandas as pd

In [2]:
# Load the webpage content
URL = "https://www.ncaa.com/rankings/basketball-men/d1/ncaa-mens-basketball-net-rankings"
r = requests.get(URL)

# Convert to a beautiful soup object
soup = bs(r.content)

# Print out the html
print(soup.prettify())

<!DOCTYPE html>
<html dir="ltr" lang="en" prefix="og: https://ogp.me/ns#">
 <head>
  <meta charset="utf-8"/>
  <script type="text/javascript">
   (window.NREUM||(NREUM={})).init={ajax:{deny_list:["bam-cell.nr-data.net"]}};(window.NREUM||(NREUM={})).loader_config={xpid:"UgYCUlJACQMDVVZVDgIF",licenseKey:"b4c08c65aa",applicationID:"103177932"};window.NREUM||(NREUM={}),__nr_require=function(t,e,n){function r(n){if(!e[n]){var i=e[n]={exports:{}};t[n][0].call(i.exports,function(e){var i=t[n][1][e];return r(i||e)},i,i.exports)}return e[n].exports}if("function"==typeof __nr_require)return __nr_require;for(var i=0;i<n.length;i++)r(n[i]);return r}({1:[function(t,e,n){function r(t){try{s.console&&console.log(t)}catch(e){}}var i,o=t("ee"),a=t(28),s={};try{i=localStorage.getItem("__nr_flags").split(","),console&&"function"==typeof console.log&&(s.console=!0,i.indexOf("dev")!==-1&&(s.dev=!0),i.indexOf("nr_dev")!==-1&&(s.nrDev=!0))}catch(c){}s.nrDev&&o.on("internal-error",function(t){r(t.stack)}),s.d

# Lets start scrapping the NCAA basketball ranking data

URL: https://www.ncaa.com/rankings/basketball-men/d1/ncaa-mens-basketball-net-rankings

# Get the rankings

## Exploring the Website to see what to extract

In [3]:
new = soup.find_all("tr")
print(new[1].find_all('td')[0].string)

1


## Extracting the Column names

In [4]:
columns = [x.string for x in soup.find_all('th')]
print(columns)

['Rank', 'Previous', 'School', 'Conference', 'Record', 'Road', 'Neutral', 'Home', 'Quad 1', 'Quad 2', 'Quad 3', 'Quad 4']


## Extracting the rows

In [5]:
rows = soup.find_all('tr')
print(rows)

[<tr>
<th>Rank</th>
<th>Previous</th>
<th>School</th>
<th>Conference</th>
<th>Record</th>
<th>Road</th>
<th>Neutral</th>
<th>Home</th>
<th>Quad 1</th>
<th>Quad 2</th>
<th>Quad 3</th>
<th>Quad 4</th>
</tr>, <tr>
<td>1</td>
<td>1</td>
<td>Baylor</td>
<td>Big 12</td>
<td>11-0</td>
<td>1-0</td>
<td>3-0</td>
<td>7-0</td>
<td>2-0</td>
<td>2-0</td>
<td>2-0</td>
<td>5-0</td>
</tr>, <tr>
<td>2</td>
<td>2</td>
<td>Arizona</td>
<td>Pac-12</td>
<td>11-1</td>
<td>2-1</td>
<td>2-0</td>
<td>7-0</td>
<td>2-1</td>
<td>2-0</td>
<td>0-0</td>
<td>7-0</td>
</tr>, <tr>
<td>3</td>
<td>3</td>
<td>LSU</td>
<td>SEC</td>
<td>12-0</td>
<td>0-0</td>
<td>4-0</td>
<td>8-0</td>
<td>1-0</td>
<td>2-0</td>
<td>5-0</td>
<td>4-0</td>
</tr>, <tr>
<td>4</td>
<td>4</td>
<td>Houston</td>
<td>AAC</td>
<td>11-2</td>
<td>0-1</td>
<td>3-1</td>
<td>8-0</td>
<td>0-2</td>
<td>3-0</td>
<td>4-0</td>
<td>4-0</td>
</tr>, <tr>
<td>5</td>
<td>5</td>
<td>Gonzaga</td>
<td>WCC</td>
<td>10-2</td>
<td>0-0</td>
<td>3-2</td>
<td>7-0</td>
<td>3-2

In [6]:
row_num = []
for i in range(1,len(rows)):
    row_num.append([y.string for y in rows[i].find_all('td')])

In [7]:
print(row_num)

[['1', '1', 'Baylor', 'Big 12', '11-0', '1-0', '3-0', '7-0', '2-0', '2-0', '2-0', '5-0'], ['2', '2', 'Arizona', 'Pac-12', '11-1', '2-1', '2-0', '7-0', '2-1', '2-0', '0-0', '7-0'], ['3', '3', 'LSU', 'SEC', '12-0', '0-0', '4-0', '8-0', '1-0', '2-0', '5-0', '4-0'], ['4', '4', 'Houston', 'AAC', '11-2', '0-1', '3-1', '8-0', '0-2', '3-0', '4-0', '4-0'], ['5', '5', 'Gonzaga', 'WCC', '10-2', '0-0', '3-2', '7-0', '3-2', '0-0', '0-0', '7-0'], ['6', '6', 'Purdue', 'Big Ten', '11-1', '0-1', '4-0', '7-0', '3-0', '1-0', '2-1', '5-0'], ['7', '7', 'Kansas', 'Big 12', '9-1', '1-0', '3-1', '5-0', '1-0', '3-0', '1-1', '4-0'], ['8', '8', 'Tennessee', 'SEC', '9-2', '1-0', '1-2', '7-0', '2-2', '1-0', '1-0', '5-0'], ['9', '9', 'Duke', 'ACC', '11-1', '0-1', '2-0', '9-0', '3-1', '0-0', '1-0', '7-0'], ['10', '10', 'Auburn', 'SEC', '11-1', '2-0', '3-1', '6-0', '2-1', '2-0', '2-0', '5-0'], ['11', '11', 'Michigan St.', 'Big Ten', '10-2', '2-0', '3-2', '5-0', '3-2', '1-0', '4-0', '2-0'], ['12', '12', 'Villanova', '

## Put data into a Pandas Dataframe

In [8]:
#import pandas as pd
df_rank = pd.DataFrame(row_num)

In [9]:
df_rank.columns = columns

In [10]:
# Quick Glance at the Rankings Dataset
## Note: We know the Rank Section has values of '-'
## We know that if the value is '-' then it is the same as the rank above
df_rank

Unnamed: 0,Rank,Previous,School,Conference,Record,Road,Neutral,Home,Quad 1,Quad 2,Quad 3,Quad 4
0,1,1,Baylor,Big 12,11-0,1-0,3-0,7-0,2-0,2-0,2-0,5-0
1,2,2,Arizona,Pac-12,11-1,2-1,2-0,7-0,2-1,2-0,0-0,7-0
2,3,3,LSU,SEC,12-0,0-0,4-0,8-0,1-0,2-0,5-0,4-0
3,4,4,Houston,AAC,11-2,0-1,3-1,8-0,0-2,3-0,4-0,4-0
4,5,5,Gonzaga,WCC,10-2,0-0,3-2,7-0,3-2,0-0,0-0,7-0
...,...,...,...,...,...,...,...,...,...,...,...,...
353,354,354,Mississippi Val.,SWAC,0-10,0-8,0-0,0-2,0-1,0-4,0-3,0-2
354,355,355,Eastern Ill.,OVC,0-11,0-8,0-1,0-2,0-2,0-1,0-3,0-5
355,356,356,IUPUI,Horizon,0-10,0-4,0-2,0-4,0-0,0-1,0-3,0-6
356,357,357,Maine,America East,0-7,0-5,0-0,0-2,0-1,0-2,0-0,0-4


# Lets get the other stats

Note: 

Scoring Offense: https://www.ncaa.com/stats/basketball-men/d1/current/team/145
https://www.ncaa.com/stats/basketball-men/d1/current/team/145/p2
https://www.ncaa.com/stats/basketball-men/d1/current/team/145/p7

Scoring Defense: https://www.ncaa.com/stats/basketball-men/d1/current/team/146
https://www.ncaa.com/stats/basketball-men/d1/current/team/146/p2
https://www.ncaa.com/stats/basketball-men/d1/current/team/146/p7

3-pt Field Goal Attempts: https://www.ncaa.com/stats/basketball-men/d1/current/team/625
https://www.ncaa.com/stats/basketball-men/d1/current/team/625/p2
https://www.ncaa.com/stats/basketball-men/d1/current/team/625/p7

Assist Turnover Ratio: https://www.ncaa.com/stats/basketball-men/d1/current/team/474
https://www.ncaa.com/stats/basketball-men/d1/current/team/474/p2
https://www.ncaa.com/stats/basketball-men/d1/current/team/474/p7

I Noticed how all of the links are almost identical except for the numbers.

So we can possibly loop through all of the statistics in one loop and add it to the dataframe once we get all of the numbers.

* ASSISTS PER GAME: 216
* BLOCKED SHOTS PER GAME: 214
* DEFENSIVE REBOUNDS PER GAME: 859
* FEWEST FOULS: 642
* FEWEST TURNOVERS: 640
* FIELD-GOAL PERCENTAGE: 148
* FIELD-GOAL PERCENTAGE DEFENSE: 149
* FREE THROW ATTEMPTS: 638 --these three say the same thing but add one each
* FREE THROWS MADE: 633 --
* FREE-THROW PERCENTAGE: 150 --keep
* OFFENSIVE REBOUNDS PER GAME: 857
* PERSONAL FOULS PER GAME: 286
* REBOUND MARGIN: 151
* SCORING MARGIN: 147
* STEALS PER GAME: 215
* THREE PT FG DEFENSE: 518
* THREE-POINT FIELD GOALS PER GAME: 153
* THREE-POINT FIELD-GOAL PERCENTAGE: 152
* TOTAL 3-POINT FGM: 622
* TOTAL ASSISTS: 606
* TOTAL BLOCKS: 609
* TOTAL REBOUNDS: 602
* TOTAL REBOUNDS PER GAME: 932
* TOTAL STEALS: 616
* TURNOVER MARGIN: 519
* TURNOVERS FORCED: 931
* TURNOVERS PER GAME: 217
* WON-LOST PERCENTAGE: 168

We will need to figure out which ones we really want to include in the dataset since a lot of them have the same information.


# Main

In [11]:
# Testing using a few stats -- Scoring Offense, Scoring Defense, and Won-Lost Percentage
num_list = ['145', '146', '168']

# intialize column list
columns_list = []

# Loop through stats
for num in num_list:

    # Load the webpage content -- using the base URL (no p2-p7) for column names/p1 and p2-p7 for other data
    # List of URLs for the Scoring Offense Section - Looping through the pages
    URL_stat_list = ['https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num,
                     'https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num + '/p2',
                     'https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num + '/p3',
                     'https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num + '/p4',
                     'https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num + '/p5',
                     'https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num + '/p6',
                     'https://www.ncaa.com/stats/basketball-men/d1/current/team/' + num + '/p7']
    
    ## Getting the column names
    r_col = requests.get(URL_stat_list[0])

    # Convert to a beautiful soup object
    soup_col = bs(r_col.content)

    columns = [x.string + num for x in soup_col.find_all('th')]
    
    ## Getting the data -- rows
    rows = soup_col.find_all('tr')
    
    ## just looping through the pages now
    # Initialize the list for rows
    #####row_num_ScOff = []
    data_list = []

    for url in URL_stat_list:
        # load each webpage
        r = requests.get(url)

        # Convert webpage to a beautiful soup object
        soup = bs(r.content)

        # get row data 
        #rows_ScOff_pages = soup_ScOff_pages.find_all('tr')
        stat_data = soup.find_all('tr')

        # initalize a new list to get all pages of data
        #row_num_ScOff_page = []
        page_data = []
        
        # Loop through each row of data except the first row (column names)
        for i in range(1,len(stat_data)):
            # add data to a new variable
            #row_num_ScOff_page = [y.string for y in rows_ScOff_pages[i].find_all('td')]
            page_data = [y.string for y in stat_data[i].find_all('td')]
            
            #print(row_num_ScOff_page[1]) -- used to visualize issues that arose
            #print(page_data[1]) -- used to visualize issues that arose

            # School Name is None since the html code is different -- Lets fix it
            if page_data[1] == None: # use 1 since the school/team column is column 1
                page_data[1] = stat_data[i].find_all('a')[0].string

            # append the data from the new page to the original list
            data_list.append(page_data)
    
    # testing to see how this runs -- might can make pandas df and write to csv for each variable then
    ## merge after
    #print(data_list) -- worked like i planned
    # Make into pandas dataframe
    df = pd.DataFrame(data_list)
    
    df.columns = columns
    
    # Rename the Rank Column to ScOff_Rank
    ##df = df.rename(columns = {'Rank':'ScOff_Rank', 'Team':'School'})
    
    file_name = 'NCAA_Stat' + num
    #print(file_name)
    
    df.to_csv(file_name, index = False)



In [12]:
new_df1 = pd.read_csv('NCAA_Stat145')
new_df1

Unnamed: 0,Rank145,Team145,GM145,PTS145,PPG145
0,1,Arizona,12,1074,89.5
1,2,Cornell,10,881,88.1
2,3,Iowa,12,1051,87.6
3,4,South Dakota St.,15,1306,87.1
4,5,Purdue,12,1030,85.8
...,...,...,...,...,...
345,346,Jackson St.,11,628,57.1
346,347,South Fla.,12,682,56.8
347,348,Northern Ill.,10,567,56.7
348,349,Alabama A&M,9,493,54.8


In [13]:
new_df2 = pd.read_csv('NCAA_Stat146')
new_df2

Unnamed: 0,Rank146,Team146,GM146,OPP PTS146,OPP PPG146
0,1,Texas,11,583,53.0
1,2,LSU,12,649,54.1
2,3,Fresno St.,13,714,54.9
3,4,Jacksonville,11,605,55.0
4,5,North Texas,10,551,55.1
...,...,...,...,...,...
345,-,Long Beach St.,10,812,81.2
346,347,Central Mich.,11,925,84.1
347,348,Central Ark.,11,932,84.7
348,349,Prairie View,10,884,88.4


In [14]:
new_df3 = pd.read_csv('NCAA_Stat168')
new_df3

Unnamed: 0,Rank168,Team168,W168,L168,Pct168
0,1,Iowa St.,12,0,100.0
1,-,LSU,12,0,100.0
2,-,Southern California,12,0,100.0
3,-,Baylor,11,0,100.0
4,-,Colorado St.,10,0,100.0
...,...,...,...,...,...
345,346,William & Mary,1,12,7.7
346,347,Coppin St.,1,14,6.7
347,348,Fairleigh Dickinson,0,10,0.0
348,-,Mississippi Val.,0,10,0.0


# Now we need to Clean the dataframes and merge them into one Dataframe

We need to clean rows that have value "-". We know that the value "-" means the Statistic Rank of the team is a tie between multiple teams. In order to change the value, we will just have the team with rank "-" be equal to the rank of the team before/above.

## Fixing Rank Values ("-") & Changing Column Names

In [15]:
# Recall: num_list = ['145', '146', '168']

# set stat names to change values
stat_names = ['ScOff','ScDef','WLPerc']
# initialize count object - will help add stat names
count = 0

for num in num_list:
    # read in the csv we made in previous code
    df_num = pd.read_csv('NCAA_Stat' + num)
    
    # add 1 to count as loops through num_list
    count += 1
    
    # Looping through the indecies
    for i in df_num.index:
        # replace rank values if value is "-"
        if df_num['Rank'+num][i] == '-':
            df_num['Rank'+num][i] = df_num['Rank'+num][i - 1]
    
    # Changing Variable Names
    for col in df_num.columns:
        # Replace number in column name with the stat name
        if num in col:
            if 'GM' in col:
                df_num.rename(columns = {col : 'GM'}, 
                              inplace = True)
            elif 'Team' in col:
                df_num.rename(columns = {col : 'School'}, 
                              inplace = True)
            else:
                df_num.rename(columns = {col : col[0:col.find(num[1])-1] + "-" + stat_names[count - 1]}, 
                              inplace = True)
            
            # replace spaces
            if " " in col:
                df_num.rename(columns = {col : col.replace(" ", "_")}, 
                              inplace = True)
            else:
                continue
        else:
            continue
            
    # lets see if code above worked -- it does, can uncomment if wanted
    print(df_num.columns)
    
    # Recreate the file -- Only need to run once
    file_name = 'NCAA_Stat' + num
    #print(file_name)
    
    df_num.to_csv(file_name, index = False)
            

Index(['Rank-ScOff', 'School', 'GM', 'PTS-ScOff', 'PPG-ScOff'], dtype='object')
Index(['Rank-ScDef', 'School', 'GM', 'OPP PTS-ScDef', 'OPP PPG-ScDef'], dtype='object')
Index(['Rank-WLPerc', 'School', 'W-WLPerc', 'L-WLPerc', 'Pct-WLPerc'], dtype='object')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# Merge Datasets

In [16]:
df_rank

Unnamed: 0,Rank,Previous,School,Conference,Record,Road,Neutral,Home,Quad 1,Quad 2,Quad 3,Quad 4
0,1,1,Baylor,Big 12,11-0,1-0,3-0,7-0,2-0,2-0,2-0,5-0
1,2,2,Arizona,Pac-12,11-1,2-1,2-0,7-0,2-1,2-0,0-0,7-0
2,3,3,LSU,SEC,12-0,0-0,4-0,8-0,1-0,2-0,5-0,4-0
3,4,4,Houston,AAC,11-2,0-1,3-1,8-0,0-2,3-0,4-0,4-0
4,5,5,Gonzaga,WCC,10-2,0-0,3-2,7-0,3-2,0-0,0-0,7-0
...,...,...,...,...,...,...,...,...,...,...,...,...
353,354,354,Mississippi Val.,SWAC,0-10,0-8,0-0,0-2,0-1,0-4,0-3,0-2
354,355,355,Eastern Ill.,OVC,0-11,0-8,0-1,0-2,0-2,0-1,0-3,0-5
355,356,356,IUPUI,Horizon,0-10,0-4,0-2,0-4,0-0,0-1,0-3,0-6
356,357,357,Maine,America East,0-7,0-5,0-0,0-2,0-1,0-2,0-0,0-4


In [17]:
df_stat1 = pd.read_csv('NCAA_Stat145')
df_stat1

Unnamed: 0,Rank-ScOff,School,GM,PTS-ScOff,PPG-ScOff
0,1,Arizona,12,1074,89.5
1,2,Cornell,10,881,88.1
2,3,Iowa,12,1051,87.6
3,4,South Dakota St.,15,1306,87.1
4,5,Purdue,12,1030,85.8
...,...,...,...,...,...
345,346,Jackson St.,11,628,57.1
346,347,South Fla.,12,682,56.8
347,348,Northern Ill.,10,567,56.7
348,349,Alabama A&M,9,493,54.8


In [19]:
# set stat names to change values
stat_names = ['ScOff','ScDef','WLPerc']
# initialize count object - will help add stat names
count = 0

for num in num_list:
    # read in the csv we made in previous code
    df_stat = pd.read_csv('NCAA_Stat' + num)
    count += 1
    
    df_rank = df_rank.merge(df_stat,
                            how="inner",
                            on='School')
    # if want to see what happens -- uncomment print
    #print(df_rank)

    Rank Previous            School    Conference Record Road Neutral Home  \
0      1        1            Baylor        Big 12   11-0  1-0     3-0  7-0   
1      2        2           Arizona        Pac-12   11-1  2-1     2-0  7-0   
2      3        3               LSU           SEC   12-0  0-0     4-0  8-0   
3      4        4           Houston           AAC   11-2  0-1     3-1  8-0   
4      5        5           Gonzaga           WCC   10-2  0-0     3-2  7-0   
..   ...      ...               ...           ...    ...  ...     ...  ...   
345  354      354  Mississippi Val.          SWAC   0-10  0-8     0-0  0-2   
346  355      355      Eastern Ill.           OVC   0-11  0-8     0-1  0-2   
347  356      356             IUPUI       Horizon   0-10  0-4     0-2  0-4   
348  357      357             Maine  America East    0-7  0-5     0-0  0-2   
349  358      358      Delaware St.          MEAC   0-10  0-6     0-1  0-3   

    Quad 1 Quad 2 Quad 3 Quad 4  Rank-ScOff  GM  PTS-ScOff  PPG

In [20]:
# Make copy of dataset in case a mistake occurs
df_rank_copy = df_rank
df_rank_copy

Unnamed: 0,Rank,Previous,School,Conference,Record,Road,Neutral,Home,Quad 1,Quad 2,...,PTS-ScOff,PPG-ScOff,Rank-ScDef,GM_y,OPP PTS-ScDef,OPP PPG-ScDef,Rank-WLPerc,W-WLPerc,L-WLPerc,Pct-WLPerc
0,1,1,Baylor,Big 12,11-0,1-0,3-0,7-0,2-0,2-0,...,901,81.9,6,11,614,55.8,1,11,0,100.0
1,2,2,Arizona,Pac-12,11-1,2-1,2-0,7-0,2-1,2-0,...,1074,89.5,81,12,768,64.0,7,11,1,91.7
2,3,3,LSU,SEC,12-0,0-0,4-0,8-0,1-0,2-0,...,955,79.6,2,12,649,54.1,1,12,0,100.0
3,4,4,Houston,AAC,11-2,0-1,3-1,8-0,0-2,3-0,...,1032,79.4,7,13,728,56.0,20,11,2,84.6
4,5,5,Gonzaga,WCC,10-2,0-0,3-2,7-0,3-2,0-0,...,1020,85.0,64,12,750,62.5,24,10,2,83.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,354,354,Mississippi Val.,SWAC,0-10,0-8,0-0,0-2,0-1,0-4,...,611,61.1,350,10,906,90.6,348,0,10,0.0
346,355,355,Eastern Ill.,OVC,0-11,0-8,0-1,0-2,0-2,0-1,...,744,57.2,231,13,918,70.6,336,2,11,15.4
347,356,356,IUPUI,Horizon,0-10,0-4,0-2,0-4,0-0,0-1,...,552,50.2,79,11,703,63.9,343,1,10,9.1
348,357,357,Maine,America East,0-7,0-5,0-0,0-2,0-1,0-2,...,580,58.0,103,10,651,65.1,299,3,7,30.0


In [27]:
df_rank_copy.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 350 entries, 0 to 349
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Rank           350 non-null    object 
 1   Previous       350 non-null    object 
 2   School         350 non-null    object 
 3   Conference     350 non-null    object 
 4   Record         350 non-null    object 
 5   Road           350 non-null    object 
 6   Neutral        350 non-null    object 
 7   Home           350 non-null    object 
 8   Quad 1         350 non-null    object 
 9   Quad 2         350 non-null    object 
 10  Quad 3         350 non-null    object 
 11  Quad 4         350 non-null    object 
 12  Rank-ScOff     350 non-null    int64  
 13  GM             350 non-null    int64  
 14  PTS-ScOff      350 non-null    int64  
 15  PPG-ScOff      350 non-null    float64
 16  Rank-ScDef     350 non-null    int64  
 17  OPP PTS-ScDef  350 non-null    int64  
 18  OPP PPG-Sc

In [31]:
# rename column GM_x to GM
df_rank_copy = df_rank_copy.rename(columns = {'GM_x' : 'GM'})
# Drop duplicate column GM_y
df_rank_copy = df_rank_copy.drop(columns = 'GM_y', axis = 1)

In [30]:
df_rank_copy

Unnamed: 0,Rank,Previous,School,Conference,Record,Road,Neutral,Home,Quad 1,Quad 2,...,GM,PTS-ScOff,PPG-ScOff,Rank-ScDef,OPP PTS-ScDef,OPP PPG-ScDef,Rank-WLPerc,W-WLPerc,L-WLPerc,Pct-WLPerc
0,1,1,Baylor,Big 12,11-0,1-0,3-0,7-0,2-0,2-0,...,11,901,81.9,6,614,55.8,1,11,0,100.0
1,2,2,Arizona,Pac-12,11-1,2-1,2-0,7-0,2-1,2-0,...,12,1074,89.5,81,768,64.0,7,11,1,91.7
2,3,3,LSU,SEC,12-0,0-0,4-0,8-0,1-0,2-0,...,12,955,79.6,2,649,54.1,1,12,0,100.0
3,4,4,Houston,AAC,11-2,0-1,3-1,8-0,0-2,3-0,...,13,1032,79.4,7,728,56.0,20,11,2,84.6
4,5,5,Gonzaga,WCC,10-2,0-0,3-2,7-0,3-2,0-0,...,12,1020,85.0,64,750,62.5,24,10,2,83.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,354,354,Mississippi Val.,SWAC,0-10,0-8,0-0,0-2,0-1,0-4,...,10,611,61.1,350,906,90.6,348,0,10,0.0
346,355,355,Eastern Ill.,OVC,0-11,0-8,0-1,0-2,0-2,0-1,...,13,744,57.2,231,918,70.6,336,2,11,15.4
347,356,356,IUPUI,Horizon,0-10,0-4,0-2,0-4,0-0,0-1,...,11,552,50.2,79,703,63.9,343,1,10,9.1
348,357,357,Maine,America East,0-7,0-5,0-0,0-2,0-1,0-2,...,10,580,58.0,103,651,65.1,299,3,7,30.0


# Make Final Dataset a CSV

Note: Can add/drop more variables if wanted -- For this, I chose to just add a few

In [32]:
final_file_name = 'NCAA_BBStat_df'
df_rank_copy.to_csv(final_file_name, index = False)