In [16]:
#import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import random
import os


In [17]:
#each pair holds a team's slug as it appears in the url and its name as it appears in the table
_atl10_team_pairs = [
    ('massachusetts', 'UMass'),
    ('duquesne', 'Duquesne'),
    ('st-bonaventure', 'St. Bonaventure'),
    ('loyola-il', 'Loyola (IL)'),
    ('george-mason', 'George Mason'),
    ('dayton', 'Dayton'),
    ('george-washington', 'George Washington'),
    ('rhode-island', 'Rhode Island'),
    ('richmond', 'Richmond'),
    ('saint-josephs', "St. Joseph's"),
    ('saint-louis', 'Saint Louis'),
    ('virginia-commonwealth', 'VCU'),
    ('la-salle', 'La Salle'),
    ('fordham', 'Fordham'),
    ('davidson', 'Davidson'),
]

#split the pairs into two lists that line up by position: url slugs and table names
atl10_teams_url = [url_slug for url_slug, _ in _atl10_team_pairs]
atl10_teams = [table_name for _, table_name in _atl10_team_pairs]

In [18]:
#read the cached csv if it exists to save time, otherwise scrape each team's game log
gamelog_csv = 'Atlantic10-2024-gamelogs.csv'

if os.path.exists(gamelog_csv):
    atl10_df = pd.read_csv(gamelog_csv, index_col=0)

else:

    #the two team lists are matched by position, so fail loudly instead of mislabeling rows
    if len(atl10_teams_url) != len(atl10_teams):
        raise ValueError('atl10_teams_url and atl10_teams must have the same length')

    #collect one dataframe per team and concatenate once after the loop;
    #concatenating inside the loop re-copies every earlier row on each pass
    team_frames = []

    #loop through teams, reading each table and tagging it with the team's table name
    for index, (team, team_name) in enumerate(zip(atl10_teams_url, atl10_teams)):

        #sleep between requests to avoid error 429: too many requests
        #(skipped before the first request, so there is no wait after the last one)
        if index > 0:
            time.sleep(random.randint(4,6))

        #create url for lookup
        url = f'http://www.sports-reference.com/cbb/schools/{team}/men/2024-gamelogs.html'

        #read table from html
        df = pd.read_html(url, header=1, attrs={'id': 'sgl-basic_NCAAM'})[0]

        #create team name column and insert it in index 3
        df.insert(loc=3, column='Team', value=team_name)

        team_frames.append(df)

    #build the conference dataframe with a fresh 0..n-1 index
    atl10_df = pd.concat(team_frames, ignore_index=True)

    #write to csv so we don't have to wait a minute every time we reload the data
    atl10_df.to_csv(gamelog_csv)

In [19]:
#keep only the rows whose opponent appears in atl10_teams (drops non-conference games)
is_conference_game = atl10_df['Opp'].isin(atl10_teams)
atl10_df = atl10_df.loc[is_conference_game]

In [27]:
#renumber the rows from 0 after removing rows, discarding the old index values
atl10_df = atl10_df.reset_index(drop=True)

In [28]:
#rename unnamed column to H/A and relabel it as Home, Away, or Neutral
#labels written by an earlier run of this cell are passed through unchanged; without that,
#re-running the cell would turn every 'Away' and 'Neutral' row into 'Home'
venue_labels = {'@': 'Away', 'N': 'Neutral', 'Away': 'Away', 'Neutral': 'Neutral'}

#on a re-run the column is already called H/A and this rename changes nothing
atl10_df = atl10_df.rename(columns={'Unnamed: 2' : 'H/A'})

#any value that is not a known marker or label is treated as a home game
atl10_df['H/A'] = atl10_df['H/A'].apply(lambda x: venue_labels.get(x, 'Home'))

In [None]:
#rename all columns with .1 to show that they are the opponent's stats
#NOTE(review): the line below only references the rename method without calling it,
#so no columns are renamed yet -- TODO finish the rename or remove this cell
atl10_df.rename

In [29]:
#display the current state of the conference dataframe
atl10_df

Unnamed: 0,G,Date,H/A,Team,Opp,W/L,Tm,Opp.1,FG,FGA,...,FT.1,FTA.1,FT%.1,ORB.1,TRB.1,AST.1,STL.1,BLK.1,TOV.1,PF.1
0,13,2024-01-03,Home,UMass,Duquesne,W,80,61,27,51,...,13,22,.591,12,30,11,7,0,14,23
1,14,2024-01-07,Home,UMass,Dayton,L,60,64,24,66,...,18,21,.857,6,29,12,4,2,11,11
2,15,2024-01-10,Home,UMass,La Salle,W,81,65,31,63,...,6,15,.400,7,27,15,6,4,12,16
3,16,2024-01-13,Home,UMass,Rhode Island,L,77,89,27,66,...,21,28,.750,8,34,14,0,2,9,23
4,17,2024-01-17,Home,UMass,Loyola (IL),L,78,79,24,58,...,21,31,.677,12,32,20,7,4,12,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,28,2024-02-27,Home,Davidson,Dayton,L,66,80,22,58,...,10,12,.833,4,33,17,7,6,13,13
294,29,2024-03-02,Home,Davidson,UMass,L,67,69,27,56,...,16,23,.696,18,42,8,4,4,9,16
295,30,2024-03-06,Home,Davidson,Loyola (IL),L,59,69,21,55,...,12,21,.571,6,33,13,8,7,11,20
296,31,2024-03-09,Home,Davidson,St. Joseph's,L,71,89,27,67,...,23,31,.742,10,34,15,7,4,11,14
