In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from datetime import datetime, timedelta

# Function to get game results for a specific date
def get_game_results(year, month, day):
    url = f"https://www.ncaa.com/scoreboard/lacrosse-women/d1/{year}/{month:02d}/{day:02d}/all-conf"
    print(f"Fetching data from: {url}")

    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to retrieve data for {year}-{month:02d}-{day:02d}: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    games = []

    # Find game containers
    for game in soup.find_all("div", class_="gamePod"):
        try:
            date = f"{year}-{month:02d}-{day:02d}"
            status = game.find("div", class_="gamePod-status").text.strip()  # e.g., "FINAL"
            round_info = game.find("span", class_="game-round")
            round_name = round_info.text.strip() if round_info else "Unknown Round"

            teams = game.find_all("span", class_="gamePod-game-team-name")
            scores = game.find_all("span", class_="gamePod-game-team-score")
            ranks = game.find_all("span", class_="gamePod-game-team-rank")

            team1 = teams[0].text.strip() if len(teams) > 0 else "Unknown"
            team2 = teams[1].text.strip() if len(teams) > 1 else "Unknown"
            score1 = scores[0].text.strip() if len(scores) > 0 else "N/A"
            score2 = scores[1].text.strip() if len(scores) > 1 else "N/A"
            rank1 = ranks[0].text.strip() if len(ranks) > 0 else "N/A"
            rank2 = ranks[1].text.strip() if len(ranks) > 1 else "N/A"

            winner_class = game.find_all("li", class_="winner")
            winner = winner_class[0].find("span", class_="gamePod-game-team-name").text.strip() if winner_class else "N/A"

            games.append([date, round_name, status, team1, rank1, score1, team2, rank2, score2, winner])

        except Exception as e:
            print(f"Error parsing game data for {date}: {e}")

    return games

# Set the range of years
START_YEAR = 2014
END_YEAR = 2024

for year in range(START_YEAR, END_YEAR + 1):
    all_games = []
    start_date = datetime(year, 1, 1)
    end_date = datetime(year, 5, 30)
    current_date = start_date

    while current_date <= end_date:
        month, day = current_date.month, current_date.day
        games_data = get_game_results(year, month, day)

        if games_data:
            all_games.extend(games_data)


        time.sleep(1.5)


        current_date += timedelta(days=1)


    if all_games:
        df = pd.DataFrame(all_games, columns=["Date", "Round", "Status", "Team 1", "Rank 1", "Score 1", "Team 2", "Rank 2", "Score 2", "Winner"])
        filename = f"ncaa_womens_lacrosse_results_{year}.csv"
        df.to_csv(filename, index=False)
        print(f"Data for {year} (Aug–Dec) saved to {filename}")
    else:
        print(f"No games found for {year} (Aug–Dec)")

print("Scraping completed for the last 10 seasons (August–December).")

Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/01/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/02/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/03/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/04/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/05/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/06/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/07/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/08/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/09/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/10/all-conf
Fetching data from: https://www.ncaa.com/scoreboard/lacrosse-women/d1/2014/01/11/all-conf
Fetching d

# New Section

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import math

class EloRatingSystem:
    def __init__(self, k=32, initial_rating=1500):
        self.k = k
        self.ratings = {}
        self.initial_rating = initial_rating


    def expected_score(self, rating1, rating2):
        return 1 / (1 + 10 ** ((rating2 - rating1) / 400))

    def update_ratings(self, team1, team2, score1, score2):
        if team1 not in self.ratings:
            self.ratings[team1] = self.initial_rating
        if team2 not in self.ratings:
            self.ratings[team2] = self.initial_rating

        rating1, rating2 = self.ratings[team1], self.ratings[team2]
        exp_score1 = self.expected_score(rating1, rating2)
        exp_score2 = self.expected_score(rating2, rating1)

        actual_score1 = 1 if score1 > score2 else 0.5 if score1 == score2 else 0
        actual_score2 = 1 - actual_score1


        mov = abs(score1 - score2)
        mov_multiplier = math.log(mov + 1) * (2.2 / (1 + 0.001 * abs(rating1 - rating2)))


        self.ratings[team1] += self.k * mov_multiplier * (actual_score1 - exp_score1)
        self.ratings[team2] += self.k * mov_multiplier * (actual_score2 - exp_score2)

    def normalize_end_of_season(self):
        """Applies regression to the mean to avoid long-term inflation/deflation."""
        for team in self.ratings:
            if self.ratings[team] > self.initial_rating:
                self.ratings[team] -= (self.ratings[team] - self.initial_rating) * 0.25
            else:
                self.ratings[team] += (self.initial_rating - self.ratings[team]) * 0.25

    def get_ratings(self):
        return self.ratings




In [2]:
from google.colab import files

uploaded = files.upload()

for filename in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=filename, length=len(uploaded[filename])))




Saving ncaa_womens_hockey_results_2014.csv to ncaa_womens_hockey_results_2014.csv
Saving ncaa_womens_lacrosse_results_2014 (1).csv to ncaa_womens_lacrosse_results_2014 (1).csv
Saving ncaa_womens_lacrosse_results_2014.csv to ncaa_womens_lacrosse_results_2014.csv
Saving ncaa_womens_lacrosse_results_2015.csv to ncaa_womens_lacrosse_results_2015.csv
Saving ncaa_womens_lacrosse_results_2016.csv to ncaa_womens_lacrosse_results_2016.csv
Saving ncaa_womens_lacrosse_results_2017.csv to ncaa_womens_lacrosse_results_2017.csv
Saving ncaa_womens_lacrosse_results_2018.csv to ncaa_womens_lacrosse_results_2018.csv
Saving ncaa_womens_lacrosse_results_2019.csv to ncaa_womens_lacrosse_results_2019.csv
Saving ncaa_womens_lacrosse_results_2020.csv to ncaa_womens_lacrosse_results_2020.csv
Saving ncaa_womens_lacrosse_results_2021.csv to ncaa_womens_lacrosse_results_2021.csv
Saving ncaa_womens_lacrosse_results_2022.csv to ncaa_womens_lacrosse_results_2022.csv
Saving ncaa_womens_lacrosse_results_2023.csv to nc

In [None]:
elo_system = EloRatingSystem()


In [3]:
import pandas as pd
import io

all_data = []
for filename, content in uploaded.items():
    df = pd.read_csv(io.BytesIO(content))
    all_data.append(df)

combined_data = pd.concat(all_data, ignore_index=True)
combined_data['Date'] = pd.to_datetime(combined_data['Date'])
combined_data.sort_values('Date', inplace=True)

print(combined_data.head())


           Date          Round Status      Team 1  Rank 1  Score 1  \
0    2014-02-07  Unknown Round  Final     Liberty     NaN      7.0   
785  2014-02-07  Unknown Round    NaN  Vanderbilt     NaN      0.0   
786  2014-02-07  Unknown Round    NaN        Elon     NaN      0.0   
1566 2014-02-07  Unknown Round  Final     Liberty     NaN      7.0   
1567 2014-02-07  Unknown Round  Final     Detroit     NaN      3.0   

            Team 2  Rank 2  Score 2    Winner  
0         Richmond     NaN     16.0  Richmond  
785   Kennesaw St.     NaN      0.0       NaN  
786           Duke     NaN      0.0       NaN  
1566      Richmond     NaN     16.0  Richmond  
1567      Ohio St.     NaN     18.0  Ohio St.  


In [8]:

for index, row in combined_data.iterrows():
    if pd.notna(row['Score 1']) and pd.notna(row['Score 2']):
        score1, score2 = int(row['Score 1']), int(row['Score 2'])
        elo_system.update_ratings(row['Team 1'], row['Team 2'], score1, score2)

final_ratings = pd.DataFrame(list(elo_system.get_ratings().items()), columns=['Team', 'Rating']).sort_values(by='Rating', ascending=False)
print(final_ratings)



NameError: name 'elo_system' is not defined

In [9]:
def display_elo_ratings(elo_system):

    sorted_ratings = sorted(elo_system.ratings.items(), key=lambda x: x[1], reverse=True)


    print("Team Names and Their Elo Ratings:")
    for team, rating in sorted_ratings:
        print(f"{team}: {rating:.2f}")


display_elo_ratings(elo_system)


NameError: name 'elo_system' is not defined

In [10]:
def calculate_win_probability(elo_system, team1, team2):
    if team1 in elo_system.ratings and team2 in elo_system.ratings:
        rating1 = elo_system.ratings[team1]
        rating2 = elo_system.ratings[team2]
        expected_score_team1 = 1 / (1 + 10 ** ((rating2 - rating1) / 400))
        expected_score_team2 = 1 - expected_score_team1
        return expected_score_team1, expected_score_team2
    else:
        return None




In [None]:
team1 = input("Enter the name of Team 1: ")
team2 = input("Enter the name of Team 2: ")

probabilities = calculate_win_probability(elo_system, team1, team2)
if probabilities:
    print(f"Probability of {team1} winning: {probabilities[0] * 100:.2f}%")
    print(f"Probability of {team2} winning: {probabilities[1] * 100:.2f}%")
else:
    print("One or both of the teams are not found in the current Elo ratings.")
