In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
#this is to make sure the height doesn't mix up in the csv file as the date of month
def PlayerHeightConversion(height):
    # Convert height from "5-5" to "5'5""
    parts = height.split('-')
    if len(parts) == 2:
        return f"{parts[0]}'{parts[1]}\""
    return height  # Return as is if not in expected format


def DataScraping(urls, player_name, player_height):
    data = []
    for url in urls:
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        players = soup.find_all('tr')
        for player in players:
            player_names = player.find('td', class_=player_name)
            player_heights = player.find('td', class_=player_height)
            if player_names and player_heights:
                name = player_names.text.strip()
                height = player_heights.text.strip()
                data.append({'Name': name, 'Height': height})
    return pd.DataFrame(data)

def analyze_data(df):
    # Sort by name as an example, since no conversion or sorting by height is done
    sorted_df = df.sort_values(by='Name', ascending=True)
    tallest_players = sorted_df.tail(8) #this will display the tallest 8 one in the output
    shortest_players = sorted_df.head(8)  # this will display the shortest 8 one in the output
    
    return tallest_players, shortest_players


sports_data = {
    'Men_Swimming_Diving': {
        'urls': ['https://csidolphins.com/sports/mens-swimming-and-diving/roster',
    'https://yorkathletics.com/sports/mens-swimming-and-diving/roster',
    'https://athletics.baruch.cuny.edu/sports/mens-swimming-and-diving/roster',
    'https://www.brooklyncollegeathletics.com/sports/mens-swimming-and-diving/roster',
    'https://lindenwoodlions.com/sports/mens-swimming-and-diving/roster',
    'https://mckbearcats.com/sports/mens-swimming-and-diving/roster',
    'https://ramapoathletics.com/sports/mens-swimming-and-diving/roster',
    'https://oneontaathletics.com/sports/mens-swimming-and-diving/roster',
    'https://bubearcats.com/sports/mens-swimming-and-diving/roster/2021-22',
    'https://albrightathletics.com/sports/mens-swimming-and-diving/roster/2021-22'],
        'player_name': 'sidearm-table-player-name',
        'player_height': 'height',
        'file_name': 'Men_Swimming_Diving.csv'
    },
    'Men_Volleyball': {
        'urls': ['https://lehmanathletics.com/sports/mens-volleyball/roster?view=2',
    'https://johnjayathletics.com/sports/mens-volleyball/roster/',
    'https://athletics.baruch.cuny.edu/sports/mens-volleyball/roster/',
    'https://ccnyathletics.com/sports/mens-volleyball/roster',
    'https://mecathletics.com/sports/mens-volleyball/roster',
    'https://www.huntercollegeathletics.com/sports/mens-volleyball/roster',
    'https://yorkathletics.com/sports/mens-volleyball/roster',
    'https://ballstatesports.com/sports/mens-volleyball/roster'],
        'player_name': 'sidearm-table-player-name',
        'player_height': 'height',
        'file_name': 'Men_Volleyball.csv'
    },
    'Women_SwimmingAndDiving': {
        'urls': ['https://csidolphins.com/sports/womens-swimming-and-diving/roster',
    'https://queensknights.com/sports/womens-swimming-and-diving/roster',
    'https://yorkathletics.com/sports/womens-swimming-and-diving/roster',
    'https://athletics.baruch.cuny.edu/sports/womens-swimming-and-diving/roster/2021-22?path=wswim',
    'https://www.brooklyncollegeathletics.com/sports/womens-swimming-and-diving/roster',
    'https://lindenwoodlions.com/sports/womens-swimming-and-diving/roster',
    'https://mckbearcats.com/sports/womens-swimming-and-diving/roster',
    'https://ramapoathletics.com/sports/womens-swimming-and-diving/roster',
    'https://keanathletics.com/sports/womens-swimming-and-diving/roster',
    'https://oneontaathletics.com/sports/womens-swimming-and-diving/roster'],
        'player_name': 'sidearm-table-player-name',
        'player_height': 'height',
        'file_name': 'Women_SwimmingAndDiving.csv'
    },
    'Women_Volleyball': {
        'urls': ['https://bmccathletics.com/sports/womens-volleyball/roster?view=2',
    'https://yorkathletics.com/sports/womens-volleyball/roster',
    'https://hostosathletics.com/sports/womens-volleyball/roster',
    'https://bronxbroncos.com/sports/womens-volleyball/roster/2021',
    'https://queensknights.com/sports/womens-volleyball/roster',
    'https://augustajags.com/sports/wvball/roster',
    'https://flaglerathletics.com/sports/womens-volleyball/roster',
    'https://pacersports.com/sports/womens-volleyball/roster',
    'https://www.golhu.com/sports/womens-volleyball/roster'],
        'player_name': 'sidearm-table-player-name',
        'player_height': 'height',
        'file_name': 'Women_Volleyball.csv'
    }
}
#savetocsv function to save dataframe to the csv file
def save_to_csv(df, file_name):
    df.to_csv(file_name, index=False)
    print(f"Data saved to {file_name}")

# Process and save data for each sport
for x, details in sports_data.items():
    df = DataScraping(details['urls'], details['player_name'], details['player_height'])
    tallest_players, shortest_players = analyze_data(df)
    print(f"\nTallest Players in {x}:\n", tallest_players)
    print(f"\nShortest Players in {x}:\n", shortest_players)
    save_to_csv(df, details['file_name'])



Tallest Players in Men_Swimming_Diving:
                      Name Height
165        William Maurer   5-10
16      Wood-by Paulemont    5-9
94            Wyatt Kurtz    5-5
98       Xander McWhorter   5-11
163            Xiaoyu Lin    6-1
21      Youssef  Elkhouly    6-3
152       Zachary Ciriaco    5-6
43   Zurab Chkhartishvili      -

Shortest Players in Men_Swimming_Diving:
                    Name Height
122          AJ Guevara    5-6
42     Aaron Brijbukhan      -
90       Abram Hagedorn    6-1
72        Adam Szczerba    6-4
114  Alessandro Alvarez    5-9
19          Alex Winter    6-0
113      Alireza Yavari   5-10
49    Allen Mardakhayev      -
Data saved to Men_Swimming_Diving.csv

Tallest Players in Men_Volleyball:
                 Name Height
98   Trevor Phillips    6-6
99    Vanis Buckholz    6-6
84    Vincent Agokei    6-3
27        Wen Cheung    5-9
107   Wil McPhillips    6-4
112  Will  Patterson   6-10
95       Xander Pink    5-7
34       Zachary Yao    6-0

Shortest Pl