In [3]:
import requests
from bs4 import BeautifulSoup
import pymongo


def get_soup(url, timeout=30):
    """Download ``url`` and return its HTML parsed with the lxml parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled server
            would block the call indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from the response text.

    Raises:
        requests.exceptions.RequestException: On connection problems, on a
            timeout, or when the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on an HTTP error instead of handing an error page to the
    # parsing code, where it would surface as an unrelated lookup failure.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def extract_team_data(soup):
    """Collect one record per table row from the parsed club-listing page.

    Rows are the ``<tr>`` elements found inside the ``div`` with class
    ``complexTableScroll``. The first row is skipped (presumably the table
    header -- confirm against the page markup).

    Args:
        soup: Parsed page exposing BeautifulSoup's ``find`` / ``find_all``.

    Returns:
        A list of dicts, one per remaining row, with the keys ``Team``,
        ``Members``, ``Foreign Players``, ``Mean Age``, ``Salaries``,
        ``Spending`` and ``MOY``. Every value is the stripped text of the
        corresponding element; nothing is converted to a number.
    """
    # Output key for each <td>, in cell order starting at index 1
    # (the cell at index 0 is not read).
    columns = (
        'Members',
        'Foreign Players',
        'Mean Age',
        'Salaries',
        'Spending',
        'MOY',
    )

    table = soup.find('div', class_='complexTableScroll')
    records = []
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        name_tag = row.find('span', {'class': 'identity__label'})
        record = {'Team': name_tag.text.strip()}
        for position, key in enumerate(columns, start=1):
            record[key] = cells[position].text.strip()
        records.append(record)
    return records

def merge_ranking(data, soup):
    """Add league-table figures to the team records, modifying ``data`` in place.

    Rows are the ``<tr>`` elements inside the ``div`` with class
    ``rankingTable__tableScroll``; the first row is skipped. The i-th
    remaining row is written onto ``data[i]``, and ``Rank`` is that row's
    position counted from 1.

    NOTE(review): rows are paired with records purely by position, which
    assumes ``data`` is already in ranking order -- confirm against the
    page that produced ``data``.

    Args:
        data: List of team dicts to update; it must hold at least as many
            entries as there are ranking rows, otherwise ``IndexError`` is
            raised.
        soup: Parsed ranking page exposing ``find`` / ``find_all``.

    Returns:
        None. Each matched dict gains the keys ``Rank``, ``Points``, ``DIF``,
        ``Gain``, ``Null``, ``Defeat``, ``BP`` and ``BC``.
    """
    # Output key for each ``td.rankingTable__acronym`` cell, in cell order
    # starting at index 1 (the cell at index 0 is not read).
    stat_keys = ('DIF', 'Gain', 'Null', 'Defeat', 'BP', 'BC')

    table = soup.find('div', class_='rankingTable__tableScroll')
    for position, row in enumerate(table.find_all('tr')[1:]):
        stats = row.find_all('td', {'class': 'rankingTable__acronym'})
        record = data[position]
        points_cell = row.find('td', {'class': 'rankingTable__points'})
        ranking = {
            'Rank': position + 1,
            'Points': points_cell.text.strip(),
        }
        for offset, key in enumerate(stat_keys, start=1):
            ranking[key] = stats[offset].text.strip()
        record.update(ranking)

def clear_collection(mongodb_uri, database_name, collection_name):
    """Delete every document in a MongoDB collection.

    Failures are printed to stdout and not re-raised, so the caller cannot
    tell from the return value whether the collection was actually cleared.

    Args:
        mongodb_uri: Connection string passed to ``pymongo.MongoClient``.
        database_name: Name of the database holding the collection.
        collection_name: Name of the collection to empty.

    Returns:
        None.
    """
    # Bind the name before entering the try block: if MongoClient() itself
    # raises (for example on a malformed URI), the finally clause would
    # otherwise hit an unbound ``client`` and replace the reported error with
    # an UnboundLocalError.
    client = None
    try:
        client = pymongo.MongoClient(mongodb_uri)
        db = client[database_name]
        collection = db[collection_name]
        collection.delete_many({})  # Empty filter: matches every document
        print("Collection cleared successfully!")
    except pymongo.errors.ConnectionFailure as e:
        print("Failed to connect to MongoDB:", e)
    except Exception as e:
        print("An error occurred:", e)
    finally:
        if client is not None:
            client.close()

def upload_to_mongodb(data, mongodb_uri, database_name, collection_name):
    """Insert ``data`` into a MongoDB collection with a single ``insert_many``.

    Failures are printed to stdout and not re-raised.

    Args:
        data: Documents (dicts) to insert. When empty, nothing is sent and no
            connection is opened.
        mongodb_uri: Connection string passed to ``pymongo.MongoClient``.
        database_name: Name of the target database.
        collection_name: Name of the target collection.

    Returns:
        None.
    """
    # insert_many rejects an empty list, so report that case directly
    # instead of connecting only to print a generic error.
    if not data:
        print("No data to upload.")
        return

    # Bind the name before entering the try block: if MongoClient() itself
    # raises, the finally clause would otherwise hit an unbound ``client``
    # and replace the reported error with an UnboundLocalError.
    client = None
    try:
        client = pymongo.MongoClient(mongodb_uri)
        db = client[database_name]
        collection = db[collection_name]
        collection.insert_many(data)
        print("Data uploaded successfully to MongoDB!")
    except pymongo.errors.ConnectionFailure as e:
        print("Failed to connect to MongoDB:", e)
    except Exception as e:
        print("An error occurred:", e)
    finally:
        if client is not None:
            client.close()

def main(year='2022-2023',
         mongodb_uri="mongodb://51.20.249.235:27017",
         database_name="ca2",
         collection_name="soccer"):
    """Scrape one season's club and ranking pages and store them in MongoDB.

    The pages are fetched and parsed before the collection is touched, so a
    network failure or a parsing error leaves the previously stored documents
    in place instead of an empty collection.

    Args:
        year: Season segment of the URL, e.g. ``'2022-2023'``.
        mongodb_uri: Connection string of the MongoDB server.
        database_name: Database that holds the target collection.
        collection_name: Collection that is cleared and then refilled.

    Returns:
        None.
    """
    base_url = f'https://www.footmercato.net/angleterre/premier-league/{year}'

    soup1 = get_soup(base_url + '/club')
    team_data = extract_team_data(soup1)
    if not team_data:
        # Nothing to store: do not wipe the existing data for an empty result.
        print("No team data scraped; leaving the existing collection untouched.")
        return

    soup2 = get_soup(base_url + '/classement')
    merge_ranking(team_data, soup2)

    # Replace the stored documents only now that the new records are complete.
    clear_collection(mongodb_uri, database_name, collection_name)
    upload_to_mongodb(team_data, mongodb_uri, database_name, collection_name)


# Run the pipeline only when executed as a script (or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    main()


Collection cleared successfully!
Data uploaded successfully to MongoDB!
